From 4bb053f90bdad0cc729f28dadc41a947e8e42d4a Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 02:04:20 +0530 Subject: [PATCH 01/49] feat(dashboards): fold export contract and chat rollout primitives --- ddpui/api/dashboard_native_api.py | 13 +++ ddpui/schemas/dashboard_schema.py | 12 ++ ddpui/services/dashboard_service.py | 44 ++++++++ .../api_tests/test_dashboard_native_api.py | 106 ++++++++++++++++++ ddpui/tests/api_tests/test_user_org_api.py | 13 +++ ddpui/tests/utils/test_feature_flags.py | 10 ++ ddpui/utils/feature_flags.py | 1 + seed/002_permissions.json | 10 +- seed/003_role_permissions.json | 18 ++- 9 files changed, 225 insertions(+), 2 deletions(-) diff --git a/ddpui/api/dashboard_native_api.py b/ddpui/api/dashboard_native_api.py index 144f03bb3..acaad3f8a 100644 --- a/ddpui/api/dashboard_native_api.py +++ b/ddpui/api/dashboard_native_api.py @@ -35,6 +35,7 @@ DashboardCreate, DashboardUpdate, DashboardResponse, + DashboardExportResponse, DashboardFilterResponse, FilterCreate, FilterUpdate, @@ -89,6 +90,18 @@ def get_dashboard(request, dashboard_id: int): return DashboardResponse(**DashboardService.get_dashboard_response(dashboard)) +@dashboard_native_router.get("/{dashboard_id}/export/", response=DashboardExportResponse) +@has_permission(["can_view_dashboards"]) +def export_dashboard(request, dashboard_id: int): + """Export dashboard JSON along with the chart configs it references.""" + orguser: OrgUser = request.orguser + + try: + return DashboardService.export_dashboard_context(dashboard_id, orguser.org) + except DashboardNotFoundError as err: + raise HttpError(404, "Dashboard not found") from err + + @dashboard_native_router.post("/", response=DashboardResponse) @has_permission(["can_create_dashboards"]) def create_dashboard(request, payload: DashboardCreate): diff --git a/ddpui/schemas/dashboard_schema.py b/ddpui/schemas/dashboard_schema.py index 33016815f..230d80292 100644 --- a/ddpui/schemas/dashboard_schema.py +++ 
b/ddpui/schemas/dashboard_schema.py @@ -181,3 +181,15 @@ class LandingPageResolveResponse(Schema): dashboard_title: Optional[str] dashboard_type: Optional[str] source: str # "personal", "org_default", or "none" + + +# ============================================================================= +# Dashboard Export Schemas +# ============================================================================= + + +class DashboardExportResponse(Schema): + """Response schema for exporting dashboard context and referenced charts""" + + dashboard: DashboardResponse + charts: List[dict] diff --git a/ddpui/services/dashboard_service.py b/ddpui/services/dashboard_service.py index 1ddbe8396..82193121e 100644 --- a/ddpui/services/dashboard_service.py +++ b/ddpui/services/dashboard_service.py @@ -986,6 +986,50 @@ def validate_dashboard_config(dashboard: Dashboard) -> Dict[str, Any]: return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings} + @staticmethod + def export_dashboard_context(dashboard_id: int, org: Org) -> Dict[str, Any]: + """Return dashboard data along with the full config for referenced charts.""" + dashboard = DashboardService.get_dashboard(dashboard_id, org) + dashboard_response = DashboardService.get_dashboard_response(dashboard) + + charts = [] + for component_data in (dashboard.components or {}).values(): + if component_data.get("type") != DashboardComponentType.CHART.value: + continue + + chart_id = component_data.get("config", {}).get("chartId") + if not chart_id: + continue + + try: + chart = Chart.objects.get(id=chart_id, org=org) + except Chart.DoesNotExist: + logger.warning( + "Chart %s referenced by dashboard %s was not found", + chart_id, + dashboard_id, + ) + continue + + charts.append( + { + "id": chart.id, + "title": chart.title, + "description": chart.description, + "chart_type": chart.chart_type, + "schema_name": chart.schema_name, + "table_name": chart.table_name, + "extra_config": chart.extra_config or {}, + "created_at": 
chart.created_at.isoformat(), + "updated_at": chart.updated_at.isoformat(), + } + ) + + return { + "dashboard": dashboard_response, + "charts": charts, + } + def delete_dashboard_safely(dashboard_id: int, orguser: OrgUser) -> tuple[bool, str]: """ diff --git a/ddpui/tests/api_tests/test_dashboard_native_api.py b/ddpui/tests/api_tests/test_dashboard_native_api.py index f4e6228d1..eb11c0ef5 100644 --- a/ddpui/tests/api_tests/test_dashboard_native_api.py +++ b/ddpui/tests/api_tests/test_dashboard_native_api.py @@ -31,6 +31,7 @@ from ddpui.api.dashboard_native_api import ( list_dashboards, get_dashboard, + export_dashboard, create_dashboard, update_dashboard, delete_dashboard, @@ -130,6 +131,38 @@ def sample_filter(sample_dashboard): pass +@pytest.fixture +def sample_charts(orguser, org): + """Charts that can be referenced from dashboard components.""" + charts = [ + Chart.objects.create( + title="Funding by Quarter", + description="Quarterly funding totals", + chart_type="bar", + schema_name="public", + table_name="funding", + extra_config={"metrics": ["sum_amount"]}, + created_by=orguser, + last_modified_by=orguser, + org=org, + ), + Chart.objects.create( + title="Donor Count", + description="Unique donors over time", + chart_type="line", + schema_name="public", + table_name="donors", + extra_config={"metrics": ["count_distinct_donor_id"]}, + created_by=orguser, + last_modified_by=orguser, + org=org, + ), + ] + yield charts + for chart in charts: + chart.delete() + + # ================================================================================ # Test list_dashboards endpoint # ================================================================================ @@ -269,6 +302,79 @@ def test_get_dashboard_wrong_org(self, orguser, seed_db): other_org.delete() +class TestExportDashboard: + """Tests for export_dashboard endpoint.""" + + def test_export_dashboard_success(self, orguser, sample_dashboard, sample_charts, seed_db): + """Test exporting dashboard data and 
referenced chart configs.""" + request = mock_request(orguser) + sample_dashboard.components = { + "chart-1": {"type": "chart", "config": {"chartId": sample_charts[0].id}}, + "text-1": {"type": "text", "config": {"content": "Notes"}}, + "chart-2": {"type": "chart", "config": {"chartId": sample_charts[1].id}}, + } + sample_dashboard.save(update_fields=["components"]) + + response = export_dashboard(request, dashboard_id=sample_dashboard.id) + + assert response["dashboard"]["id"] == sample_dashboard.id + assert [chart["id"] for chart in response["charts"]] == [ + sample_charts[0].id, + sample_charts[1].id, + ] + assert response["charts"][0]["extra_config"] == {"metrics": ["sum_amount"]} + + def test_export_dashboard_not_found(self, orguser, seed_db): + """Test exporting a non-existent dashboard returns 404.""" + request = mock_request(orguser) + + with pytest.raises(HttpError) as excinfo: + export_dashboard(request, dashboard_id=99999) + + assert excinfo.value.status_code == 404 + + def test_export_dashboard_wrong_org(self, orguser, seed_db): + """Test exporting a dashboard from another org returns 404.""" + other_org = Org.objects.create(name="Other Export Org", slug="other-export-org") + other_user = User.objects.create(username="otherexport", email="otherexport@test.com") + other_orguser = OrgUser.objects.create( + user=other_user, + org=other_org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + other_dashboard = Dashboard.objects.create( + title="Other Export Dashboard", + dashboard_type="native", + created_by=other_orguser, + org=other_org, + ) + + request = mock_request(orguser) + + with pytest.raises(HttpError) as excinfo: + export_dashboard(request, dashboard_id=other_dashboard.id) + + assert excinfo.value.status_code == 404 + + other_dashboard.delete() + other_orguser.delete() + other_user.delete() + other_org.delete() + + def test_export_dashboard_skips_missing_chart(self, orguser, sample_dashboard, seed_db): + """Test exporting 
ignores chart references that no longer exist.""" + request = mock_request(orguser) + sample_dashboard.components = { + "chart-1": {"type": "chart", "config": {"chartId": 999999}}, + } + sample_dashboard.save(update_fields=["components"]) + + response = export_dashboard(request, dashboard_id=sample_dashboard.id) + + assert response["dashboard"]["id"] == sample_dashboard.id + assert response["charts"] == [] + + # ================================================================================ # Test create_dashboard endpoint # ================================================================================ diff --git a/ddpui/tests/api_tests/test_user_org_api.py b/ddpui/tests/api_tests/test_user_org_api.py index 2e2fa790d..f41ebe2df 100644 --- a/ddpui/tests/api_tests/test_user_org_api.py +++ b/ddpui/tests/api_tests/test_user_org_api.py @@ -167,6 +167,19 @@ def test_seed_data(seed_db): assert Permission.objects.count() > 5 +def test_can_manage_org_settings_seeded_for_admin_roles(seed_db): + """Account managers and super admins should have org settings access.""" + permission = Permission.objects.get(slug="can_manage_org_settings") + role_slugs = set( + Role.objects.filter(rolepermissions__permission=permission).values_list("slug", flat=True) + ) + + assert SUPER_ADMIN_ROLE in role_slugs + assert ACCOUNT_MANAGER_ROLE in role_slugs + assert PIPELINE_MANAGER_ROLE not in role_slugs + assert GUEST_ROLE not in role_slugs + + def test_get_current_userv2_has_user(authuser, org_with_workspace, org_without_workspace): """tests /worksspace/detatch/""" orguser1 = OrgUser.objects.create( diff --git a/ddpui/tests/utils/test_feature_flags.py b/ddpui/tests/utils/test_feature_flags.py index 62fd38717..928ffc0af 100644 --- a/ddpui/tests/utils/test_feature_flags.py +++ b/ddpui/tests/utils/test_feature_flags.py @@ -95,6 +95,16 @@ def test_invalid_flag_no_db_entry(self): self.assertEqual(OrgFeatureFlag.objects.filter(flag_name="INVALID_FLAG").count(), 0) 
self.assertEqual(OrgFeatureFlag.objects.filter(flag_name="ANOTHER_INVALID_FLAG").count(), 0) + def test_ai_dashboard_chat_flag_is_available(self): + """Test the dashboard chat feature flag can be enabled and listed.""" + enable_feature_flag("AI_DASHBOARD_CHAT", org=self.org) + + self.assertTrue(is_feature_flag_enabled("AI_DASHBOARD_CHAT", org=self.org)) + + all_flags = get_all_feature_flags_for_org(self.org) + self.assertIn("AI_DASHBOARD_CHAT", all_flags) + self.assertTrue(all_flags["AI_DASHBOARD_CHAT"]) + def test_uniqueness_constraint(self): """Test that the database uniqueness constraint prevents duplicate (org, flag_name) pairs""" from django.db import transaction diff --git a/ddpui/utils/feature_flags.py b/ddpui/utils/feature_flags.py index d6da7e50b..c385eb2c2 100644 --- a/ddpui/utils/feature_flags.py +++ b/ddpui/utils/feature_flags.py @@ -8,6 +8,7 @@ "LOG_SUMMARIZATION": "Summarize logs using AI", "AI_DATA_ANALYSIS": "Enable data analysis using AI", "DATA_STATISTICS": "Enable detailed data statistics in explore", + "AI_DASHBOARD_CHAT": "Enable chat with dashboards", } diff --git a/seed/002_permissions.json b/seed/002_permissions.json index 74572a95b..4d895119f 100644 --- a/seed/002_permissions.json +++ b/seed/002_permissions.json @@ -582,5 +582,13 @@ "name": "Can Manage Organization Default Dashboard", "slug": "can_manage_org_default_dashboard" } + }, + { + "model": "ddpui.Permission", + "pk": 74, + "fields": { + "name": "Can Manage Organization Settings", + "slug": "can_manage_org_settings" + } } -] \ No newline at end of file +] diff --git a/seed/003_role_permissions.json b/seed/003_role_permissions.json index 4709b3ee8..bbf3319a0 100644 --- a/seed/003_role_permissions.json +++ b/seed/003_role_permissions.json @@ -2246,5 +2246,21 @@ "role": 5, "permission": 59 } + }, + { + "model": "ddpui.RolePermission", + "pk": 278, + "fields": { + "role": 1, + "permission": 74 + } + }, + { + "model": "ddpui.RolePermission", + "pk": 279, + "fields": { + "role": 2, + 
"permission": 74 + } } -] \ No newline at end of file +] From 62c16a39cf0cb96a6548e894b5a06e4b519b807f Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 02:24:32 +0530 Subject: [PATCH 02/49] chore(ai-chat): resolve backend runtime dependencies for chroma and langgraph --- pyproject.toml | 22 +- uv.lock | 1072 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 1017 insertions(+), 77 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2263a37f2..eeae97d7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "channels==4.1.0", "channels-redis==4.2.0", "charset-normalizer==3.1.0", + "chromadb==0.6.3", "click==8.1.3", "click-didyoumean==0.3.0", "click-plugins==1.1.1", @@ -71,7 +72,7 @@ dependencies = [ "faker==17.6.0", "filelock==3.16.1", "flower>=2.0.1", - "fsspec==2023.1.0", + "fsspec==2023.10.0", "future==0.18.3", "gitdb==4.0.11", "gitpython==3.1.43", @@ -93,12 +94,12 @@ dependencies = [ "grpcio-status==1.62.2", "grpclib==0.4.3", "gunicorn==20.1.0", + "huggingface-hub==0.26.5", "h11==0.14.0", "h2==4.1.0", "hologram==0.0.16", "hpack==4.0.0", - "httpcore==0.16.3", - "httpx==0.23.3", + "httpx==0.27.0", "hyperframe==6.0.1", "identify==2.6.1", "idna==3.4", @@ -110,14 +111,15 @@ dependencies = [ "jedi==0.19.0", "jinja2==3.1.4", "jmespath==1.0.1", - "jsonpatch==1.32", + "jsonpatch==1.33", "jsonpointer==2.3", "jsonschema==4.23.0", "jsonschema-specifications==2023.12.1", "kiwisolver==1.4.5", "kombu==5.2.4", - "kubernetes==26.1.0", + "kubernetes>=28.1.0", "lazy-object-proxy==1.9.0", + "langgraph==0.0.69", "leather==0.3.4", "logbook==1.5.3", "mako==1.2.4", @@ -139,9 +141,11 @@ dependencies = [ "nodeenv==1.9.1", "numpy>=2.2.4", "oauthlib==3.2.2", + "onnxruntime==1.20.1", + "openai==1.55.3", "ordered-set==4.1.0", - "orjson==3.8.8", - "packaging==23.0", + "orjson==3.9.12", + "packaging==23.2", "pandas==2.2.2", "paramiko==3.4.0", "parsedatetime==2.4", @@ -223,14 +227,14 @@ dependencies = [ 
"starkbank-ecdsa==2.2.0", "stringcase==1.2.0", "tabulate==0.9.0", - "tenacity==9.0.0", + "tenacity==8.5.0", "text-unidecode==1.3", "toml==0.10.2", "tomli==2.0.1", "tomlkit==0.11.7", "tornado==6.3.2", "traitlets==5.9.0", - "typer==0.7.0", + "typer==0.9.0", "typing-extensions==4.12.2", "tzdata==2022.7", "tzlocal==4.3", diff --git a/uv.lock b/uv.lock index a21c51403..8134a061d 100644 --- a/uv.lock +++ b/uv.lock @@ -2,7 +2,9 @@ version = 1 revision = 2 requires-python = ">=3.10" resolution-markers = [ - "python_full_version >= '3.12'", + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", "python_full_version == '3.11.*'", "python_full_version < '3.11'", ] @@ -42,6 +44,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/a3/e7b3b9d34239bae066df135060e225929d639731050c920fdc740d6b7897/amqp-5.1.1-py3-none-any.whl", hash = "sha256:6f0956d2c23d8fa6e7691934d8c3930eadb44972cbbd1a7ae3a520f735d43359", size = 50810, upload-time = "2022-04-17T06:39:09.3Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "anyio" version = "3.6.2" @@ -354,6 +365,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dd/ab/55062f6eaf9fc537b62b7425ab53ef4366032256e1dda8ef52a9a31f7a6e/botocore-1.42.32-py3-none-any.whl", hash = 
"sha256:9c1ce43687cc4c0bba12054b229b3464265c699e2de4723998d86791254a5a37", size = 14573367, upload-time = "2026-01-21T20:39:56.65Z" }, ] +[[package]] +name = "build" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "os_name == 'nt'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" }, + { name = "packaging" }, + { name = "pyproject-hooks" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/1c/23e33405a7c9eac261dff640926b8b5adaed6a6eb3e1767d441ed611d0c0/build-1.3.0.tar.gz", hash = "sha256:698edd0ea270bde950f53aed21f3a0135672206f3911e0176261a31e0e07b397", size = 48544, upload-time = "2025-08-01T21:27:09.268Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/8c/2b30c12155ad8de0cf641d76a8b396a16d2c36bc6d50b621a62b7c4567c1/build-1.3.0-py3-none-any.whl", hash = "sha256:7145f0b5061ba90a1500d60bd1b13ca0a8a4cebdd0cc16ed8adf1c0e739f43b4", size = 23382, upload-time = "2025-08-01T21:27:07.844Z" }, +] + [[package]] name = "cachetools" version = "5.3.0" @@ -499,6 +526,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/81/14b3b8f01ddaddad6cdec97f2f599aa2fa466bd5ee9af99b08b7713ccd29/charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d", size = 46166, upload-time = "2023-03-06T09:49:36.848Z" }, ] +[[package]] +name = "chroma-hnswlib" +version = "0.7.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/09/10d57569e399ce9cbc5eee2134996581c957f63a9addfa6ca657daf006b8/chroma_hnswlib-0.7.6.tar.gz", hash = "sha256:4dce282543039681160259d29fcde6151cc9106c6461e0485f57cdccd83059b7", size = 32256, upload-time = "2024-07-22T20:19:29.259Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a8/74/b9dde05ea8685d2f8c4681b517e61c7887e974f6272bb24ebc8f2105875b/chroma_hnswlib-0.7.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f35192fbbeadc8c0633f0a69c3d3e9f1a4eab3a46b65458bbcbcabdd9e895c36", size = 195821, upload-time = "2024-07-22T20:18:26.163Z" }, + { url = "https://files.pythonhosted.org/packages/fd/58/101bfa6bc41bc6cc55fbb5103c75462a7bf882e1704256eb4934df85b6a8/chroma_hnswlib-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f007b608c96362b8f0c8b6b2ac94f67f83fcbabd857c378ae82007ec92f4d82", size = 183854, upload-time = "2024-07-22T20:18:27.6Z" }, + { url = "https://files.pythonhosted.org/packages/17/ff/95d49bb5ce134f10d6aa08d5f3bec624eaff945f0b17d8c3fce888b9a54a/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:456fd88fa0d14e6b385358515aef69fc89b3c2191706fd9aee62087b62aad09c", size = 2358774, upload-time = "2024-07-22T20:18:29.161Z" }, + { url = "https://files.pythonhosted.org/packages/3a/6d/27826180a54df80dbba8a4f338b022ba21c0c8af96fd08ff8510626dee8f/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dfaae825499c2beaa3b75a12d7ec713b64226df72a5c4097203e3ed532680da", size = 2392739, upload-time = "2024-07-22T20:18:30.938Z" }, + { url = "https://files.pythonhosted.org/packages/d6/63/ee3e8b7a8f931918755faacf783093b61f32f59042769d9db615999c3de0/chroma_hnswlib-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:2487201982241fb1581be26524145092c95902cb09fc2646ccfbc407de3328ec", size = 150955, upload-time = "2024-07-22T20:18:32.268Z" }, + { url = "https://files.pythonhosted.org/packages/f5/af/d15fdfed2a204c0f9467ad35084fbac894c755820b203e62f5dcba2d41f1/chroma_hnswlib-0.7.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81181d54a2b1e4727369486a631f977ffc53c5533d26e3d366dda243fb0998ca", size = 196911, upload-time = "2024-07-22T20:18:33.46Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/19/aa6f2139f1ff7ad23a690ebf2a511b2594ab359915d7979f76f3213e46c4/chroma_hnswlib-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b4ab4e11f1083dd0a11ee4f0e0b183ca9f0f2ed63ededba1935b13ce2b3606f", size = 185000, upload-time = "2024-07-22T20:18:36.16Z" }, + { url = "https://files.pythonhosted.org/packages/79/b1/1b269c750e985ec7d40b9bbe7d66d0a890e420525187786718e7f6b07913/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53db45cd9173d95b4b0bdccb4dbff4c54a42b51420599c32267f3abbeb795170", size = 2377289, upload-time = "2024-07-22T20:18:37.761Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2d/d5663e134436e5933bc63516a20b5edc08b4c1b1588b9680908a5f1afd04/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c093f07a010b499c00a15bc9376036ee4800d335360570b14f7fe92badcdcf9", size = 2411755, upload-time = "2024-07-22T20:18:39.949Z" }, + { url = "https://files.pythonhosted.org/packages/3e/79/1bce519cf186112d6d5ce2985392a89528c6e1e9332d680bf752694a4cdf/chroma_hnswlib-0.7.6-cp311-cp311-win_amd64.whl", hash = "sha256:0540b0ac96e47d0aa39e88ea4714358ae05d64bbe6bf33c52f316c664190a6a3", size = 151888, upload-time = "2024-07-22T20:18:45.003Z" }, + { url = "https://files.pythonhosted.org/packages/93/ac/782b8d72de1c57b64fdf5cb94711540db99a92768d93d973174c62d45eb8/chroma_hnswlib-0.7.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e87e9b616c281bfbe748d01705817c71211613c3b063021f7ed5e47173556cb7", size = 197804, upload-time = "2024-07-22T20:18:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/32/4e/fd9ce0764228e9a98f6ff46af05e92804090b5557035968c5b4198bc7af9/chroma_hnswlib-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec5ca25bc7b66d2ecbf14502b5729cde25f70945d22f2aaf523c2d747ea68912", size = 185421, upload-time = "2024-07-22T20:18:47.72Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/3d/b59a8dedebd82545d873235ef2d06f95be244dfece7ee4a1a6044f080b18/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305ae491de9d5f3c51e8bd52d84fdf2545a4a2bc7af49765cda286b7bb30b1d4", size = 2389672, upload-time = "2024-07-22T20:18:49.583Z" }, + { url = "https://files.pythonhosted.org/packages/74/1e/80a033ea4466338824974a34f418e7b034a7748bf906f56466f5caa434b0/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:822ede968d25a2c88823ca078a58f92c9b5c4142e38c7c8b4c48178894a0a3c5", size = 2436986, upload-time = "2024-07-22T20:18:51.872Z" }, +] + +[[package]] +name = "chromadb" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bcrypt" }, + { name = "build" }, + { name = "chroma-hnswlib" }, + { name = "fastapi" }, + { name = "grpcio" }, + { name = "httpx" }, + { name = "importlib-resources" }, + { name = "kubernetes" }, + { name = "mmh3" }, + { name = "numpy" }, + { name = "onnxruntime" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = "opentelemetry-sdk" }, + { name = "orjson" }, + { name = "overrides" }, + { name = "posthog" }, + { name = "pydantic" }, + { name = "pypika" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "tenacity" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, + { name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/cd/f0f2de3f466ff514fb6b58271c14f6d22198402bb5b71b8d890231265946/chromadb-0.6.3.tar.gz", hash = "sha256:c8f34c0b704b9108b04491480a36d42e894a960429f87c6516027b5481d59ed3", size = 29297929, upload-time = "2025-01-14T22:20:40.184Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/28/8e/5c186c77bf749b6fe0528385e507e463f1667543328d76fd00a49e1a4e6a/chromadb-0.6.3-py3-none-any.whl", hash = "sha256:4851258489a3612b558488d98d09ae0fe0a28d5cad6bd1ba64b96fdc419dc0e5", size = 611129, upload-time = "2025-01-14T22:20:33.784Z" }, +] + [[package]] name = "click" version = "8.1.3" @@ -589,6 +680,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "humanfriendly" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, +] + [[package]] name = "comm" version = "0.1.4" @@ -747,6 +850,7 @@ dependencies = [ { name = "channels" }, { name = "channels-redis" }, { name = "charset-normalizer" }, + { name = "chromadb" }, { name = "click" }, { name = "click-didyoumean" }, { name = "click-plugins" }, @@ -805,8 +909,8 @@ dependencies = [ { name = "h2" }, { name = "hologram" }, { name = "hpack" }, - { name = "httpcore" }, { name = "httpx" }, + { name = "huggingface-hub" }, { name = "hyperframe" }, { name = "identify" }, { name = "idna" }, @@ -825,6 +929,7 @@ dependencies = [ { name = "kiwisolver" 
}, { name = "kombu" }, { name = "kubernetes" }, + { name = "langgraph" }, { name = "lazy-object-proxy" }, { name = "leather" }, { name = "logbook" }, @@ -847,6 +952,8 @@ dependencies = [ { name = "nodeenv" }, { name = "numpy" }, { name = "oauthlib" }, + { name = "onnxruntime" }, + { name = "openai" }, { name = "ordered-set" }, { name = "orjson" }, { name = "packaging" }, @@ -990,6 +1097,7 @@ requires-dist = [ { name = "channels", specifier = "==4.1.0" }, { name = "channels-redis", specifier = "==4.2.0" }, { name = "charset-normalizer", specifier = "==3.1.0" }, + { name = "chromadb", specifier = "==0.6.3" }, { name = "click", specifier = "==8.1.3" }, { name = "click-didyoumean", specifier = "==0.3.0" }, { name = "click-plugins", specifier = "==1.1.1" }, @@ -1022,7 +1130,7 @@ requires-dist = [ { name = "faker", specifier = "==17.6.0" }, { name = "filelock", specifier = "==3.16.1" }, { name = "flower", specifier = ">=2.0.1" }, - { name = "fsspec", specifier = "==2023.1.0" }, + { name = "fsspec", specifier = "==2023.10.0" }, { name = "future", specifier = "==0.18.3" }, { name = "gitdb", specifier = "==4.0.11" }, { name = "gitpython", specifier = "==3.1.43" }, @@ -1048,8 +1156,8 @@ requires-dist = [ { name = "h2", specifier = "==4.1.0" }, { name = "hologram", specifier = "==0.0.16" }, { name = "hpack", specifier = "==4.0.0" }, - { name = "httpcore", specifier = "==0.16.3" }, - { name = "httpx", specifier = "==0.23.3" }, + { name = "httpx", specifier = "==0.27.0" }, + { name = "huggingface-hub", specifier = "==0.26.5" }, { name = "hyperframe", specifier = "==6.0.1" }, { name = "identify", specifier = "==2.6.1" }, { name = "idna", specifier = "==3.4" }, @@ -1061,13 +1169,14 @@ requires-dist = [ { name = "jedi", specifier = "==0.19.0" }, { name = "jinja2", specifier = "==3.1.4" }, { name = "jmespath", specifier = "==1.0.1" }, - { name = "jsonpatch", specifier = "==1.32" }, + { name = "jsonpatch", specifier = "==1.33" }, { name = "jsonpointer", specifier = "==2.3" }, { name 
= "jsonschema", specifier = "==4.23.0" }, { name = "jsonschema-specifications", specifier = "==2023.12.1" }, { name = "kiwisolver", specifier = "==1.4.5" }, { name = "kombu", specifier = "==5.2.4" }, - { name = "kubernetes", specifier = "==26.1.0" }, + { name = "kubernetes", specifier = ">=28.1.0" }, + { name = "langgraph", specifier = "==0.0.69" }, { name = "lazy-object-proxy", specifier = "==1.9.0" }, { name = "leather", specifier = "==0.3.4" }, { name = "logbook", specifier = "==1.5.3" }, @@ -1090,9 +1199,11 @@ requires-dist = [ { name = "nodeenv", specifier = "==1.9.1" }, { name = "numpy", specifier = ">=2.2.4" }, { name = "oauthlib", specifier = "==3.2.2" }, + { name = "onnxruntime", specifier = "==1.20.1" }, + { name = "openai", specifier = "==1.55.3" }, { name = "ordered-set", specifier = "==4.1.0" }, - { name = "orjson", specifier = "==3.8.8" }, - { name = "packaging", specifier = "==23.0" }, + { name = "orjson", specifier = "==3.9.12" }, + { name = "packaging", specifier = "==23.2" }, { name = "pandas", specifier = "==2.2.2" }, { name = "paramiko", specifier = "==3.4.0" }, { name = "parsedatetime", specifier = "==2.4" }, @@ -1174,14 +1285,14 @@ requires-dist = [ { name = "starkbank-ecdsa", specifier = "==2.2.0" }, { name = "stringcase", specifier = "==1.2.0" }, { name = "tabulate", specifier = "==0.9.0" }, - { name = "tenacity", specifier = "==9.0.0" }, + { name = "tenacity", specifier = "==8.5.0" }, { name = "text-unidecode", specifier = "==1.3" }, { name = "toml", specifier = "==0.10.2" }, { name = "tomli", specifier = "==2.0.1" }, { name = "tomlkit", specifier = "==0.11.7" }, { name = "tornado", specifier = "==6.3.2" }, { name = "traitlets", specifier = "==5.9.0" }, - { name = "typer", specifier = "==0.7.0" }, + { name = "typer", specifier = "==0.9.0" }, { name = "typing-extensions", specifier = "==4.12.2" }, { name = "tzdata", specifier = "==2022.7" }, { name = "tzlocal", specifier = "==4.3" }, @@ -1236,6 +1347,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/8e/41/9307e4f5f9976bc8b7fea0b66367734e8faf3ec84bc0d412d8cfabbb66cd/distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784", size = 468850, upload-time = "2023-12-12T07:13:59.966Z" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + [[package]] name = "django" version = "4.2" @@ -1338,6 +1458,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/94/fdfb7b2f0b16cd3ed4d4171c55c1c07a2d1e3b106c5978c8ad0c15b4a48b/djangorestframework_simplejwt-5.5.1-py3-none-any.whl", hash = "sha256:2c30f3707053d384e9f315d11c2daccfcb548d4faa453111ca19a542b732e469", size = 107674, upload-time = "2025-07-21T16:52:07.493Z" }, ] +[[package]] +name = "durationpy" +version = "0.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/a4/e44218c2b394e31a6dd0d6b095c4e1f32d0be54c2a4b250032d717647bab/durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba", size = 3335, upload-time = "2025-05-17T13:52:37.26Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = 
"sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, +] + [[package]] name = "exceptiongroup" version = "1.1.1" @@ -1368,6 +1497,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/8a/7e22279c61f3caee0f99776d8b2dfd412480d0998dad6e31552837e9550b/Faker-17.6.0-py3-none-any.whl", hash = "sha256:5aaa16fa9cfde7d117eef70b6b293a705021e57158f3fa6b44ed1b70202d2065", size = 1698455, upload-time = "2023-03-03T16:59:14.857Z" }, ] +[[package]] +name = "fastapi" +version = "0.125.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/71/2df15009fb4bdd522a069d2fbca6007c6c5487fce5cb965be00fc335f1d1/fastapi-0.125.0.tar.gz", hash = "sha256:16b532691a33e2c5dee1dac32feb31dc6eb41a3dd4ff29a95f9487cb21c054c0", size = 370550, upload-time = "2025-12-17T21:41:44.15Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/2f/ff2fcc98f500713368d8b650e1bbc4a0b3ebcdd3e050dcdaad5f5a13fd7e/fastapi-0.125.0-py3-none-any.whl", hash = "sha256:2570ec4f3aecf5cca8f0428aed2398b774fcdfee6c2116f86e80513f2f86a7a1", size = 112888, upload-time = "2025-12-17T21:41:41.286Z" }, +] + [[package]] name = "filelock" version = "3.16.1" @@ -1377,6 +1521,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163, upload-time = "2024-09-17T19:02:00.268Z" }, ] +[[package]] +name = "flatbuffers" +version = "25.12.19" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" }, +] + [[package]] name = "flower" version = "2.0.1" @@ -1395,11 +1547,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2023.1.0" +version = "2023.10.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6e/de/d14309e99f60010055bfd04c9e57d20a8d7b3f1c2144f0d85c1747017718/fsspec-2023.1.0.tar.gz", hash = "sha256:fbae7f20ff801eb5f7d0bedf81f25c787c0dfac5e982d98fa3884a9cde2b5411", size = 142987, upload-time = "2023-01-19T20:56:20.033Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a4/f7/16ec1f92523165d10301cfa8cb83df0356dbe615d4ca5ed611a16f53e09a/fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5", size = 165452, upload-time = "2023-10-21T17:37:04.636Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/64/f0d369ede0ca54fdd520bdee5086dbaf0af81dac53a2ce847bd1ec6e0bf1/fsspec-2023.1.0-py3-none-any.whl", hash = "sha256:b833e2e541e9e8cde0ab549414187871243177feb3d344f9d27b25a93f5d8139", size = 143043, upload-time = "2023-01-19T20:56:18.246Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f6/3eccfb530aac90ad1301c582da228e4763f19e719ac8200752a4841b0b2d/fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529", size = 166384, upload-time = "2023-10-21T17:37:02.632Z" }, ] [[package]] @@ -1771,32 +1923,104 @@ wheels = [ [[package]] name = "httpcore" -version = "0.16.3" +version = "1.0.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "anyio" }, { name = "certifi" }, { name = "h11" }, - { name = "sniffio" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/61/42/5c456b02816845d163fab0f32936b6a5b649f3f915beff6f819f4f6c90b2/httpcore-0.16.3.tar.gz", hash = "sha256:c5d6f04e2fc530f39e0c077e6a30caa53f1451096120f1f38b954afd0b17c0cb", size = 54929, upload-time = "2022-12-20T12:13:57.041Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/7e/ef97af4623024e8159993b3114ce208de4f677098ae058ec5882a1bf7605/httpcore-0.16.3-py3-none-any.whl", hash = "sha256:da1fb708784a938aa084bde4feb8317056c55037247c787bd7e19eb2c2949dc0", size = 69561, upload-time = "2022-12-20T12:13:55.471Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/9f/45/ad3e1b4d448f22c0cff4f5692f5ed0666658578e358b8d58a19846048059/httpcore-1.0.8.tar.gz", hash = "sha256:86e94505ed24ea06514883fd44d2bc02d90e77e7979c8eb71b90f41d364a1bad", size = 85385, upload-time = "2025-04-11T14:42:46.661Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/8d/f052b1e336bb2c1fc7ed1aaed898aa570c0b61a09707b108979d9fc6e308/httpcore-1.0.8-py3-none-any.whl", hash = "sha256:5254cf149bcb5f75e9d1b2b9f729ea4a4b883d1ad7379fc632b727cec23674be", size = 78732, upload-time = "2025-04-11T14:42:44.896Z" }, +] + +[[package]] +name = "httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/e5/c07e0bcf4ec8db8164e9f6738c048b2e66aabf30e7506f440c4cc6953f60/httptools-0.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:11d01b0ff1fe02c4c32d60af61a4d613b74fad069e47e06e9067758c01e9ac78", size = 204531, upload-time = "2025-10-10T03:54:20.887Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/4f/35e3a63f863a659f92ffd92bef131f3e81cf849af26e6435b49bd9f6f751/httptools-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d86c1e5afdc479a6fdabf570be0d3eb791df0ae727e8dbc0259ed1249998d4", size = 109408, upload-time = "2025-10-10T03:54:22.455Z" }, + { url = "https://files.pythonhosted.org/packages/f5/71/b0a9193641d9e2471ac541d3b1b869538a5fb6419d52fd2669fa9c79e4b8/httptools-0.7.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c8c751014e13d88d2be5f5f14fc8b89612fcfa92a9cc480f2bc1598357a23a05", size = 440889, upload-time = "2025-10-10T03:54:23.753Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d9/2e34811397b76718750fea44658cb0205b84566e895192115252e008b152/httptools-0.7.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:654968cb6b6c77e37b832a9be3d3ecabb243bbe7a0b8f65fbc5b6b04c8fcabed", size = 440460, upload-time = "2025-10-10T03:54:25.313Z" }, + { url = "https://files.pythonhosted.org/packages/01/3f/a04626ebeacc489866bb4d82362c0657b2262bef381d68310134be7f40bb/httptools-0.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b580968316348b474b020edf3988eecd5d6eec4634ee6561e72ae3a2a0e00a8a", size = 425267, upload-time = "2025-10-10T03:54:26.81Z" }, + { url = "https://files.pythonhosted.org/packages/a5/99/adcd4f66614db627b587627c8ad6f4c55f18881549bab10ecf180562e7b9/httptools-0.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d496e2f5245319da9d764296e86c5bb6fcf0cf7a8806d3d000717a889c8c0b7b", size = 424429, upload-time = "2025-10-10T03:54:28.174Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/ec8fc904a8fd30ba022dfa85f3bbc64c3c7cd75b669e24242c0658e22f3c/httptools-0.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:cbf8317bfccf0fed3b5680c559d3459cccf1abe9039bfa159e62e391c7270568", size = 86173, upload-time = "2025-10-10T03:54:29.5Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/08/17e07e8d89ab8f343c134616d72eebfe03798835058e2ab579dcc8353c06/httptools-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:474d3b7ab469fefcca3697a10d11a32ee2b9573250206ba1e50d5980910da657", size = 206521, upload-time = "2025-10-10T03:54:31.002Z" }, + { url = "https://files.pythonhosted.org/packages/aa/06/c9c1b41ff52f16aee526fd10fbda99fa4787938aa776858ddc4a1ea825ec/httptools-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3c3b7366bb6c7b96bd72d0dbe7f7d5eead261361f013be5f6d9590465ea1c70", size = 110375, upload-time = "2025-10-10T03:54:31.941Z" }, + { url = "https://files.pythonhosted.org/packages/cc/cc/10935db22fda0ee34c76f047590ca0a8bd9de531406a3ccb10a90e12ea21/httptools-0.7.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:379b479408b8747f47f3b253326183d7c009a3936518cdb70db58cffd369d9df", size = 456621, upload-time = "2025-10-10T03:54:33.176Z" }, + { url = "https://files.pythonhosted.org/packages/0e/84/875382b10d271b0c11aa5d414b44f92f8dd53e9b658aec338a79164fa548/httptools-0.7.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cad6b591a682dcc6cf1397c3900527f9affef1e55a06c4547264796bbd17cf5e", size = 454954, upload-time = "2025-10-10T03:54:34.226Z" }, + { url = "https://files.pythonhosted.org/packages/30/e1/44f89b280f7e46c0b1b2ccee5737d46b3bb13136383958f20b580a821ca0/httptools-0.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eb844698d11433d2139bbeeb56499102143beb582bd6c194e3ba69c22f25c274", size = 440175, upload-time = "2025-10-10T03:54:35.942Z" }, + { url = "https://files.pythonhosted.org/packages/6f/7e/b9287763159e700e335028bc1824359dc736fa9b829dacedace91a39b37e/httptools-0.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f65744d7a8bdb4bda5e1fa23e4ba16832860606fcc09d674d56e425e991539ec", size = 440310, upload-time = "2025-10-10T03:54:37.1Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/07/5b614f592868e07f5c94b1f301b5e14a21df4e8076215a3bccb830a687d8/httptools-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:135fbe974b3718eada677229312e97f3b31f8a9c8ffa3ae6f565bf808d5b6bcb", size = 86875, upload-time = "2025-10-10T03:54:38.421Z" }, + { url = "https://files.pythonhosted.org/packages/53/7f/403e5d787dc4942316e515e949b0c8a013d84078a915910e9f391ba9b3ed/httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5", size = 206280, upload-time = "2025-10-10T03:54:39.274Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0d/7f3fd28e2ce311ccc998c388dd1c53b18120fda3b70ebb022b135dc9839b/httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5", size = 110004, upload-time = "2025-10-10T03:54:40.403Z" }, + { url = "https://files.pythonhosted.org/packages/84/a6/b3965e1e146ef5762870bbe76117876ceba51a201e18cc31f5703e454596/httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03", size = 517655, upload-time = "2025-10-10T03:54:41.347Z" }, + { url = "https://files.pythonhosted.org/packages/11/7d/71fee6f1844e6fa378f2eddde6c3e41ce3a1fb4b2d81118dd544e3441ec0/httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2", size = 511440, upload-time = "2025-10-10T03:54:42.452Z" }, + { url = "https://files.pythonhosted.org/packages/22/a5/079d216712a4f3ffa24af4a0381b108aa9c45b7a5cc6eb141f81726b1823/httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362", size = 495186, upload-time = "2025-10-10T03:54:43.937Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/9e/025ad7b65278745dee3bd0ebf9314934c4592560878308a6121f7f812084/httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c", size = 499192, upload-time = "2025-10-10T03:54:45.003Z" }, + { url = "https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" }, + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" }, + { url = "https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, + { url = "https://files.pythonhosted.org/packages/34/50/9d095fcbb6de2d523e027a2f304d4551855c2f46e0b82befd718b8b20056/httptools-0.7.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270", size = 203619, upload-time = "2025-10-10T03:54:54.321Z" }, + { url = "https://files.pythonhosted.org/packages/07/f0/89720dc5139ae54b03f861b5e2c55a37dba9a5da7d51e1e824a1f343627f/httptools-0.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3", size = 108714, upload-time = "2025-10-10T03:54:55.163Z" }, + { url = "https://files.pythonhosted.org/packages/b3/cb/eea88506f191fb552c11787c23f9a405f4c7b0c5799bf73f2249cd4f5228/httptools-0.7.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1", size = 472909, upload-time = "2025-10-10T03:54:56.056Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/4a/a548bdfae6369c0d078bab5769f7b66f17f1bfaa6fa28f81d6be6959066b/httptools-0.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b", size = 470831, upload-time = "2025-10-10T03:54:57.219Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/14df99e1c43bd132eec921c2e7e11cda7852f65619bc0fc5bdc2d0cb126c/httptools-0.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60", size = 452631, upload-time = "2025-10-10T03:54:58.219Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/b7e131f7be8d854d48cb6d048113c30f9a46dca0c9a8b08fcb3fcd588cdc/httptools-0.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca", size = 452910, upload-time = "2025-10-10T03:54:59.366Z" }, + { url = "https://files.pythonhosted.org/packages/53/cf/878f3b91e4e6e011eff6d1fa9ca39f7eb17d19c9d7971b04873734112f30/httptools-0.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96", size = 88205, upload-time = "2025-10-10T03:55:00.389Z" }, ] [[package]] name = "httpx" -version = "0.23.3" +version = "0.27.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "anyio" }, { name = "certifi" }, { name = "httpcore" }, - { name = "rfc3986", extra = ["idna2008"] }, + { name = "idna" }, { name = "sniffio" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f5/50/04d5e8ee398a10c767a341a25f59ff8711ae3adf0143c7f8b45fc560d72d/httpx-0.23.3.tar.gz", hash = "sha256:9818458eb565bb54898ccb9b8b251a28785dd4a55afbc23d0eb410754fe7d0f9", size = 77527, upload-time = "2023-01-04T09:41:21.226Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/5c/2d/3da5bdf4408b8b2800061c339f240c1802f2e82d55e50bd39c5a881f47f0/httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5", size = 126413, upload-time = "2024-02-21T13:07:52.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/7b/ddacf6dcebb42466abd03f368782142baa82e08fc0c1f8eaa05b4bae87d5/httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5", size = 75590, upload-time = "2024-02-21T13:07:50.455Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "0.26.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/51/21/2be5c66f29e798650a3e66bb350dee63bd9ab02cfc3ed7197cf4a905203e/huggingface_hub-0.26.5.tar.gz", hash = "sha256:1008bd18f60bfb65e8dbc0a97249beeeaa8c99d3c2fa649354df9fa5a13ed83b", size = 375951, upload-time = "2024-12-06T18:24:30.105Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/a2/0260c0f5d73bdf06e8d3fc1013a82b9f0633dc21750c9e3f3cb1dba7bb8c/httpx-0.23.3-py3-none-any.whl", hash = "sha256:a211fcce9b1254ea24f0cd6af9869b3d29aba40154e947d2a07bb499b3e310d6", size = 71472, upload-time = "2023-01-04T09:41:19.714Z" }, + { url = "https://files.pythonhosted.org/packages/44/5a/dc6af87c61f89b23439eb95521e4e99862636cfd538ae12fd36be5483e5f/huggingface_hub-0.26.5-py3-none-any.whl", hash = "sha256:fb7386090bbe892072e64b85f7c4479fd2d65eea5f2543327c970d5169e83924", size = 447766, upload-time = "2024-12-06T18:24:27.357Z" }, +] + +[[package]] +name = "humanfriendly" +version = "10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyreadline3", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702, upload-time = "2021-09-17T21:40:43.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" }, ] [[package]] @@ -1910,6 +2134,103 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271, upload-time = "2024-05-05T23:41:59.928Z" }, ] +[[package]] +name = "jiter" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/5a/41da76c5ea07bec1b0472b6b2fdb1b651074d504b19374d7e130e0cdfb25/jiter-0.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2ffc63785fd6c7977defe49b9824ae6ce2b2e2b77ce539bdaf006c26da06342e", size = 311164, upload-time = "2026-02-02T12:35:17.688Z" }, + { url = "https://files.pythonhosted.org/packages/40/cb/4a1bf994a3e869f0d39d10e11efb471b76d0ad70ecbfb591427a46c880c2/jiter-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4a638816427006c1e3f0013eb66d391d7a3acda99a7b0cf091eff4497ccea33a", size = 320296, upload-time = "2026-02-02T12:35:19.828Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/82/acd71ca9b50ecebadc3979c541cd717cce2fe2bc86236f4fa597565d8f1a/jiter-0.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19928b5d1ce0ff8c1ee1b9bdef3b5bfc19e8304f1b904e436caf30bc15dc6cf5", size = 352742, upload-time = "2026-02-02T12:35:21.258Z" }, + { url = "https://files.pythonhosted.org/packages/71/03/d1fc996f3aecfd42eb70922edecfb6dd26421c874503e241153ad41df94f/jiter-0.13.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:309549b778b949d731a2f0e1594a3f805716be704a73bf3ad9a807eed5eb5721", size = 363145, upload-time = "2026-02-02T12:35:24.653Z" }, + { url = "https://files.pythonhosted.org/packages/f1/61/a30492366378cc7a93088858f8991acd7d959759fe6138c12a4644e58e81/jiter-0.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcdabaea26cb04e25df3103ce47f97466627999260290349a88c8136ecae0060", size = 487683, upload-time = "2026-02-02T12:35:26.162Z" }, + { url = "https://files.pythonhosted.org/packages/20/4e/4223cffa9dbbbc96ed821c5aeb6bca510848c72c02086d1ed3f1da3d58a7/jiter-0.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a377af27b236abbf665a69b2bdd680e3b5a0bd2af825cd3b81245279a7606c", size = 373579, upload-time = "2026-02-02T12:35:27.582Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c9/b0489a01329ab07a83812d9ebcffe7820a38163c6d9e7da644f926ff877c/jiter-0.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe49d3ff6db74321f144dff9addd4a5874d3105ac5ba7c5b77fac099cfae31ae", size = 362904, upload-time = "2026-02-02T12:35:28.925Z" }, + { url = "https://files.pythonhosted.org/packages/05/af/53e561352a44afcba9a9bc67ee1d320b05a370aed8df54eafe714c4e454d/jiter-0.13.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2113c17c9a67071b0f820733c0893ed1d467b5fcf4414068169e5c2cabddb1e2", size = 392380, upload-time = "2026-02-02T12:35:30.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/76/2a/dd805c3afb8ed5b326c5ae49e725d1b1255b9754b1b77dbecdc621b20773/jiter-0.13.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ab1185ca5c8b9491b55ebf6c1e8866b8f68258612899693e24a92c5fdb9455d5", size = 517939, upload-time = "2026-02-02T12:35:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/20/2a/7b67d76f55b8fe14c937e7640389612f05f9a4145fc28ae128aaa5e62257/jiter-0.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9621ca242547edc16400981ca3231e0c91c0c4c1ab8573a596cd9bb3575d5c2b", size = 551696, upload-time = "2026-02-02T12:35:33.306Z" }, + { url = "https://files.pythonhosted.org/packages/85/9c/57cdd64dac8f4c6ab8f994fe0eb04dc9fd1db102856a4458fcf8a99dfa62/jiter-0.13.0-cp310-cp310-win32.whl", hash = "sha256:a7637d92b1c9d7a771e8c56f445c7f84396d48f2e756e5978840ecba2fac0894", size = 204592, upload-time = "2026-02-02T12:35:34.58Z" }, + { url = "https://files.pythonhosted.org/packages/a7/38/f4f3ea5788b8a5bae7510a678cdc747eda0c45ffe534f9878ff37e7cf3b3/jiter-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c1b609e5cbd2f52bb74fb721515745b407df26d7b800458bd97cb3b972c29e7d", size = 206016, upload-time = "2026-02-02T12:35:36.435Z" }, + { url = "https://files.pythonhosted.org/packages/71/29/499f8c9eaa8a16751b1c0e45e6f5f1761d180da873d417996cc7bddc8eef/jiter-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ea026e70a9a28ebbdddcbcf0f1323128a8db66898a06eaad3a4e62d2f554d096", size = 311157, upload-time = "2026-02-02T12:35:37.758Z" }, + { url = "https://files.pythonhosted.org/packages/50/f6/566364c777d2ab450b92100bea11333c64c38d32caf8dc378b48e5b20c46/jiter-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66aa3e663840152d18cc8ff1e4faad3dd181373491b9cfdc6004b92198d67911", size = 319729, upload-time = "2026-02-02T12:35:39.246Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/dd/560f13ec5e4f116d8ad2658781646cca91b617ae3b8758d4a5076b278f70/jiter-0.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3524798e70655ff19aec58c7d05adb1f074fecff62da857ea9be2b908b6d701", size = 354766, upload-time = "2026-02-02T12:35:40.662Z" }, + { url = "https://files.pythonhosted.org/packages/7c/0d/061faffcfe94608cbc28a0d42a77a74222bdf5055ccdbe5fd2292b94f510/jiter-0.13.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec7e287d7fbd02cb6e22f9a00dd9c9cd504c40a61f2c61e7e1f9690a82726b4c", size = 362587, upload-time = "2026-02-02T12:35:42.025Z" }, + { url = "https://files.pythonhosted.org/packages/92/c9/c66a7864982fd38a9773ec6e932e0398d1262677b8c60faecd02ffb67bf3/jiter-0.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47455245307e4debf2ce6c6e65a717550a0244231240dcf3b8f7d64e4c2f22f4", size = 487537, upload-time = "2026-02-02T12:35:43.459Z" }, + { url = "https://files.pythonhosted.org/packages/6c/86/84eb4352cd3668f16d1a88929b5888a3fe0418ea8c1dfc2ad4e7bf6e069a/jiter-0.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee9da221dca6e0429c2704c1b3655fe7b025204a71d4d9b73390c759d776d165", size = 373717, upload-time = "2026-02-02T12:35:44.928Z" }, + { url = "https://files.pythonhosted.org/packages/6e/09/9fe4c159358176f82d4390407a03f506a8659ed13ca3ac93a843402acecf/jiter-0.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24ab43126d5e05f3d53a36a8e11eb2f23304c6c1117844aaaf9a0aa5e40b5018", size = 362683, upload-time = "2026-02-02T12:35:46.636Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5e/85f3ab9caca0c1d0897937d378b4a515cae9e119730563572361ea0c48ae/jiter-0.13.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9da38b4fedde4fb528c740c2564628fbab737166a0e73d6d46cb4bb5463ff411", size = 392345, upload-time = "2026-02-02T12:35:48.088Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/4c/05b8629ad546191939e6f0c2f17e29f542a398f4a52fb987bc70b6d1eb8b/jiter-0.13.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0b34c519e17658ed88d5047999a93547f8889f3c1824120c26ad6be5f27b6cf5", size = 517775, upload-time = "2026-02-02T12:35:49.482Z" }, + { url = "https://files.pythonhosted.org/packages/4d/88/367ea2eb6bc582c7052e4baf5ddf57ebe5ab924a88e0e09830dfb585c02d/jiter-0.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2a6394e6af690d462310a86b53c47ad75ac8c21dc79f120714ea449979cb1d3", size = 551325, upload-time = "2026-02-02T12:35:51.104Z" }, + { url = "https://files.pythonhosted.org/packages/f3/12/fa377ffb94a2f28c41afaed093e0d70cfe512035d5ecb0cad0ae4792d35e/jiter-0.13.0-cp311-cp311-win32.whl", hash = "sha256:0f0c065695f616a27c920a56ad0d4fc46415ef8b806bf8fc1cacf25002bd24e1", size = 204709, upload-time = "2026-02-02T12:35:52.467Z" }, + { url = "https://files.pythonhosted.org/packages/cb/16/8e8203ce92f844dfcd3d9d6a5a7322c77077248dbb12da52d23193a839cd/jiter-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:0733312953b909688ae3c2d58d043aa040f9f1a6a75693defed7bc2cc4bf2654", size = 204560, upload-time = "2026-02-02T12:35:53.925Z" }, + { url = "https://files.pythonhosted.org/packages/44/26/97cc40663deb17b9e13c3a5cf29251788c271b18ee4d262c8f94798b8336/jiter-0.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:5d9b34ad56761b3bf0fbe8f7e55468704107608512350962d3317ffd7a4382d5", size = 189608, upload-time = "2026-02-02T12:35:55.304Z" }, + { url = "https://files.pythonhosted.org/packages/2e/30/7687e4f87086829955013ca12a9233523349767f69653ebc27036313def9/jiter-0.13.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663", size = 307958, upload-time = "2026-02-02T12:35:57.165Z" }, + { url = "https://files.pythonhosted.org/packages/c3/27/e57f9a783246ed95481e6749cc5002a8a767a73177a83c63ea71f0528b90/jiter-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505", size = 318597, upload-time = "2026-02-02T12:35:58.591Z" }, + { url = "https://files.pythonhosted.org/packages/cf/52/e5719a60ac5d4d7c5995461a94ad5ef962a37c8bf5b088390e6fad59b2ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152", size = 348821, upload-time = "2026-02-02T12:36:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/61/db/c1efc32b8ba4c740ab3fc2d037d8753f67685f475e26b9d6536a4322bcdd/jiter-0.13.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726", size = 364163, upload-time = "2026-02-02T12:36:01.937Z" }, + { url = "https://files.pythonhosted.org/packages/55/8a/fb75556236047c8806995671a18e4a0ad646ed255276f51a20f32dceaeec/jiter-0.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0", size = 483709, upload-time = "2026-02-02T12:36:03.41Z" }, + { url = "https://files.pythonhosted.org/packages/7e/16/43512e6ee863875693a8e6f6d532e19d650779d6ba9a81593ae40a9088ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089", size = 370480, upload-time = "2026-02-02T12:36:04.791Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4c/09b93e30e984a187bc8aaa3510e1ec8dcbdcd71ca05d2f56aac0492453aa/jiter-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93", size = 360735, upload-time = "2026-02-02T12:36:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1b/46c5e349019874ec5dfa508c14c37e29864ea108d376ae26d90bee238cd7/jiter-0.13.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08", size = 391814, upload-time = "2026-02-02T12:36:08.368Z" }, + { url = "https://files.pythonhosted.org/packages/15/9e/26184760e85baee7162ad37b7912797d2077718476bf91517641c92b3639/jiter-0.13.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2", size = 513990, upload-time = "2026-02-02T12:36:09.993Z" }, + { url = "https://files.pythonhosted.org/packages/e9/34/2c9355247d6debad57a0a15e76ab1566ab799388042743656e566b3b7de1/jiter-0.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228", size = 548021, upload-time = "2026-02-02T12:36:11.376Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4a/9f2c23255d04a834398b9c2e0e665382116911dc4d06b795710503cdad25/jiter-0.13.0-cp312-cp312-win32.whl", hash = "sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394", size = 203024, upload-time = "2026-02-02T12:36:12.682Z" }, + { url = "https://files.pythonhosted.org/packages/09/ee/f0ae675a957ae5a8f160be3e87acea6b11dc7b89f6b7ab057e77b2d2b13a/jiter-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92", size = 205424, upload-time = "2026-02-02T12:36:13.93Z" }, + { url = "https://files.pythonhosted.org/packages/1b/02/ae611edf913d3cbf02c97cdb90374af2082c48d7190d74c1111dde08bcdd/jiter-0.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9", size = 186818, upload-time = "2026-02-02T12:36:15.308Z" }, + { url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" }, + { url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" }, + { url = "https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" }, + { url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" }, + { url = "https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 513543, upload-time = "2026-02-02T12:36:28.217Z" }, + { url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" }, + { url = "https://files.pythonhosted.org/packages/27/da/68f38d12e7111d2016cd198161b36e1f042bd115c169255bcb7ec823a3bf/jiter-0.13.0-cp313-cp313-win32.whl", hash = "sha256:36ebfbcffafb146d0e6ffb3e74d51e03d9c35ce7c625c8066cdbfc7b953bdc72", size = 200630, upload-time = "2026-02-02T12:36:31.808Z" }, + { url = "https://files.pythonhosted.org/packages/25/65/3bd1a972c9a08ecd22eb3b08a95d1941ebe6938aea620c246cf426ae09c2/jiter-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:8d76029f077379374cf0dbc78dbe45b38dec4a2eb78b08b5194ce836b2517afc", size = 202602, upload-time = "2026-02-02T12:36:33.679Z" }, + { url = "https://files.pythonhosted.org/packages/15/fe/13bd3678a311aa67686bb303654792c48206a112068f8b0b21426eb6851e/jiter-0.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:bb7613e1a427cfcb6ea4544f9ac566b93d5bf67e0d48c787eca673ff9c9dff2b", size = 185939, upload-time = "2026-02-02T12:36:35.065Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" }, + { url = "https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" }, + { url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" }, + { url = "https://files.pythonhosted.org/packages/a2/ec/a4c72c822695fa80e55d2b4142b73f0012035d9fcf90eccc56bc060db37c/jiter-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2b4972c6df33731aac0742b64fd0d18e0a69bc7d6e03108ce7d40c85fd9e3e6d", size = 201950, upload-time = "2026-02-02T12:36:40.791Z" }, + { url = "https://files.pythonhosted.org/packages/b6/00/393553ec27b824fbc29047e9c7cd4a3951d7fbe4a76743f17e44034fa4e4/jiter-0.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:701a1e77d1e593c1b435315ff625fd071f0998c5f02792038a5ca98899261b7d", size = 185852, upload-time = "2026-02-02T12:36:42.077Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" }, + { url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" }, + { url = "https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" }, + { url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" }, + { url = "https://files.pythonhosted.org/packages/18/05/981c9669d86850c5fbb0d9e62bba144787f9fba84546ba43d624ee27ef29/jiter-0.13.0-cp314-cp314-win32.whl", hash = "sha256:632bf7c1d28421c00dd8bbb8a3bac5663e1f57d5cd5ed962bce3c73bf62608e6", size = 202108, upload-time = "2026-02-02T12:37:01.718Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/cdcf54dd0b0341db7d25413229888a346c7130bd20820530905fdb65727b/jiter-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:f22ef501c3f87ede88f23f9b11e608581c14f04db59b6a801f354397ae13739f", size = 204027, upload-time = "2026-02-02T12:37:03.075Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f9/724bcaaab7a3cd727031fe4f6995cb86c4bd344909177c186699c8dec51a/jiter-0.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:07b75fe09a4ee8e0c606200622e571e44943f47254f95e2436c8bdcaceb36d7d", size = 187199, upload-time = "2026-02-02T12:37:04.414Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" }, + { url = "https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" }, + { url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" }, + { url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" }, + { url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" }, + { url = "https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" }, + { url = "https://files.pythonhosted.org/packages/47/66/eea81dfff765ed66c68fd2ed8c96245109e13c896c2a5015c7839c92367e/jiter-0.13.0-cp314-cp314t-win32.whl", hash = "sha256:24dc96eca9f84da4131cdf87a95e6ce36765c3b156fc9ae33280873b1c32d5f6", size = 201196, upload-time = "2026-02-02T12:37:19.101Z" }, + { url = "https://files.pythonhosted.org/packages/ff/32/4ac9c7a76402f8f00d00842a7f6b83b284d0cf7c1e9d4227bc95aa6d17fa/jiter-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0a8d76c7524087272c8ae913f5d9d608bd839154b62c4322ef65723d2e5bb0b8", size = 204215, upload-time = "2026-02-02T12:37:20.495Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8e/7def204fea9f9be8b3c21a6f2dd6c020cf56c7d5ff753e0e23ed7f9ea57e/jiter-0.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2c26cf47e2cad140fa23b6d58d435a7c0161f5c514284802f25e87fddfe11024", size = 187152, upload-time = "2026-02-02T12:37:22.124Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/b3/3c29819a27178d0e461a8571fb63c6ae38be6dc36b78b3ec2876bbd6a910/jiter-0.13.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b1cbfa133241d0e6bdab48dcdc2604e8ba81512f6bbd68ec3e8e1357dd3c316c", size = 307016, upload-time = "2026-02-02T12:37:42.755Z" }, + { url = "https://files.pythonhosted.org/packages/eb/ae/60993e4b07b1ac5ebe46da7aa99fdbb802eb986c38d26e3883ac0125c4e0/jiter-0.13.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:db367d8be9fad6e8ebbac4a7578b7af562e506211036cba2c06c3b998603c3d2", size = 305024, upload-time = "2026-02-02T12:37:44.774Z" }, + { url = "https://files.pythonhosted.org/packages/77/fa/2227e590e9cf98803db2811f172b2d6460a21539ab73006f251c66f44b14/jiter-0.13.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45f6f8efb2f3b0603092401dc2df79fa89ccbc027aaba4174d2d4133ed661434", size = 339337, upload-time = "2026-02-02T12:37:46.668Z" }, + { url = "https://files.pythonhosted.org/packages/2d/92/015173281f7eb96c0ef580c997da8ef50870d4f7f4c9e03c845a1d62ae04/jiter-0.13.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:597245258e6ad085d064780abfb23a284d418d3e61c57362d9449c6c7317ee2d", size = 346395, upload-time = "2026-02-02T12:37:48.09Z" }, + { url = "https://files.pythonhosted.org/packages/80/60/e50fa45dd7e2eae049f0ce964663849e897300433921198aef94b6ffa23a/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a", size = 305169, upload-time = "2026-02-02T12:37:50.376Z" }, + { url = "https://files.pythonhosted.org/packages/d2/73/a009f41c5eed71c49bec53036c4b33555afcdee70682a18c6f66e396c039/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f", size = 303808, upload-time = 
"2026-02-02T12:37:52.092Z" }, + { url = "https://files.pythonhosted.org/packages/c4/10/528b439290763bff3d939268085d03382471b442f212dca4ff5f12802d43/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59", size = 337384, upload-time = "2026-02-02T12:37:53.582Z" }, + { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, +] + [[package]] name = "jmespath" version = "1.0.1" @@ -1921,14 +2242,14 @@ wheels = [ [[package]] name = "jsonpatch" -version = "1.32" +version = "1.33" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpointer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/21/67/83452af2a6db7c4596d1e2ecaa841b9a900980103013b867f2865e5e1cf0/jsonpatch-1.32.tar.gz", hash = "sha256:b6ddfe6c3db30d81a96aaeceb6baf916094ffa23d7dd5fa2c13e13f8b6e600c2", size = 20853, upload-time = "2021-03-13T19:16:38.264Z" } +sdist = { url = "https://files.pythonhosted.org/packages/42/78/18813351fe5d63acad16aec57f94ec2b70a09e53ca98145589e185423873/jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c", size = 21699, upload-time = "2023-06-26T12:07:29.144Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/55/f7c93bae36d869292aedfbcbae8b091386194874f16390d680136edd2b28/jsonpatch-1.32-py2.py3-none-any.whl", hash = "sha256:26ac385719ac9f54df8a2f0827bb8253aa3ea8ab7b3368457bcdb8c14595a397", size = 12547, upload-time = "2021-03-13T19:16:37.071Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade", size = 12898, upload-time = "2023-06-16T21:01:28.466Z" }, ] [[package]] @@ -2035,23 +2356,64 @@ wheels = [ [[package]] name = "kubernetes" -version = "26.1.0" +version = "35.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, - { name = "google-auth" }, + { name = "durationpy" }, { name = "python-dateutil" }, { name = "pyyaml" }, { name = "requests" }, { name = "requests-oauthlib" }, - { name = "setuptools" }, { name = "six" }, { name = "urllib3" }, { name = "websocket-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/34/19/2f351c0eaf05234dc33a6e0ffc7894e9dedab0ff341311c5b4ba44f2d8ac/kubernetes-26.1.0.tar.gz", hash = "sha256:5854b0c508e8d217ca205591384ab58389abdae608576f9c9afc35a3c76a366c", size = 736370, upload-time = "2023-02-16T01:04:37.088Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/8f/85bf51ad4150f64e8c665daf0d9dfe9787ae92005efb9a4d1cba592bd79d/kubernetes-35.0.0.tar.gz", hash = "sha256:3d00d344944239821458b9efd484d6df9f011da367ecb155dadf9513f05f09ee", size = 1094642, upload-time = "2026-01-16T01:05:27.76Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/70/05b685ea2dffcb2adbf3cdcea5d8865b7bc66f67249084cf845012a0ff13/kubernetes-35.0.0-py2.py3-none-any.whl", hash = "sha256:39e2b33b46e5834ef6c3985ebfe2047ab39135d41de51ce7641a7ca5b372a13d", size = 2017602, upload-time = "2026-01-16T01:05:25.991Z" }, +] + +[[package]] +name = "langchain-core" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonpatch" }, + { name = "langsmith" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "tenacity" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/e3/59/f72fd34b459e6f49ee522fcdb0b908be44b887f0e6c8679c5b85b719a60e/langchain_core-0.2.2.tar.gz", hash = "sha256:6884a87f7ac8e0d43e4d83c5f9efa95236c7bd535e22a0a51db19156875b4cd6", size = 242534, upload-time = "2024-05-29T19:56:32.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/e2/9d7ccae2e2b2983912d71eacf8626d7b5f186389cd82eaa2930896132feb/langchain_core-0.2.2-py3-none-any.whl", hash = "sha256:4b3b55a5f214acbcf8d6d8e322da3a9d6248d6b6f45ac1b86ab0494fd3716128", size = 309535, upload-time = "2024-05-29T19:56:30.44Z" }, +] + +[[package]] +name = "langgraph" +version = "0.0.69" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/e1/0124a5be9fb9a8cc9fcf5156de1aa0266207bb7d7331513335800cce2480/langgraph-0.0.69.tar.gz", hash = "sha256:25391195461e66783811f29dff34f071af8f3302382b80ac2fb524c12f0bc1f2", size = 68689, upload-time = "2024-06-14T18:59:04.041Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/74/21/ada0c5eedb678ab663f8e387734418fdd1a26be28fc919a0c32e52964047/kubernetes-26.1.0-py2.py3-none-any.whl", hash = "sha256:e3db6800abf7e36c38d2629b5cb6b74d10988ee0cba6fba45595a7cbe60c0042", size = 1446361, upload-time = "2023-02-16T01:04:34.33Z" }, + { url = "https://files.pythonhosted.org/packages/fd/38/1d68efc0a1cfd022de0dce98857c3a0d3c60f67e4010e238bae4ac7b45c6/langgraph-0.0.69-py3-none-any.whl", hash = "sha256:7eb628b25ed75d717c9521d98f147424df7dbd04cf0f12bfcf2b5c3122b04137", size = 86836, upload-time = "2024-06-14T18:59:01.886Z" }, +] + +[[package]] +name = "langsmith" +version = "0.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/78/05312ccd0925c914bfc58bdd0ca38da7a908c5c580964ea8000748e55255/langsmith-0.1.5.tar.gz", hash = 
"sha256:aa7a2861aa3d9ae563a077c622953533800466c4e2e539b0d567b84d5fd5b157", size = 58485, upload-time = "2024-02-21T01:57:59.523Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/ff/a330448e7f335298bed0129a71742ec8a96c41196a3d96304ed890ff25a7/langsmith-0.1.5-py3-none-any.whl", hash = "sha256:a1811821a923d90e53bcbacdd0988c3c366aff8f4c120d8777e7af8ecda06268", size = 61031, upload-time = "2024-02-21T01:57:56.778Z" }, ] [[package]] @@ -2195,6 +2557,120 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/e4/9f/004f810169a48ed5c520279d98327e7793b6491f09d42cb2c5636c994f34/minimal-snowplow-tracker-0.0.2.tar.gz", hash = "sha256:acabf7572db0e7f5cbf6983d495eef54081f71be392330eb3aadb9ccb39daaa4", size = 12542, upload-time = "2018-10-13T12:58:37.368Z" } +[[package]] +name = "mmh3" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/91/1a/edb23803a168f070ded7a3014c6d706f63b90c84ccc024f89d794a3b7a6d/mmh3-5.2.1.tar.gz", hash = "sha256:bbea5b775f0ac84945191fb83f845a6fd9a21a03ea7f2e187defac7e401616ad", size = 33775, upload-time = "2026-03-05T15:55:57.716Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/bb/88ee54afa5644b0f35ab5b435f208394feb963e5bb47c4e404deb625ffa4/mmh3-5.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5d87a3584093e1a89987e3d36d82c98d9621b2cb944e22a420aa1401e096758f", size = 56080, upload-time = "2026-03-05T15:53:40.452Z" }, + { url = "https://files.pythonhosted.org/packages/cc/bf/5404c2fd6ac84819e8ff1b7e34437b37cf55a2b11318894909e7bb88de3f/mmh3-5.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30e4d2084df019880d55f6f7bea35328d9b464ebee090baa372c096dc77556fb", size = 40462, upload-time = "2026-03-05T15:53:41.751Z" }, + { url = "https://files.pythonhosted.org/packages/de/0b/52bffad0b52ae4ea53e222b594bd38c08ecac1fc410323220a7202e43da5/mmh3-5.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:0bbc17250b10d3466875a40a52520a6bac3c02334ca709207648abd3c223ed5c", size = 40077, upload-time = "2026-03-05T15:53:42.753Z" }, + { url = "https://files.pythonhosted.org/packages/a0/9e/326c93d425b9fa4cbcdc71bc32aaba520db37577d632a24d25d927594eca/mmh3-5.2.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:76219cd1eefb9bf4af7856e3ae563d15158efa145c0aab01e9933051a1954045", size = 95302, upload-time = "2026-03-05T15:53:43.867Z" }, + { url = "https://files.pythonhosted.org/packages/c6/b1/e20d5f0d19c4c0f3df213fa7dcfa0942c4fb127d38e11f398ae8ddf6cccc/mmh3-5.2.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb9d44c25244e11c8be3f12c938ca8ba8404620ef8092245d2093c6ab3df260f", size = 101174, upload-time = "2026-03-05T15:53:45.194Z" }, + { url = "https://files.pythonhosted.org/packages/7f/4a/1a9bb3e33c18b1e1cee2c249a3053c4d4d9c93ecb30738f39a62249a7e86/mmh3-5.2.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2d5d542bf2abd0fd0361e8017d03f7cb5786214ceb4a40eef1539d6585d93386", size = 103979, upload-time = "2026-03-05T15:53:46.334Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/dab9ee7545429e7acdd38d23d0104471d31de09a0c695f1b751e0ff34532/mmh3-5.2.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:08043f7cb1fb9467c3fbbbaea7896986e7fbc81f4d3fd9289a73d9110ab6207a", size = 110898, upload-time = "2026-03-05T15:53:47.443Z" }, + { url = "https://files.pythonhosted.org/packages/72/08/408f11af7fe9e76b883142bb06536007cc7f237be2a5e9ad4e837716e627/mmh3-5.2.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:add7ac388d1e0bf57259afbcf9ed05621a3bf11ce5ee337e7536f1e1aaf056b0", size = 118308, upload-time = "2026-03-05T15:53:49.1Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/2d/0551be7fe0000736d9ad12ffa1f130d7a0c17b49193d6dc41c82bd9404c6/mmh3-5.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:41105377f6282e8297f182e393a79cfffd521dde37ace52b106373bdcd9ca5cb", size = 101671, upload-time = "2026-03-05T15:53:50.317Z" }, + { url = "https://files.pythonhosted.org/packages/44/17/6e4f80c4e6ad590139fa2017c3aeca54e7cc9ef68e08aa142a0c90f40a97/mmh3-5.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3cb61db880ec11e984348227b333259994c2c85caa775eb7875decb3768db890", size = 96682, upload-time = "2026-03-05T15:53:51.48Z" }, + { url = "https://files.pythonhosted.org/packages/ad/a7/b82fccd38c1fa815de72e94ebe9874562964a10e21e6c1bc3b01d3f15a0e/mmh3-5.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e8b5378de2b139c3a830f0209c1e91f7705919a4b3e563a10955104f5097a70a", size = 110287, upload-time = "2026-03-05T15:53:52.68Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a1/2644069031c8cec0be46f0346f568a53f42fddd843f03cc890306699c1e2/mmh3-5.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e904f2417f0d6f6d514f3f8b836416c360f306ddaee1f84de8eef1e722d212e5", size = 111899, upload-time = "2026-03-05T15:53:53.791Z" }, + { url = "https://files.pythonhosted.org/packages/51/7b/6614f3eb8fb33f931fa7616c6d477247e48ec6c5082b02eeeee998cffa94/mmh3-5.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f1fbb0a99125b1287c6d9747f937dc66621426836d1a2d50d05aecfc81911b57", size = 100078, upload-time = "2026-03-05T15:53:55.234Z" }, + { url = "https://files.pythonhosted.org/packages/27/9a/dd4d5a5fb893e64f71b42b69ecae97dd78db35075412488b24036bc5599c/mmh3-5.2.1-cp310-cp310-win32.whl", hash = "sha256:b4cce60d0223074803c9dbe0721ad3fa51dafe7d462fee4b656a1aa01ee07518", size = 40756, upload-time = "2026-03-05T15:53:56.319Z" }, + { url = "https://files.pythonhosted.org/packages/c9/34/0b25889450f8aeffcec840aa73251e853f059c1b72ed1d1c027b956f95f5/mmh3-5.2.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:6f01f044112d43a20be2f13a11683666d87151542ad627fe41a18b9791d2802f", size = 41519, upload-time = "2026-03-05T15:53:57.41Z" }, + { url = "https://files.pythonhosted.org/packages/fd/31/8fd42e3c526d0bcb1db7f569c0de6729e180860a0495e387a53af33c2043/mmh3-5.2.1-cp310-cp310-win_arm64.whl", hash = "sha256:7501e9be34cb21e72fcfe672aafd0eee65c16ba2afa9dcb5500a587d3a0580f0", size = 39285, upload-time = "2026-03-05T15:53:58.697Z" }, + { url = "https://files.pythonhosted.org/packages/65/d7/3312a59df3c1cdd783f4cf0c4ee8e9decff9c5466937182e4cc7dbbfe6c5/mmh3-5.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:dae0f0bd7d30c0ad61b9a504e8e272cb8391eed3f1587edf933f4f6b33437450", size = 56082, upload-time = "2026-03-05T15:53:59.702Z" }, + { url = "https://files.pythonhosted.org/packages/61/96/6f617baa098ca0d2989bfec6d28b5719532cd8d8848782662f5b755f657f/mmh3-5.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9aeaf53eaa075dd63e81512522fd180097312fb2c9f476333309184285c49ce0", size = 40458, upload-time = "2026-03-05T15:54:01.548Z" }, + { url = "https://files.pythonhosted.org/packages/c1/b4/9cd284bd6062d711e13d26c04d4778ab3f690c1c38a4563e3c767ec8802e/mmh3-5.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0634581290e6714c068f4aa24020acf7880927d1f0084fa753d9799ae9610082", size = 40079, upload-time = "2026-03-05T15:54:02.743Z" }, + { url = "https://files.pythonhosted.org/packages/f6/09/a806334ce1d3d50bf782b95fcee8b3648e1e170327d4bb7b4bad2ad7d956/mmh3-5.2.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e080c0637aea036f35507e803a4778f119a9b436617694ae1c5c366805f1e997", size = 97242, upload-time = "2026-03-05T15:54:04.536Z" }, + { url = "https://files.pythonhosted.org/packages/ee/93/723e317dd9e041c4dc4566a2eb53b01ad94de31750e0b834f1643905e97c/mmh3-5.2.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:db0562c5f71d18596dcd45e854cf2eeba27d7543e1a3acdafb7eef728f7fe85d", size = 103082, 
upload-time = "2026-03-05T15:54:06.387Z" }, + { url = "https://files.pythonhosted.org/packages/61/b5/f96121e69cc48696075071531cf574f112e1ffd08059f4bffb41210e6fc5/mmh3-5.2.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d9f9a3ce559a5267014b04b82956993270f63ec91765e13e9fd73daf2d2738e", size = 106054, upload-time = "2026-03-05T15:54:07.506Z" }, + { url = "https://files.pythonhosted.org/packages/82/49/192b987ec48d0b2aecf8ac285a9b11fbc00030f6b9c694664ae923458dde/mmh3-5.2.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:960b1b3efa39872ac8b6cc3a556edd6fb90ed74f08c9c45e028f1005b26aa55d", size = 112910, upload-time = "2026-03-05T15:54:09.403Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a1/03e91fd334ed0144b83343a76eb11f17434cd08f746401488cfeafb2d241/mmh3-5.2.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d30b650595fdbe32366b94cb14f30bb2b625e512bd4e1df00611f99dc5c27fd4", size = 120551, upload-time = "2026-03-05T15:54:10.587Z" }, + { url = "https://files.pythonhosted.org/packages/93/b9/b89a71d2ff35c3a764d1c066c7313fc62c7cc48fa48a4b3b0304a4a0146f/mmh3-5.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82f3802bfc4751f420d591c5c864de538b71cea117fce67e4595c2afede08a15", size = 99096, upload-time = "2026-03-05T15:54:11.76Z" }, + { url = "https://files.pythonhosted.org/packages/36/b5/613772c1c6ed5f7b63df55eb131e887cc43720fec392777b95a79d34e640/mmh3-5.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:915e7a2418f10bd1151b1953df06d896db9783c9cfdb9a8ee1f9b3a4331ab503", size = 98524, upload-time = "2026-03-05T15:54:13.122Z" }, + { url = "https://files.pythonhosted.org/packages/5e/0e/1524566fe8eaf871e4f7bc44095929fcd2620488f402822d848df19d679c/mmh3-5.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:fc78739b5ec6e4fb02301984a3d442a91406e7700efbe305071e7fd1c78278f2", size = 106239, upload-time = 
"2026-03-05T15:54:14.601Z" }, + { url = "https://files.pythonhosted.org/packages/04/94/21adfa7d90a7a697137ad6de33eeff6445420ca55e433a5d4919c79bc3b5/mmh3-5.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:41aac7002a749f08727cb91babff1daf8deac317c0b1f317adc69be0e6c375d1", size = 109797, upload-time = "2026-03-05T15:54:15.819Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e6/1aacc3a219e1aa62fa65669995d4a3562b35be5200ec03680c7e4bec9676/mmh3-5.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9d8089d853c7963a8ce87fff93e2a67075c0bc08684a08ea6ad13577c38ffc38", size = 97228, upload-time = "2026-03-05T15:54:16.992Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b9/5e4cca8dcccf298add0a27f3c357bc8cf8baf821d35cdc6165e4bd5a48b0/mmh3-5.2.1-cp311-cp311-win32.whl", hash = "sha256:baeb47635cb33375dee4924cd93d7f5dcaa786c740b08423b0209b824a1ee728", size = 40751, upload-time = "2026-03-05T15:54:18.714Z" }, + { url = "https://files.pythonhosted.org/packages/72/fc/5b11d49247f499bcda591171e9cf3b6ee422b19e70aa2cef2e0ae65ca3b9/mmh3-5.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:1e4ecee40ba19e6975e1120829796770325841c2f153c0e9aecca927194c6a2a", size = 41517, upload-time = "2026-03-05T15:54:19.764Z" }, + { url = "https://files.pythonhosted.org/packages/8a/5f/2a511ee8a1c2a527c77726d5231685b72312c5a1a1b7639ad66a9652aa84/mmh3-5.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:c302245fd6c33d96bd169c7ccf2513c20f4c1e417c07ce9dce107c8bc3f8411f", size = 39287, upload-time = "2026-03-05T15:54:20.904Z" }, + { url = "https://files.pythonhosted.org/packages/92/94/bc5c3b573b40a328c4d141c20e399039ada95e5e2a661df3425c5165fd84/mmh3-5.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0cc21533878e5586b80d74c281d7f8da7932bc8ace50b8d5f6dbf7e3935f63f1", size = 56087, upload-time = "2026-03-05T15:54:21.92Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/80/64a02cc3e95c3af0aaa2590849d9ed24a9f14bb93537addde688e039b7c3/mmh3-5.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4eda76074cfca2787c8cf1bec603eaebdddd8b061ad5502f85cddae998d54f00", size = 40500, upload-time = "2026-03-05T15:54:22.953Z" }, + { url = "https://files.pythonhosted.org/packages/8b/72/e6d6602ce18adf4ddcd0e48f2e13590cc92a536199e52109f46f259d3c46/mmh3-5.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:eee884572b06bbe8a2b54f424dbd996139442cf83c76478e1ec162512e0dd2c7", size = 40034, upload-time = "2026-03-05T15:54:23.943Z" }, + { url = "https://files.pythonhosted.org/packages/59/c2/bf4537a8e58e21886ef16477041238cab5095c836496e19fafc34b7445d2/mmh3-5.2.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d0b7e803191db5f714d264044e06189c8ccd3219e936cc184f07106bd17fd7b", size = 97292, upload-time = "2026-03-05T15:54:25.335Z" }, + { url = "https://files.pythonhosted.org/packages/e5/e2/51ed62063b44d10b06d975ac87af287729eeb5e3ed9772f7584a17983e90/mmh3-5.2.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e6c219e375f6341d0959af814296372d265a8ca1af63825f65e2e87c618f006", size = 103274, upload-time = "2026-03-05T15:54:26.44Z" }, + { url = "https://files.pythonhosted.org/packages/75/ce/12a7524dca59eec92e5b31fdb13ede1e98eda277cf2b786cf73bfbc24e81/mmh3-5.2.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:26fb5b9c3946bf7f1daed7b37e0c03898a6f062149127570f8ede346390a0825", size = 106158, upload-time = "2026-03-05T15:54:28.578Z" }, + { url = "https://files.pythonhosted.org/packages/86/1f/d3ba6dd322d01ab5d44c46c8f0c38ab6bbbf9b5e20e666dfc05bf4a23604/mmh3-5.2.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3c38d142c706201db5b2345166eeef1e7740e3e2422b470b8ba5c8727a9b4c7a", size = 113005, upload-time = 
"2026-03-05T15:54:29.767Z" }, + { url = "https://files.pythonhosted.org/packages/b6/a9/15d6b6f913294ea41b44d901741298e3718e1cb89ee626b3694625826a43/mmh3-5.2.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50885073e2909251d4718634a191c49ae5f527e5e1736d738e365c3e8be8f22b", size = 120744, upload-time = "2026-03-05T15:54:30.931Z" }, + { url = "https://files.pythonhosted.org/packages/76/b3/70b73923fd0284c439860ff5c871b20210dfdbe9a6b9dd0ee6496d77f174/mmh3-5.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b3f99e1756fc48ad507b95e5d86f2fb21b3d495012ff13e6592ebac14033f166", size = 99111, upload-time = "2026-03-05T15:54:32.353Z" }, + { url = "https://files.pythonhosted.org/packages/dd/38/99f7f75cd27d10d8b899a1caafb9d531f3903e4d54d572220e3d8ac35e89/mmh3-5.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:62815d2c67f2dd1be76a253d88af4e1da19aeaa1820146dec52cf8bee2958b16", size = 98623, upload-time = "2026-03-05T15:54:33.801Z" }, + { url = "https://files.pythonhosted.org/packages/fd/68/6e292c0853e204c44d2f03ea5f090be3317a0e2d9417ecb62c9eb27687df/mmh3-5.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8f767ba0911602ddef289404e33835a61168314ebd3c729833db2ed685824211", size = 106437, upload-time = "2026-03-05T15:54:35.177Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c6/fedd7284c459cfb58721d461fcf5607a4c1f5d9ab195d113d51d10164d16/mmh3-5.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:67e41a497bac88cc1de96eeba56eeb933c39d54bc227352f8455aa87c4ca4000", size = 110002, upload-time = "2026-03-05T15:54:36.673Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ac/ca8e0c19a34f5b71390171d2ff0b9f7f187550d66801a731bb68925126a4/mmh3-5.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d74a03fb57757ece25aa4b3c1c60157a1cece37a020542785f942e2f827eed5", size = 97507, upload-time = "2026-03-05T15:54:37.804Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/94/6ebb9094cfc7ac5e7950776b9d13a66bb4a34f83814f32ba2abc9494fc68/mmh3-5.2.1-cp312-cp312-win32.whl", hash = "sha256:7374d6e3ef72afe49697ecd683f3da12f4fc06af2d75433d0580c6746d2fa025", size = 40773, upload-time = "2026-03-05T15:54:40.077Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3c/cd3527198cf159495966551c84a5f36805a10ac17b294f41f67b83f6a4d6/mmh3-5.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:3a9fed49c6ce4ed7e73f13182760c65c816da006debe67f37635580dfb0fae00", size = 41560, upload-time = "2026-03-05T15:54:41.148Z" }, + { url = "https://files.pythonhosted.org/packages/15/96/6fe5ebd0f970a076e3ed5512871ce7569447b962e96c125528a2f9724470/mmh3-5.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:bbfcb95d9a744e6e2827dfc66ad10e1020e0cac255eb7f85652832d5a264c2fc", size = 39313, upload-time = "2026-03-05T15:54:42.171Z" }, + { url = "https://files.pythonhosted.org/packages/25/a5/9daa0508a1569a54130f6198d5462a92deda870043624aa3ea72721aa765/mmh3-5.2.1-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:723b2681ed4cc07d3401bbea9c201ad4f2a4ca6ba8cddaff6789f715dd2b391e", size = 40832, upload-time = "2026-03-05T15:54:43.212Z" }, + { url = "https://files.pythonhosted.org/packages/0a/6b/3230c6d80c1f4b766dedf280a92c2241e99f87c1504ff74205ec8cebe451/mmh3-5.2.1-cp313-cp313-android_21_x86_64.whl", hash = "sha256:3619473a0e0d329fd4aec8075628f8f616be2da41605300696206d6f36920c3d", size = 41964, upload-time = "2026-03-05T15:54:44.204Z" }, + { url = "https://files.pythonhosted.org/packages/62/fb/648bfddb74a872004b6ee751551bfdda783fe6d70d2e9723bad84dbe5311/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:e48d4dbe0f88e53081da605ae68644e5182752803bbc2beb228cca7f1c4454d6", size = 39114, upload-time = "2026-03-05T15:54:45.205Z" }, + { url = "https://files.pythonhosted.org/packages/95/c2/ab7901f87af438468b496728d11264cb397b3574d41506e71b92128e0373/mmh3-5.2.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = 
"sha256:a482ac121de6973897c92c2f31defc6bafb11c83825109275cffce54bb64933f", size = 39819, upload-time = "2026-03-05T15:54:46.509Z" }, + { url = "https://files.pythonhosted.org/packages/2f/ed/6f88dda0df67de1612f2e130ffea34cf84aaee5bff5b0aff4dbff2babe34/mmh3-5.2.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:17fbb47f0885ace8327ce1235d0416dc86a211dcd8cc1e703f41523be32cfec8", size = 40330, upload-time = "2026-03-05T15:54:47.864Z" }, + { url = "https://files.pythonhosted.org/packages/3d/66/7516d23f53cdf90f43fce24ab80c28f45e6851d78b46bef8c02084edf583/mmh3-5.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d51fde50a77f81330523562e3c2734ffdca9c4c9e9d355478117905e1cfe16c6", size = 56078, upload-time = "2026-03-05T15:54:48.9Z" }, + { url = "https://files.pythonhosted.org/packages/bc/34/4d152fdf4a91a132cb226b671f11c6b796eada9ab78080fb5ce1e95adaab/mmh3-5.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:19bbd3b841174ae6ed588536ab5e1b1fe83d046e668602c20266547298d939a9", size = 40498, upload-time = "2026-03-05T15:54:49.942Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4c/8e3af1b6d85a299767ec97bd923f12b06267089c1472c27c1696870d1175/mmh3-5.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be77c402d5e882b6fbacfd90823f13da8e0a69658405a39a569c6b58fdb17b03", size = 40033, upload-time = "2026-03-05T15:54:50.994Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/966ea560e32578d453c9e9db53d602cbb1d0da27317e232afa7c38ceba11/mmh3-5.2.1-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:fd96476f04db5ceba1cfa0f21228f67c1f7402296f0e73fee3513aa680ad237b", size = 97320, upload-time = "2026-03-05T15:54:52.072Z" }, + { url = "https://files.pythonhosted.org/packages/bb/0d/2c5f9893b38aeb6b034d1a44ecd55a010148054f6a516abe53b5e4057297/mmh3-5.2.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:707151644085dd0f20fe4f4b573d28e5130c4aaa5f587e95b60989c5926653b5", size = 103299, upload-time = "2026-03-05T15:54:53.569Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fc/2ebaef4a4d4376f89761274dc274035ffd96006ab496b4ee5af9b08f21a9/mmh3-5.2.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3737303ca9ea0f7cb83028781148fcda4f1dac7821db0c47672971dabcf63593", size = 106222, upload-time = "2026-03-05T15:54:55.092Z" }, + { url = "https://files.pythonhosted.org/packages/57/09/ea7ffe126d0ba0406622602a2d05e1e1a6841cc92fc322eb576c95b27fad/mmh3-5.2.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2778fed822d7db23ac5008b181441af0c869455b2e7d001f4019636ac31b6fe4", size = 113048, upload-time = "2026-03-05T15:54:56.305Z" }, + { url = "https://files.pythonhosted.org/packages/85/57/9447032edf93a64aa9bef4d9aa596400b1756f40411890f77a284f6293ca/mmh3-5.2.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d57dea657357230cc780e13920d7fa7db059d58fe721c80020f94476da4ca0a1", size = 120742, upload-time = "2026-03-05T15:54:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/53/82/a86cc87cc88c92e9e1a598fee509f0409435b57879a6129bf3b3e40513c7/mmh3-5.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:169e0d178cb59314456ab30772429a802b25d13227088085b0d49b9fe1533104", size = 99132, upload-time = "2026-03-05T15:54:58.583Z" }, + { url = "https://files.pythonhosted.org/packages/54/f7/6b16eb1b40ee89bb740698735574536bc20d6cdafc65ae702ea235578e05/mmh3-5.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7e4e1f580033335c6f76d1e0d6b56baf009d1a64d6a4816347e4271ba951f46d", size = 98686, upload-time = "2026-03-05T15:55:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/e8/88/a601e9f32ad1410f438a6d0544298ea621f989bd34a0731a7190f7dec799/mmh3-5.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:2bd9f19f7f1fcebd74e830f4af0f28adad4975d40d80620be19ffb2b2af56c9f", size = 106479, upload-time = "2026-03-05T15:55:01.532Z" }, + { url = "https://files.pythonhosted.org/packages/d6/5c/ce29ae3dfc4feec4007a437a1b7435fb9507532a25147602cd5b52be86db/mmh3-5.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c88653877aeb514c089d1b3d473451677b8b9a6d1497dbddf1ae7934518b06d2", size = 110030, upload-time = "2026-03-05T15:55:02.934Z" }, + { url = "https://files.pythonhosted.org/packages/13/30/ae444ef2ff87c805d525da4fa63d27cda4fe8a48e77003a036b8461cfd5c/mmh3-5.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fceef7fe67c81e1585198215e42ad3fdba3a25644beda8fbdaf85f4d7b93175a", size = 97536, upload-time = "2026-03-05T15:55:04.135Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f9/dc3787ee5c813cc27fe79f45ad4500d9b5437f23a7402435cc34e07c7718/mmh3-5.2.1-cp313-cp313-win32.whl", hash = "sha256:54b64fb2433bc71488e7a449603bf8bd31fbcf9cb56fbe1eb6d459e90b86c37b", size = 40769, upload-time = "2026-03-05T15:55:05.277Z" }, + { url = "https://files.pythonhosted.org/packages/43/67/850e0b5a1e97799822ebfc4ca0e8c6ece3ed8baf7dcdf64de817dfdda2ca/mmh3-5.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:cae6383181f1e345317742d2ddd88f9e7d2682fa4c9432e3a74e47d92dce0229", size = 41563, upload-time = "2026-03-05T15:55:06.283Z" }, + { url = "https://files.pythonhosted.org/packages/c0/cc/98c90b28e1da5458e19fbfaf4adb5289208d3bfccd45dd14eab216a2f0bb/mmh3-5.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:022aa1a528604e6c83d0a7705fdef0b5355d897a9e0fa3a8d26709ceaa06965d", size = 39310, upload-time = "2026-03-05T15:55:07.323Z" }, + { url = "https://files.pythonhosted.org/packages/63/b4/65bc1fb2bb7f83e91c30865023b1847cf89a5f237165575e8c83aa536584/mmh3-5.2.1-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:d771f085fcdf4035786adfb1d8db026df1eb4b41dac1c3d070d1e49512843227", size = 40794, upload-time = "2026-03-05T15:55:09.773Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/86/7168b3d83be8eb553897b1fac9da8bbb06568e5cfe555ffc329ebb46f59d/mmh3-5.2.1-cp314-cp314-android_24_x86_64.whl", hash = "sha256:7f196cd7910d71e9d9860da0ff7a77f64d22c1ad931f1dd18559a06e03109fc0", size = 41923, upload-time = "2026-03-05T15:55:10.924Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9b/b653ab611c9060ce8ff0ba25c0226757755725e789292f3ca138a58082cd/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:b1f12bd684887a0a5d55e6363ca87056f361e45451105012d329b86ec19dbe0b", size = 39131, upload-time = "2026-03-05T15:55:11.961Z" }, + { url = "https://files.pythonhosted.org/packages/9b/b4/5a2e0d34ab4d33543f01121e832395ea510132ea8e52cdf63926d9d81754/mmh3-5.2.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d106493a60dcb4aef35a0fac85105e150a11cf8bc2b0d388f5a33272d756c966", size = 39825, upload-time = "2026-03-05T15:55:13.013Z" }, + { url = "https://files.pythonhosted.org/packages/bd/69/81699a8f39a3f8d368bec6443435c0c392df0d200ad915bf0d222b588e03/mmh3-5.2.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:44983e45310ee5b9f73397350251cdf6e63a466406a105f1d16cb5baa659270b", size = 40344, upload-time = "2026-03-05T15:55:14.026Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b3/71c8c775807606e8fd8acc5c69016e1caf3200d50b50b6dd4b40ce10b76c/mmh3-5.2.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:368625fb01666655985391dbad3860dc0ba7c0d6b9125819f3121ee7292b4ac8", size = 56291, upload-time = "2026-03-05T15:55:15.137Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/2c24517d4b2ce9e4917362d24f274d3d541346af764430249ddcc4cb3a08/mmh3-5.2.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:72d1cc63bcc91e14933f77d51b3df899d6a07d184ec515ea7f56bff659e124d7", size = 40575, upload-time = "2026-03-05T15:55:16.518Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/b9/e4a360164365ac9f07a25f0f7928e3a66eb9ecc989384060747aa170e6aa/mmh3-5.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e8b4b5580280b9265af3e0409974fb79c64cf7523632d03fbf11df18f8b0181e", size = 40052, upload-time = "2026-03-05T15:55:17.735Z" }, + { url = "https://files.pythonhosted.org/packages/97/ca/120d92223a7546131bbbc31c9174168ee7a73b1366f5463ffe69d9e691fe/mmh3-5.2.1-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:4cbbde66f1183db040daede83dd86c06d663c5bb2af6de1142b7c8c37923dd74", size = 97311, upload-time = "2026-03-05T15:55:18.959Z" }, + { url = "https://files.pythonhosted.org/packages/b6/71/c1a60c1652b8813ef9de6d289784847355417ee0f2980bca002fe87f4ae5/mmh3-5.2.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8ff038d52ef6aa0f309feeba00c5095c9118d0abf787e8e8454d6048db2037fc", size = 103279, upload-time = "2026-03-05T15:55:20.448Z" }, + { url = "https://files.pythonhosted.org/packages/48/29/ad97f4be1509cdcb28ae32c15593ce7c415db47ace37f8fad35b493faa9a/mmh3-5.2.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a4130d0b9ce5fad6af07421b1aecc7e079519f70d6c05729ab871794eded8617", size = 106290, upload-time = "2026-03-05T15:55:21.6Z" }, + { url = "https://files.pythonhosted.org/packages/77/29/1f86d22e281bd8827ba373600a4a8b0c0eae5ca6aa55b9a8c26d2a34decc/mmh3-5.2.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6e0bfe77d238308839699944164b96a2eeccaf55f2af400f54dc20669d8d5f2", size = 113116, upload-time = "2026-03-05T15:55:22.826Z" }, + { url = "https://files.pythonhosted.org/packages/a7/7c/339971ea7ed4c12d98f421f13db3ea576a9114082ccb59d2d1a0f00ccac1/mmh3-5.2.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f963eafc0a77a6c0562397da004f5876a9bcf7265a7bcc3205e29636bc4a1312", size = 
120740, upload-time = "2026-03-05T15:55:24.3Z" }, + { url = "https://files.pythonhosted.org/packages/e4/92/3c7c4bdb8e926bb3c972d1e2907d77960c1c4b250b41e8366cf20c6e4373/mmh3-5.2.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:92883836caf50d5255be03d988d75bc93e3f86ba247b7ca137347c323f731deb", size = 99143, upload-time = "2026-03-05T15:55:25.456Z" }, + { url = "https://files.pythonhosted.org/packages/df/0a/33dd8706e732458c8375eae63c981292de07a406bad4ec03e5269654aa2c/mmh3-5.2.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:57b52603e89355ff318025dd55158f6e71396c0f1f609d548e9ea9c94cc6ce0a", size = 98703, upload-time = "2026-03-05T15:55:26.723Z" }, + { url = "https://files.pythonhosted.org/packages/51/04/76bbce05df76cbc3d396f13b2ea5b1578ef02b6a5187e132c6c33f99d596/mmh3-5.2.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f40a95186a72fa0b67d15fef0f157bfcda00b4f59c8a07cbe5530d41ac35d105", size = 106484, upload-time = "2026-03-05T15:55:28.214Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8f/c6e204a2c70b719c1f62ffd9da27aef2dddcba875ea9c31ca0e87b975a46/mmh3-5.2.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:58370d05d033ee97224c81263af123dea3d931025030fd34b61227a768a8858a", size = 110012, upload-time = "2026-03-05T15:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/7181efd8e39db386c1ebc3e6b7d1f702a09d7c1197a6f2742ed6b5c16597/mmh3-5.2.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7be6dfb49e48fd0a7d91ff758a2b51336f1cd21f9d44b20f6801f072bd080cdd", size = 97508, upload-time = "2026-03-05T15:55:31.01Z" }, + { url = "https://files.pythonhosted.org/packages/42/0f/afa7ca2615fd85e1469474bb860e381443d0b868c083b62b41cb1d7ca32f/mmh3-5.2.1-cp314-cp314-win32.whl", hash = "sha256:54fe8518abe06a4c3852754bfd498b30cc58e667f376c513eac89a244ce781a4", size = 41387, upload-time = "2026-03-05T15:55:32.403Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/0d/46d42a260ee1357db3d486e6c7a692e303c017968e14865e00efa10d09fc/mmh3-5.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:3f796b535008708846044c43302719c6956f39ca2d93f2edda5319e79a29efbb", size = 42101, upload-time = "2026-03-05T15:55:33.646Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7b/848a8378059d96501a41159fca90d6a99e89736b0afbe8e8edffeac8c74b/mmh3-5.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:cd471ede0d802dd936b6fab28188302b2d497f68436025857ca72cd3810423fe", size = 39836, upload-time = "2026-03-05T15:55:35.026Z" }, + { url = "https://files.pythonhosted.org/packages/27/61/1dabea76c011ba8547c25d30c91c0ec22544487a8750997a27a0c9e1180b/mmh3-5.2.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:5174a697ce042fa77c407e05efe41e03aa56dae9ec67388055820fb48cf4c3ba", size = 57727, upload-time = "2026-03-05T15:55:36.162Z" }, + { url = "https://files.pythonhosted.org/packages/b7/32/731185950d1cf2d5e28979cc8593016ba1619a295faba10dda664a4931b5/mmh3-5.2.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:0a3984146e414684a6be2862d84fcb1035f4984851cb81b26d933bab6119bf00", size = 41308, upload-time = "2026-03-05T15:55:37.254Z" }, + { url = "https://files.pythonhosted.org/packages/76/aa/66c76801c24b8c9418b4edde9b5e57c75e72c94e29c48f707e3962534f18/mmh3-5.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:bd6e7d363aa93bd3421b30b6af97064daf47bc96005bddba67c5ffbc6df426b8", size = 40758, upload-time = "2026-03-05T15:55:38.61Z" }, + { url = "https://files.pythonhosted.org/packages/9e/bb/79a1f638a02f0ae389f706d13891e2fbf7d8c0a22ecde67ba828951bb60a/mmh3-5.2.1-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:113f78e7463a36dbbcea05bfe688efd7fa759d0f0c56e73c974d60dcfec3dfcc", size = 109670, upload-time = "2026-03-05T15:55:40.13Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/94/8cd0e187a288985bcfc79bf5144d1d712df9dee74365f59d26e3a1865be6/mmh3-5.2.1-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e8ec5f606e0809426d2440e0683509fb605a8820a21ebd120dcdba61b74ef7f", size = 117399, upload-time = "2026-03-05T15:55:42.076Z" }, + { url = "https://files.pythonhosted.org/packages/42/94/dfea6059bd5c5beda565f58a4096e43f4858fb6d2862806b8bbd12cbb284/mmh3-5.2.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22b0f9971ec4e07e8223f2beebe96a6cfc779d940b6f27d26604040dd74d3a44", size = 120386, upload-time = "2026-03-05T15:55:43.481Z" }, + { url = "https://files.pythonhosted.org/packages/47/cb/f9c45e62aaa67220179f487772461d891bb582bb2f9783c944832c60efd9/mmh3-5.2.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:85ffc9920ffc39c5eee1e3ac9100c913a0973996fbad5111f939bbda49204bb7", size = 125924, upload-time = "2026-03-05T15:55:44.638Z" }, + { url = "https://files.pythonhosted.org/packages/a5/83/fe54a4a7c11bc9f623dfc1707decd034245602b076dfc1dcc771a4163170/mmh3-5.2.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7aec798c2b01aaa65a55f1124f3405804184373abb318a3091325aece235f67c", size = 135280, upload-time = "2026-03-05T15:55:45.866Z" }, + { url = "https://files.pythonhosted.org/packages/97/67/fe7e9e9c143daddd210cd22aef89cbc425d58ecf238d2b7d9eb0da974105/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:55dbbd8ffbc40d1697d5e2d0375b08599dae8746b0b08dea05eee4ce81648fac", size = 110050, upload-time = "2026-03-05T15:55:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/43/c4/6d4b09fcbef80794de447c9378e39eefc047156b290fa3dd2d5257ca8227/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:6c85c38a279ca9295a69b9b088a2e48aa49737bb1b34e6a9dc6297c110e8d912", size = 111158, upload-time = 
"2026-03-05T15:55:48.239Z" }, + { url = "https://files.pythonhosted.org/packages/81/a6/ca51c864bdb30524beb055a6d8826db3906af0834ec8c41d097a6e8573d5/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:6290289fa5fb4c70fd7f72016e03633d60388185483ff3b162912c81205ae2cf", size = 116890, upload-time = "2026-03-05T15:55:49.405Z" }, + { url = "https://files.pythonhosted.org/packages/cc/04/5a1fe2e2ad843d03e89af25238cbc4f6840a8bb6c4329a98ab694c71deda/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:4fc6cd65dc4d2fdb2625e288939a3566e36127a84811a4913f02f3d5931da52d", size = 123121, upload-time = "2026-03-05T15:55:50.61Z" }, + { url = "https://files.pythonhosted.org/packages/af/4d/3c820c6f4897afd25905270a9f2330a23f77a207ea7356f7aadace7273c0/mmh3-5.2.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:623f938f6a039536cc02b7582a07a080f13fdfd48f87e63201d92d7e34d09a18", size = 110187, upload-time = "2026-03-05T15:55:52.143Z" }, + { url = "https://files.pythonhosted.org/packages/21/54/1d71cd143752361c0aebef16ad3f55926a6faf7b112d355745c1f8a25f7f/mmh3-5.2.1-cp314-cp314t-win32.whl", hash = "sha256:29bc3973676ae334412efdd367fcd11d036b7be3efc1ce2407ef8676dabfeb82", size = 41934, upload-time = "2026-03-05T15:55:53.564Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e4/63a2a88f31d93dea03947cccc2a076946857e799ea4f7acdecbf43b324aa/mmh3-5.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:28cfab66577000b9505a0d068c731aee7ca85cd26d4d63881fab17857e0fe1fb", size = 43036, upload-time = "2026-03-05T15:55:55.252Z" }, + { url = "https://files.pythonhosted.org/packages/a0/0f/59204bf136d1201f8d7884cfbaf7498c5b4674e87a4c693f9bde63741ce1/mmh3-5.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:dfd51b4c56b673dfbc43d7d27ef857dd91124801e2806c69bb45585ce0fa019b", size = 40391, upload-time = "2026-03-05T15:55:56.697Z" }, +] + [[package]] name = "monotonic" version = "1.6" @@ -2213,6 +2689,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/0b/ff/1ad78678bee731ae5414ea5e97396b3f91de32186028daa614d322ac5a8b/more_itertools-8.14.0-py3-none-any.whl", hash = "sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2", size = 52193, upload-time = "2022-08-09T14:04:02.452Z" }, ] +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + [[package]] name = "msgpack" version = "1.0.5" @@ -2397,6 +2882,186 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688, upload-time = "2022-10-17T20:04:24.037Z" }, ] +[[package]] +name = "onnxruntime" +version = "1.20.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coloredlogs" }, + { name = "flatbuffers" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "sympy" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/28/99f903b0eb1cd6f3faa0e343217d9fb9f47b84bca98bd9859884631336ee/onnxruntime-1.20.1-cp310-cp310-macosx_13_0_universal2.whl", hash = "sha256:e50ba5ff7fed4f7d9253a6baf801ca2883cc08491f9d32d78a80da57256a5439", size = 30996314, upload-time = "2024-11-21T00:48:31.43Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/c6/c4c0860bee2fde6037bdd9dcd12d323f6e38cf00fcc9a5065b394337fc55/onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b2908b50101a19e99c4d4e97ebb9905561daf61829403061c1adc1b588bc0de", size = 11954010, upload-time = "2024-11-21T00:48:35.254Z" }, + { url = "https://files.pythonhosted.org/packages/63/47/3dc0b075ab539f16b3d8b09df6b504f51836086ee709690a6278d791737d/onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d82daaec24045a2e87598b8ac2b417b1cce623244e80e663882e9fe1aae86410", size = 13330452, upload-time = "2024-11-21T00:48:40.02Z" }, + { url = "https://files.pythonhosted.org/packages/27/ef/80fab86289ecc01a734b7ddf115dfb93d8b2e004bd1e1977e12881c72b12/onnxruntime-1.20.1-cp310-cp310-win32.whl", hash = "sha256:4c4b251a725a3b8cf2aab284f7d940c26094ecd9d442f07dd81ab5470e99b83f", size = 9813849, upload-time = "2024-11-21T00:48:43.569Z" }, + { url = "https://files.pythonhosted.org/packages/a9/e6/33ab10066c9875a29d55e66ae97c3bf91b9b9b987179455d67c32261a49c/onnxruntime-1.20.1-cp310-cp310-win_amd64.whl", hash = "sha256:d3b616bb53a77a9463707bb313637223380fc327f5064c9a782e8ec69c22e6a2", size = 11329702, upload-time = "2024-11-21T00:48:46.599Z" }, + { url = "https://files.pythonhosted.org/packages/95/8d/2634e2959b34aa8a0037989f4229e9abcfa484e9c228f99633b3241768a6/onnxruntime-1.20.1-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:06bfbf02ca9ab5f28946e0f912a562a5f005301d0c419283dc57b3ed7969bb7b", size = 30998725, upload-time = "2024-11-21T00:48:51.013Z" }, + { url = "https://files.pythonhosted.org/packages/a5/da/c44bf9bd66cd6d9018a921f053f28d819445c4d84b4dd4777271b0fe52a2/onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f6243e34d74423bdd1edf0ae9596dd61023b260f546ee17d701723915f06a9f7", size = 11955227, upload-time = "2024-11-21T00:48:54.556Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/ac/4120dfb74c8e45cce1c664fc7f7ce010edd587ba67ac41489f7432eb9381/onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5eec64c0269dcdb8d9a9a53dc4d64f87b9e0c19801d9321246a53b7eb5a7d1bc", size = 13331703, upload-time = "2024-11-21T00:48:57.97Z" }, + { url = "https://files.pythonhosted.org/packages/12/f1/cefacac137f7bb7bfba57c50c478150fcd3c54aca72762ac2c05ce0532c1/onnxruntime-1.20.1-cp311-cp311-win32.whl", hash = "sha256:a19bc6e8c70e2485a1725b3d517a2319603acc14c1f1a017dda0afe6d4665b41", size = 9813977, upload-time = "2024-11-21T00:49:00.519Z" }, + { url = "https://files.pythonhosted.org/packages/2c/2d/2d4d202c0bcfb3a4cc2b171abb9328672d7f91d7af9ea52572722c6d8d96/onnxruntime-1.20.1-cp311-cp311-win_amd64.whl", hash = "sha256:8508887eb1c5f9537a4071768723ec7c30c28eb2518a00d0adcd32c89dea3221", size = 11329895, upload-time = "2024-11-21T00:49:03.845Z" }, + { url = "https://files.pythonhosted.org/packages/e5/39/9335e0874f68f7d27103cbffc0e235e32e26759202df6085716375c078bb/onnxruntime-1.20.1-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:22b0655e2bf4f2161d52706e31f517a0e54939dc393e92577df51808a7edc8c9", size = 31007580, upload-time = "2024-11-21T00:49:07.029Z" }, + { url = "https://files.pythonhosted.org/packages/c5/9d/a42a84e10f1744dd27c6f2f9280cc3fb98f869dd19b7cd042e391ee2ab61/onnxruntime-1.20.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f56e898815963d6dc4ee1c35fc6c36506466eff6d16f3cb9848cea4e8c8172", size = 11952833, upload-time = "2024-11-21T00:49:10.563Z" }, + { url = "https://files.pythonhosted.org/packages/47/42/2f71f5680834688a9c81becbe5c5bb996fd33eaed5c66ae0606c3b1d6a02/onnxruntime-1.20.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb71a814f66517a65628c9e4a2bb530a6edd2cd5d87ffa0af0f6f773a027d99e", size = 13333903, upload-time = "2024-11-21T00:49:12.984Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/f1/aabfdf91d013320aa2fc46cf43c88ca0182860ff15df872b4552254a9680/onnxruntime-1.20.1-cp312-cp312-win32.whl", hash = "sha256:bd386cc9ee5f686ee8a75ba74037750aca55183085bf1941da8efcfe12d5b120", size = 9814562, upload-time = "2024-11-21T00:49:15.453Z" }, + { url = "https://files.pythonhosted.org/packages/dd/80/76979e0b744307d488c79e41051117634b956612cc731f1028eb17ee7294/onnxruntime-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:19c2d843eb074f385e8bbb753a40df780511061a63f9def1b216bf53860223fb", size = 11331482, upload-time = "2024-11-21T00:49:19.412Z" }, + { url = "https://files.pythonhosted.org/packages/f7/71/c5d980ac4189589267a06f758bd6c5667d07e55656bed6c6c0580733ad07/onnxruntime-1.20.1-cp313-cp313-macosx_13_0_universal2.whl", hash = "sha256:cc01437a32d0042b606f462245c8bbae269e5442797f6213e36ce61d5abdd8cc", size = 31007574, upload-time = "2024-11-21T00:49:23.225Z" }, + { url = "https://files.pythonhosted.org/packages/81/0d/13bbd9489be2a6944f4a940084bfe388f1100472f38c07080a46fbd4ab96/onnxruntime-1.20.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb44b08e017a648924dbe91b82d89b0c105b1adcfe31e90d1dc06b8677ad37be", size = 11951459, upload-time = "2024-11-21T00:49:26.269Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ea/4454ae122874fd52bbb8a961262de81c5f932edeb1b72217f594c700d6ef/onnxruntime-1.20.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bda6aebdf7917c1d811f21d41633df00c58aff2bef2f598f69289c1f1dabc4b3", size = 13331620, upload-time = "2024-11-21T00:49:28.875Z" }, + { url = "https://files.pythonhosted.org/packages/d8/e0/50db43188ca1c945decaa8fc2a024c33446d31afed40149897d4f9de505f/onnxruntime-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:d30367df7e70f1d9fc5a6a68106f5961686d39b54d3221f760085524e8d38e16", size = 11331758, upload-time = "2024-11-21T00:49:31.417Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/55/3821c5fd60b52a6c82a00bba18531793c93c4addfe64fbf061e235c5617a/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9158465745423b2b5d97ed25aa7740c7d38d2993ee2e5c3bfacb0c4145c49d8", size = 11950342, upload-time = "2024-11-21T00:49:34.164Z" }, + { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040, upload-time = "2024-11-21T00:49:37.271Z" }, +] + +[[package]] +name = "openai" +version = "1.55.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1e/39/d4859d897da053b61b84403f67dbef1abd075e441cb354892ff14f98e2c7/openai-1.55.3.tar.gz", hash = "sha256:547e85b94535469f137a779d8770c8c5adebd507c2cc6340ca401a7c4d5d16f0", size = 314571, upload-time = "2024-11-28T16:56:47.832Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/06/691ef3f0112ecf0d7420d0bf35b5d16cf81554141f4b4913a9831031013d/openai-1.55.3-py3-none-any.whl", hash = "sha256:2a235d0e1e312cd982f561b18c27692e253852f4e5fb6ccf08cb13540a9bdaa1", size = 389558, upload-time = "2024-11-28T16:56:46.174Z" }, +] + +[[package]] +name = "opentelemetry-api" +version = "1.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = 
"sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e4/ab/1be294b194af410f350f867a54621b4f33b7551adce2ae795e907148fc1e/opentelemetry_exporter_otlp_proto_grpc-1.15.0.tar.gz", hash = "sha256:844f2a4bb9bcda34e4eb6fe36765e5031aacb36dc60ed88c90fc246942ea26e7", size = 27262, upload-time = "2022-12-09T22:28:44.359Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/8f/73ad108bcfd61b4169be5ad8b76acaf9158f224740da10ab9ea3469d551a/opentelemetry_exporter_otlp_proto_grpc-1.15.0-py3-none-any.whl", hash = "sha256:c2a5492ba7d140109968135d641d06ce3c5bd73c50665f787526065d57d7fd1d", size = 20378, upload-time = "2022-12-09T22:28:14.623Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, 
upload-time = "2026-03-04T14:20:16.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/3e/143cf5c034e58037307e6a24f06e0dd64b2c49ae60a965fc580027581931/opentelemetry_instrumentation_asgi-0.61b0.tar.gz", hash = "sha256:9d08e127244361dc33976d39dd4ca8f128b5aa5a7ae425208400a80a095019b5", size = 26691, upload-time = "2026-03-04T14:20:21.038Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/78/154470cf9d741a7487fbb5067357b87386475bbb77948a6707cae982e158/opentelemetry_instrumentation_asgi-0.61b0-py3-none-any.whl", hash = "sha256:e4b3ce6b66074e525e717efff20745434e5efd5d9df6557710856fba356da7a4", size = 16980, upload-time = "2026-03-04T14:19:10.894Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/35/aa727bb6e6ef930dcdc96a617b83748fece57b43c47d83ba8d83fbeca657/opentelemetry_instrumentation_fastapi-0.61b0.tar.gz", hash = 
"sha256:3a24f35b07c557ae1bbc483bf8412221f25d79a405f8b047de8b670722e2fa9f", size = 24800, upload-time = "2026-03-04T14:20:32.759Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/05/acfeb2cccd434242a0a7d0ea29afaf077e04b42b35b485d89aee4e0d9340/opentelemetry_instrumentation_fastapi-0.61b0-py3-none-any.whl", hash = "sha256:a1a844d846540d687d377516b2ff698b51d87c781b59f47c214359c4a241047c", size = 13485, upload-time = "2026-03-04T14:19:30.351Z" }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1e/80/b3b2a98039574e57b6b15982219ae025d55f8c46d50dde258865ce5601b4/opentelemetry_proto-1.15.0.tar.gz", hash = "sha256:9c4008e40ac8cab359daac283fbe7002c5c29c77ea2674ad5626a249e64e0101", size = 35713, upload-time = "2022-12-09T22:28:55.409Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/56/8343d94af8f32594f6b0bd273f72a40e430fb5970a353237af53af5d3031/opentelemetry_proto-1.15.0-py3-none-any.whl", hash = "sha256:044b6d044b4d10530f250856f933442b8753a17f94ae37c207607f733fb9a844", size = 52616, upload-time = "2022-12-09T22:28:30.03Z" }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = 
"sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, +] + +[[package]] +name = "opentelemetry-util-http" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/3c/f0196223efc5c4ca19f8fad3d5462b171ac6333013335ce540c01af419e9/opentelemetry_util_http-0.61b0.tar.gz", hash = "sha256:1039cb891334ad2731affdf034d8fb8b48c239af9b6dd295e5fabd07f1c95572", size = 11361, upload-time = "2026-03-04T14:20:57.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/e5/c08aaaf2f64288d2b6ef65741d2de5454e64af3e050f34285fb1907492fe/opentelemetry_util_http-0.61b0-py3-none-any.whl", hash = "sha256:8e715e848233e9527ea47e275659ea60a57a75edf5206a3b937e236a6da5fc33", size = 9281, upload-time = "2026-03-04T14:20:08.364Z" }, +] + [[package]] name = "ordered-set" version = "4.1.0" @@ -2408,37 +3073,57 @@ wheels = [ [[package]] name = "orjson" -version = "3.8.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/65/79/961d7e607d1fd52cf01c1333994fa5ba628b8da8910d436737f1537cf71a/orjson-3.8.8.tar.gz", hash = "sha256:c096d7a523bae6ffb9c4a228ba4691d66113f0f2231579dc945523fbef09c6da", size = 656977, upload-time = "2023-03-20T23:25:35.386Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/90/f000562b9240a951c3597366ad3f59f5eb6a1006f4b70891da984b6b5d07/orjson-3.8.8-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:18fcdea75d8b571dc9b185652b81397b62878ae7934fd62e6a0103a5b8448e34", size = 140519, upload-time = "2023-03-20T23:26:16.61Z" }, - { url = "https://files.pythonhosted.org/packages/ac/1a/0e549b3eafaab9319aed0a70124428fc44f60e8f76d5634fc7865ce41a2d/orjson-3.8.8-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:306618884929b596e2e083f82b5617da812df25b0c467542371f1d51f0c5a6f5", size = 490792, upload-time = "2023-03-20T23:26:18.935Z" }, - { url = "https://files.pythonhosted.org/packages/44/8b/b23be7e4a307d6155ea498e3b35c1b0191125bf3ec3f2c0e267d54ce6eb5/orjson-3.8.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edc65ddb6ae6f8fbb2bbf78ac98f75b729c9eeb0776d5508dd76d3a948dda1dd", size = 260740, upload-time = "2023-03-21T00:01:07.569Z" }, - { url = "https://files.pythonhosted.org/packages/c9/4b/7c7401ba03cabf6273ad435430f3744c1fc1dbb87914836cd01f5a9f2e26/orjson-3.8.8-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e6a6d55e01bce74516dff15302627a13b1f4edcb1c3942dd660978dee423ccf2", size = 276052, upload-time = "2023-03-21T00:01:10.211Z" }, - { url = "https://files.pythonhosted.org/packages/a9/d9/e049c004808f9171b04d5b0c95663eace2f570a5327832705a842429edef/orjson-3.8.8-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28075c4b502d792fb6703e983d456b2a30d5d6f332d26092eb312dc782e64c64", size = 299058, upload-time = "2023-03-21T00:01:12.29Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/44/0830f50df83461388cd35c5a9460afb2a3a67c49dd07cc1b4882c969e731/orjson-3.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eda4c37e48ff549763183a1549c10eec6ea40439520b17d09359cd74a425069", size = 275270, upload-time = "2023-03-20T23:42:48.498Z" }, - { url = "https://files.pythonhosted.org/packages/a3/91/8f6823060e02b3e1e1c48d6adb2f9618aeb886c0fe58c07350c4bdb71ac1/orjson-3.8.8-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a3eac485a15493164867729f44e1e1247b3094ff19d37708e8cdc9c88a93c623", size = 143522, upload-time = "2023-03-20T23:38:33.526Z" }, - { url = "https://files.pythonhosted.org/packages/2f/27/487ee571f36b3e41926e83f8ca64f279623ac52adeb1275796fda85dad72/orjson-3.8.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:88bf40e5468444c04374d1b8f1877cebbaef6bb7406cb6b4a34a570c5cbb87bc", size = 324053, upload-time = "2023-03-20T23:48:59.186Z" }, - { url = "https://files.pythonhosted.org/packages/a4/cb/785a597e575c7c924a06cddcb66f7a96a6b8237c33cc5cafb1126471efce/orjson-3.8.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:747bd4e09d8aa61e1ff677a7dd1cffd28a5d13c22f3769123c58ec988bf1b83d", size = 318485, upload-time = "2023-03-20T23:49:01.401Z" }, - { url = "https://files.pythonhosted.org/packages/37/7e/22a5261c9d7cef248e29d7d89969869aeb8a80e1684ffae558032e31b461/orjson-3.8.8-cp310-none-win_amd64.whl", hash = "sha256:dd7d86c5f5f820ac9d4783477e86eb984b63bdb32359935609eb33cf65049c54", size = 204970, upload-time = "2023-03-20T23:31:33.753Z" }, - { url = "https://files.pythonhosted.org/packages/be/08/75f28cb36264579bca84225859aadd4fb970d24213dff7398faf4c4f7345/orjson-3.8.8-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:52293a6097750c2d434737966fe6e2a1ed489ac70cc8e584f5944af83de0b787", size = 140519, upload-time = "2023-03-20T23:27:13.897Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/fd/63aff551b5a007b760af61cb5c9209d8bf8b40a292655d97869e6402236e/orjson-3.8.8-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:9322450f392dceb49810d2f820b1932af22d66f67f1d45c31f160067dd06359f", size = 490789, upload-time = "2023-03-20T23:27:16.795Z" }, - { url = "https://files.pythonhosted.org/packages/95/84/748b7e07e9fa52817494945146798b8f2e0443d4ad43942bcf8a5f881abd/orjson-3.8.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68d59e3ae84a9b6f14b45a89f7fde4a08a87ea5eb76bfc854b354640de8156f5", size = 260740, upload-time = "2023-03-21T00:01:14.364Z" }, - { url = "https://files.pythonhosted.org/packages/2b/c7/09a7a9a8d62ba873ec17788af215812be724401f778f93866a69c1ed5d7b/orjson-3.8.8-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:022347dad2253081eaa25366834bb8b06a5aceb0e83b39c6b0aa865759e49d69", size = 276050, upload-time = "2023-03-21T00:01:16.353Z" }, - { url = "https://files.pythonhosted.org/packages/66/1b/71e2cbf315723b03a0789563bf8ea131020cd9df7020f1062ddd5c821e84/orjson-3.8.8-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddfcc54793e266056fe1c257d0804c336bca1c5c1ee7979d674e1fc19cfb0a6a", size = 299059, upload-time = "2023-03-21T00:01:18.567Z" }, - { url = "https://files.pythonhosted.org/packages/f4/55/7043a342e2b57ccc2b1e3b277da6fa9a3a761eec315c03a01fe3be32cbea/orjson-3.8.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:449d8ed1e0e6b24e9df5a06b59fd66ea7f7293e141257069601ae8ff9fad705c", size = 275271, upload-time = "2023-03-20T23:43:01.231Z" }, - { url = "https://files.pythonhosted.org/packages/6a/e5/0ed216684216a47d810c5384e53b23a7b6a54a6498bd602e429a8a215ecd/orjson-3.8.8-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:0204bc414bc6f7a595211569840b422d96649fd8686efa1fbbcb12eed5dd9521", size = 143521, upload-time = "2023-03-20T23:38:45.43Z" }, - { url = 
"https://files.pythonhosted.org/packages/db/ca/45fbe4d4c0c0879e3eb5b364e1176e2e4ea7f2ebc3ef93c302b65429f9fe/orjson-3.8.8-cp311-none-win_amd64.whl", hash = "sha256:e991a5c2c5f2f299c77e1d07ef2812ff5b68e1d97a2aab01aca29cf756473aa3", size = 204970, upload-time = "2023-03-20T23:28:51.922Z" }, +version = "3.9.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3d/27/6a821fc97a2b68705cba3158e5ddb300938500a8c2b19dc084f6d43587d4/orjson-3.9.12.tar.gz", hash = "sha256:da908d23a3b3243632b523344403b128722a5f45e278a8343c2bb67538dff0e4", size = 4821075, upload-time = "2024-01-18T17:24:19.045Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/c5/6c9f6084c8b8e55b3af2e05cf0c656442dc0088135f2a9d034a1ef41895c/orjson-3.9.12-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6b4e2bed7d00753c438e83b613923afdd067564ff7ed696bfe3a7b073a236e07", size = 250591, upload-time = "2024-01-18T17:19:41.656Z" }, + { url = "https://files.pythonhosted.org/packages/65/a3/f07f1ed78002a03ce4a33f285b06b81bfca188a6652cd8f563a4f60cec47/orjson-3.9.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd1b8ec63f0bf54a50b498eedeccdca23bd7b658f81c524d18e410c203189365", size = 142528, upload-time = "2024-01-18T17:22:47.683Z" }, + { url = "https://files.pythonhosted.org/packages/64/f6/cc6564986f2d9c15c5372af0b1cbf4ee67a1eac1553c151bec6d0a8f5ce6/orjson-3.9.12-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab8add018a53665042a5ae68200f1ad14c7953fa12110d12d41166f111724656", size = 130798, upload-time = "2024-01-18T17:22:50.208Z" }, + { url = "https://files.pythonhosted.org/packages/09/37/f6896df8adb9ca831423765f732203248431380afc0d6c9786647fd275a0/orjson-3.9.12-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12756a108875526b76e505afe6d6ba34960ac6b8c5ec2f35faf73ef161e97e07", size = 159981, upload-time = 
"2024-01-18T17:22:52.785Z" }, + { url = "https://files.pythonhosted.org/packages/62/a9/fc2cc6722a4abe40aec7641002755ebef89f43ac849286c6b8117ba548a9/orjson-3.9.12-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:890e7519c0c70296253660455f77e3a194554a3c45e42aa193cdebc76a02d82b", size = 157028, upload-time = "2024-01-18T17:22:55.313Z" }, + { url = "https://files.pythonhosted.org/packages/f9/3a/5ba53e3c6dd62860331d7d14c236bf7e85fd52983d22419a5a431fc3ddce/orjson-3.9.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d664880d7f016efbae97c725b243b33c2cbb4851ddc77f683fd1eec4a7894146", size = 139793, upload-time = "2024-01-18T17:22:57.927Z" }, + { url = "https://files.pythonhosted.org/packages/27/a5/aa5668088a0fb02d64e5d58bddcd3adbe8559d64820343ef7bdb04959839/orjson-3.9.12-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cfdaede0fa5b500314ec7b1249c7e30e871504a57004acd116be6acdda3b8ab3", size = 315448, upload-time = "2024-01-18T17:23:00.665Z" }, + { url = "https://files.pythonhosted.org/packages/73/0a/7448623e16abeb690e8da6f4184c75deb3cba756c806279a9adc20926c13/orjson-3.9.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6492ff5953011e1ba9ed1bf086835fd574bd0a3cbe252db8e15ed72a30479081", size = 309061, upload-time = "2024-01-18T17:23:03.036Z" }, + { url = "https://files.pythonhosted.org/packages/22/84/2010e461edf593e7af9876ad1cee002834b8fa9ac3576aca74c6f76a1ca5/orjson-3.9.12-cp310-none-win32.whl", hash = "sha256:29bf08e2eadb2c480fdc2e2daae58f2f013dff5d3b506edd1e02963b9ce9f8a9", size = 140905, upload-time = "2024-01-18T17:24:02.18Z" }, + { url = "https://files.pythonhosted.org/packages/67/da/28ff5de12191a2d626b52494498da33087406d33f1c504b83cb796ce71cb/orjson-3.9.12-cp310-none-win_amd64.whl", hash = "sha256:0fc156fba60d6b50743337ba09f052d8afc8b64595112996d22f5fce01ab57da", size = 134850, upload-time = "2024-01-18T17:19:12.986Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/a3/b8f435fd22245bbe301abb0c1c3a29c8f0e1065333491f98974f59bab3bd/orjson-3.9.12-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:2849f88a0a12b8d94579b67486cbd8f3a49e36a4cb3d3f0ab352c596078c730c", size = 250589, upload-time = "2024-01-18T17:21:41.686Z" }, + { url = "https://files.pythonhosted.org/packages/53/0a/711394924624222a0fe86553bd3a38654bf17608c89a70f452c9a32c56da/orjson-3.9.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3186b18754befa660b31c649a108a915493ea69b4fc33f624ed854ad3563ac65", size = 142531, upload-time = "2024-01-18T17:23:05.758Z" }, + { url = "https://files.pythonhosted.org/packages/9f/74/e1975f84c0899a237d29a04c33ee6fc7ee547465b8541bbbabc158d060d8/orjson-3.9.12-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbbf313c9fb9d4f6cf9c22ced4b6682230457741daeb3d7060c5d06c2e73884a", size = 130818, upload-time = "2024-01-18T17:23:07.847Z" }, + { url = "https://files.pythonhosted.org/packages/4d/5a/413aa3107ee4cd4a29c61fbef00534e9b135795f76cd2a1183b16edd16cd/orjson-3.9.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99e8cd005b3926c3db9b63d264bd05e1bf4451787cc79a048f27f5190a9a0311", size = 159983, upload-time = "2024-01-18T17:23:10.464Z" }, + { url = "https://files.pythonhosted.org/packages/c6/e0/d54d01dca09d219e1e8eb8cc97cc07e5e17cb6ab81eae82597ed6c4fb0e8/orjson-3.9.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59feb148392d9155f3bfed0a2a3209268e000c2c3c834fb8fe1a6af9392efcbf", size = 157029, upload-time = "2024-01-18T17:23:12.449Z" }, + { url = "https://files.pythonhosted.org/packages/cb/f0/506623ccfc4b2ec326c0be70a1bd2162e0e8202087062f6b05075a15045e/orjson-3.9.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4ae815a172a1f073b05b9e04273e3b23e608a0858c4e76f606d2d75fcabde0c", size = 139796, upload-time = 
"2024-01-18T17:23:15.029Z" }, + { url = "https://files.pythonhosted.org/packages/0d/70/032ffa99b1fb806b018dab0b59a45489f07d6e69c06ddaa13dc000b8862f/orjson-3.9.12-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed398f9a9d5a1bf55b6e362ffc80ac846af2122d14a8243a1e6510a4eabcb71e", size = 315447, upload-time = "2024-01-18T17:23:17.907Z" }, + { url = "https://files.pythonhosted.org/packages/b1/9f/f418a0d3a641fb4e4fc156b1a1e32a23adb8eb77fbcdf9f61a503d7b74ae/orjson-3.9.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d3cfb76600c5a1e6be91326b8f3b83035a370e727854a96d801c1ea08b708073", size = 309068, upload-time = "2024-01-18T17:23:20.795Z" }, + { url = "https://files.pythonhosted.org/packages/0e/00/638e36d13c69a7b4d5fafbdadccf130c72080f621d67fc1020490cdcea66/orjson-3.9.12-cp311-none-win32.whl", hash = "sha256:a2b6f5252c92bcab3b742ddb3ac195c0fa74bed4319acd74f5d54d79ef4715dc", size = 140907, upload-time = "2024-01-18T17:25:05.867Z" }, + { url = "https://files.pythonhosted.org/packages/41/c5/ac4a0f8a1ae80373f4dce810f861cb2bab4aded9ceccc510d73b966fb607/orjson-3.9.12-cp311-none-win_amd64.whl", hash = "sha256:c95488e4aa1d078ff5776b58f66bd29d628fa59adcb2047f4efd3ecb2bd41a71", size = 134853, upload-time = "2024-01-18T17:19:18.966Z" }, + { url = "https://files.pythonhosted.org/packages/0c/74/be2349ccba34fdce4f38607ce7df9a3faf64d31f49a6d8289537e8442f2d/orjson-3.9.12-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d6ce2062c4af43b92b0221ed4f445632c6bf4213f8a7da5396a122931377acd9", size = 250640, upload-time = "2024-01-18T17:21:47.642Z" }, + { url = "https://files.pythonhosted.org/packages/54/7c/e49520e76a976b98b7f0e5d34a2072418eb85b2ebcbeeba855498955f919/orjson-3.9.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:950951799967558c214cd6cceb7ceceed6f81d2c3c4135ee4a2c9c69f58aa225", size = 142469, upload-time = "2024-01-18T17:23:23.172Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/06/016366526c0a9409195d05401d6f42cf7614c5f2e7046dcd6cc064a772b7/orjson-3.9.12-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2dfaf71499d6fd4153f5c86eebb68e3ec1bf95851b030a4b55c7637a37bbdee4", size = 130826, upload-time = "2024-01-18T17:23:25.941Z" }, + { url = "https://files.pythonhosted.org/packages/d7/cb/4bb410d8825ce7171579452c01844e853c80e1437b7db40d9d503a8b4160/orjson-3.9.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:659a8d7279e46c97661839035a1a218b61957316bf0202674e944ac5cfe7ed83", size = 160021, upload-time = "2024-01-18T17:23:28.036Z" }, + { url = "https://files.pythonhosted.org/packages/e5/cb/96af73bb6b06d3cd967394a0834496e2600f44db6b7622bb540e93e4e98a/orjson-3.9.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af17fa87bccad0b7f6fd8ac8f9cbc9ee656b4552783b10b97a071337616db3e4", size = 157045, upload-time = "2024-01-18T17:23:31.265Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ac/7cc0c187536b5e6fc50b15d4b601d40b4b9825a1bcaf0ed19c83b12ff90e/orjson-3.9.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd52dec9eddf4c8c74392f3fd52fa137b5f2e2bed1d9ae958d879de5f7d7cded", size = 139861, upload-time = "2024-01-18T17:23:33.578Z" }, + { url = "https://files.pythonhosted.org/packages/04/91/c817ad546b8640013627161e561ae519c95d5ccf49b7eee2f994bbb7c6c3/orjson-3.9.12-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:640e2b5d8e36b970202cfd0799d11a9a4ab46cf9212332cd642101ec952df7c8", size = 315361, upload-time = "2024-01-18T17:23:35.802Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d6/1f9db09d7fcd7cf118775a34038daedb82cff14e57c7a223ae8b1725af7d/orjson-3.9.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:daa438bd8024e03bcea2c5a92cd719a663a58e223fba967296b6ab9992259dbf", size = 309184, upload-time = "2024-01-18T17:23:39.171Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/f5/87f3728d3aff8d76e7343f9cce0ac2958bb43e1dd7222a32d9c50981d508/orjson-3.9.12-cp312-none-win_amd64.whl", hash = "sha256:1bb8f657c39ecdb924d02e809f992c9aafeb1ad70127d53fb573a6a6ab59d549", size = 134919, upload-time = "2024-01-18T17:19:45.415Z" }, +] + +[[package]] +name = "overrides" +version = "7.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812, upload-time = "2024-01-27T21:01:33.423Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832, upload-time = "2024-01-27T21:01:31.393Z" }, ] [[package]] name = "packaging" -version = "23.0" +version = "23.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/d5/aca8ff6f49aa5565df1c826e7bf5e85a6df852ee063600c1efa5b932968c/packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97", size = 126241, upload-time = "2023-01-08T18:18:54.74Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/2b/9b9c33ffed44ee921d0967086d653047286054117d584f1b1a7c22ceaf7b/packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", size = 146714, upload-time = "2023-10-01T13:50:05.279Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/35/a31aed2993e398f6b09a790a181a7927eb14610ee8bbf02dc14d31677f1c/packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2", size = 42678, upload-time = "2023-01-08T18:18:53.118Z" }, + { url 
= "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011, upload-time = "2023-10-01T13:50:03.745Z" }, ] [[package]] @@ -2937,6 +3622,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/4d/e616aed5bcd6556033ade80f47dae49349f74ea86d424516ea5ad94b4472/pyparsing-3.1.3-py3-none-any.whl", hash = "sha256:1e80fdf93e6c1aeaf4702523f1d48f66d52fa6459096a8f812591157270a5896", size = 104076, upload-time = "2024-08-25T05:38:36.318Z" }, ] +[[package]] +name = "pypika" +version = "0.51.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/78/cbaebba88e05e2dcda13ca203131b38d3640219f20ebb49676d26714861b/pypika-0.51.1.tar.gz", hash = "sha256:c30c7c1048fbf056fd3920c5a2b88b0c29dd190a9b2bee971fd17e4abe4d0ebe", size = 80919, upload-time = "2026-02-04T11:27:48.304Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/83/c77dfeed04022e8930b08eedca2b6e5efed256ab3321396fde90066efb65/pypika-0.51.1-py2.py3-none-any.whl", hash = "sha256:77985b4d7ce71b9905255bf12468cf598349e98837c037541cfc240e528aec46", size = 60585, upload-time = "2026-02-04T11:27:46.251Z" }, +] + +[[package]] +name = "pyproject-hooks" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/82/28175b2414effca1cdac8dc99f76d660e7a4fb0ceefa4b4ab8f5f6742925/pyproject_hooks-1.2.0.tar.gz", hash = "sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8", size = 19228, upload-time = "2024-09-29T09:24:13.293Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", 
hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216, upload-time = "2024-09-29T09:24:11.978Z" }, +] + +[[package]] +name = "pyreadline3" +version = "3.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839, upload-time = "2024-09-19T02:40:10.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, +] + [[package]] name = "pyrsistent" version = "0.19.3" @@ -3262,11 +3977,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/e5/63ca2c4edf4e00657584608bee1001302bbf8c5f569340b78304f2f446cb/rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97", size = 31976, upload-time = "2021-05-07T23:29:25.611Z" }, ] -[package.optional-dependencies] -idna2008 = [ - { name = "idna" }, -] - [[package]] name = "rich" version = "13.3.2" @@ -3576,12 +4286,37 @@ version = "2.2.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a3/f8/a6091be6a60ed4df9ac806c89fbc5fe1a3416d0284f3ba70aa09a3419428/starkbank-ecdsa-2.2.0.tar.gz", hash = "sha256:9399c3371b899d4a235b68a1ed7919d202fbf024bd2c863ae8ebdad343c2a63a", size = 14690, upload-time = "2022-10-24T18:36:05.27Z" } +[[package]] +name = "starlette" +version = "0.50.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, +] + [[package]] name = "stringcase" version = "1.2.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f3/1f/1241aa3d66e8dc1612427b17885f5fcd9c9ee3079fc0d28e9a3aeeb36fa3/stringcase-1.2.0.tar.gz", hash = "sha256:48a06980661908efe8d9d34eab2b6c13aefa2163b3ced26972902e3bdfd87008", size = 2958, upload-time = "2017-08-06T01:40:57.021Z" } +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + [[package]] name = "tabulate" version = "0.9.0" @@ -3593,11 +4328,11 @@ wheels = [ [[package]] name = "tenacity" -version = "9.0.0" +version = "8.5.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/cd/94/91fccdb4b8110642462e653d5dcb27e7b674742ad68efd146367da7bdb10/tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b", size = 47421, upload-time = "2024-07-29T12:12:27.547Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/4d/6a19536c50b849338fcbe9290d562b52cbdcf30d8963d3588a68a4107df1/tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78", size = 47309, upload-time = "2024-07-05T07:25:31.836Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/cb/b86984bed139586d01532a587464b5805f12e397594f19f931c4c2fbfa61/tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539", size = 28169, upload-time = "2024-07-29T12:12:25.825Z" }, + { url = "https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687", size = 28165, upload-time = "2024-07-05T07:25:29.591Z" }, ] [[package]] @@ -3609,6 +4344,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", size = 78154, upload-time = "2019-08-30T21:37:03.543Z" }, ] +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, + { url = "https://files.pythonhosted.org/packages/84/04/655b79dbcc9b3ac5f1479f18e931a344af67e5b7d3b251d2dcdcd7558592/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:753d47ebd4542742ef9261d9da92cd545b2cacbb48349a1225466745bb866ec4", size = 3282301, upload-time = "2026-01-05T10:40:34.858Z" }, + { url = "https://files.pythonhosted.org/packages/46/cd/e4851401f3d8f6f45d8480262ab6a5c8cb9c4302a790a35aa14eeed6d2fd/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e10bf9113d209be7cd046d40fbabbaf3278ff6d18eb4da4c500443185dc1896c", size = 3161308, upload-time = "2026-01-05T10:40:40.737Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6e/55553992a89982cd12d4a66dddb5e02126c58677ea3931efcbe601d419db/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64d94e84f6660764e64e7e0b22baa72f6cd942279fdbb21d46abd70d179f0195", size = 3718964, upload-time = "2026-01-05T10:40:46.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/8c/b1c87148aa15e099243ec9f0cf9d0e970cc2234c3257d558c25a2c5304e6/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f01a9c019878532f98927d2bacb79bbb404b43d3437455522a00a30718cdedb5", size = 3373542, upload-time = "2026-01-05T10:40:52.803Z" }, +] + [[package]] name = "toml" version = "0.10.2" @@ -3654,6 +4419,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/fa/b8b2ee2f528eedd03d98d49c76822f065f18e99f6f58c1e18fe5ed893098/tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf", size = 428429, upload-time = "2023-05-14T03:41:21.432Z" }, ] +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + [[package]] name = "traitlets" version = "5.9.0" @@ -3665,14 +4442,15 @@ wheels = [ [[package]] name = "typer" -version = "0.7.0" +version = "0.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e1/45/bcbc581f87c8d8f2a56b513eb994d07ea4546322818d95dc6a3caf2c928b/typer-0.7.0.tar.gz", hash = "sha256:ff797846578a9f2a201b53442aedeb543319466870fbe1c701eab66dd7681165", size = 
251871, upload-time = "2022-11-05T19:43:54.903Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/49/39f10d0f75886439ab3dac889f14f8ad511982a754e382c9b6ca895b29e9/typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2", size = 273985, upload-time = "2023-05-02T05:20:57.63Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/44/56c3f48d2bb83d76f5c970aef8e2c3ebd6a832f09e3621c5395371fe6999/typer-0.7.0-py3-none-any.whl", hash = "sha256:b5e704f4e48ec263de1c0b3a2387cd405a13767d2f907f44c1a08cbad96f606d", size = 38377, upload-time = "2022-11-05T19:43:53.402Z" }, + { url = "https://files.pythonhosted.org/packages/bf/0e/c68adf10adda05f28a6ed7b9f4cd7b8e07f641b44af88ba72d9c89e4de7a/typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee", size = 45861, upload-time = "2023-05-02T05:20:55.675Z" }, ] [[package]] @@ -3728,6 +4506,61 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8c/f1/7c45fe2a09133e103dcf0621831545c268cd3f7a5d58dc7e470be91b2cd0/uvicorn-0.21.1-py3-none-any.whl", hash = "sha256:e47cac98a6da10cd41e6fd036d472c6f58ede6c5dbee3dbee3ef7a100ed97742", size = 57824, upload-time = "2023-03-16T12:30:13.762Z" }, ] +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, 
upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/14/ecceb239b65adaaf7fde510aa8bd534075695d1e5f8dadfa32b5723d9cfb/uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c", size = 1343335, upload-time = "2025-10-16T22:16:11.43Z" }, + { url = "https://files.pythonhosted.org/packages/ba/ae/6f6f9af7f590b319c94532b9567409ba11f4fa71af1148cab1bf48a07048/uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792", size = 742903, upload-time = "2025-10-16T22:16:12.979Z" }, + { url = "https://files.pythonhosted.org/packages/09/bd/3667151ad0702282a1f4d5d29288fce8a13c8b6858bf0978c219cd52b231/uvloop-0.22.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac33ed96229b7790eb729702751c0e93ac5bc3bcf52ae9eccbff30da09194b86", size = 3648499, upload-time = "2025-10-16T22:16:14.451Z" }, + { url = "https://files.pythonhosted.org/packages/b3/f6/21657bb3beb5f8c57ce8be3b83f653dd7933c2fd00545ed1b092d464799a/uvloop-0.22.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:481c990a7abe2c6f4fc3d98781cc9426ebd7f03a9aaa7eb03d3bfc68ac2a46bd", size = 3700133, upload-time = "2025-10-16T22:16:16.272Z" }, + { url = "https://files.pythonhosted.org/packages/09/e0/604f61d004ded805f24974c87ddd8374ef675644f476f01f1df90e4cdf72/uvloop-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a592b043a47ad17911add5fbd087c76716d7c9ccc1d64ec9249ceafd735f03c2", size = 3512681, upload-time = "2025-10-16T22:16:18.07Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ce/8491fd370b0230deb5eac69c7aae35b3be527e25a911c0acdffb922dc1cd/uvloop-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1489cf791aa7b6e8c8be1c5a080bae3a672791fcb4e9e12249b05862a2ca9cec", size = 3615261, upload-time = 
"2025-10-16T22:16:19.596Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d5/69900f7883235562f1f50d8184bb7dd84a2fb61e9ec63f3782546fdbd057/uvloop-0.22.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9", size = 1352420, upload-time = "2025-10-16T22:16:21.187Z" }, + { url = "https://files.pythonhosted.org/packages/a8/73/c4e271b3bce59724e291465cc936c37758886a4868787da0278b3b56b905/uvloop-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77", size = 748677, upload-time = "2025-10-16T22:16:22.558Z" }, + { url = "https://files.pythonhosted.org/packages/86/94/9fb7fad2f824d25f8ecac0d70b94d0d48107ad5ece03769a9c543444f78a/uvloop-0.22.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21", size = 3753819, upload-time = "2025-10-16T22:16:23.903Z" }, + { url = "https://files.pythonhosted.org/packages/74/4f/256aca690709e9b008b7108bc85fba619a2bc37c6d80743d18abad16ee09/uvloop-0.22.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56a2d1fae65fd82197cb8c53c367310b3eabe1bbb9fb5a04d28e3e3520e4f702", size = 3804529, upload-time = "2025-10-16T22:16:25.246Z" }, + { url = "https://files.pythonhosted.org/packages/7f/74/03c05ae4737e871923d21a76fe28b6aad57f5c03b6e6bfcfa5ad616013e4/uvloop-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40631b049d5972c6755b06d0bfe8233b1bd9a8a6392d9d1c45c10b6f9e9b2733", size = 3621267, upload-time = "2025-10-16T22:16:26.819Z" }, + { url = "https://files.pythonhosted.org/packages/75/be/f8e590fe61d18b4a92070905497aec4c0e64ae1761498cad09023f3f4b3e/uvloop-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:535cc37b3a04f6cd2c1ef65fa1d370c9a35b6695df735fcff5427323f2cd5473", size = 3723105, upload-time = "2025-10-16T22:16:28.252Z" }, 
+ { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, + { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = "https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = "https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + [[package]] name = "vine" 
version = "5.0.0" @@ -3751,6 +4584,109 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/0a/18755fa6aec794fd539b050beeaa905fa5c77c64356aa8bdecb62c01581a/virtualenv-20.23.0-py3-none-any.whl", hash = "sha256:6abec7670e5802a528357fdc75b26b9f57d5d92f29c5462ba0fbe45feacc685e", size = 3252838, upload-time = "2023-04-27T23:37:54.268Z" }, ] +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/1a/206e8cf2dd86fddf939165a57b4df61607a1e0add2785f170a3f616b7d9f/watchfiles-1.1.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c", size = 407318, upload-time = "2025-10-14T15:04:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/b3/0f/abaf5262b9c496b5dad4ed3c0e799cbecb1f8ea512ecb6ddd46646a9fca3/watchfiles-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43", size = 394478, upload-time = "2025-10-14T15:04:20.297Z" }, + { url = "https://files.pythonhosted.org/packages/b1/04/9cc0ba88697b34b755371f5ace8d3a4d9a15719c07bdc7bd13d7d8c6a341/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31", size = 449894, upload-time = "2025-10-14T15:04:21.527Z" }, + { url = "https://files.pythonhosted.org/packages/d2/9c/eda4615863cd8621e89aed4df680d8c3ec3da6a4cf1da113c17decd87c7f/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac", size = 459065, upload-time = "2025-10-14T15:04:22.795Z" }, + { url = "https://files.pythonhosted.org/packages/84/13/f28b3f340157d03cbc8197629bc109d1098764abe1e60874622a0be5c112/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d", size = 488377, upload-time = "2025-10-14T15:04:24.138Z" }, + { url = "https://files.pythonhosted.org/packages/86/93/cfa597fa9389e122488f7ffdbd6db505b3b915ca7435ecd7542e855898c2/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d", size = 595837, upload-time = "2025-10-14T15:04:25.057Z" }, + { url = "https://files.pythonhosted.org/packages/57/1e/68c1ed5652b48d89fc24d6af905d88ee4f82fa8bc491e2666004e307ded1/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863", size = 473456, upload-time = "2025-10-14T15:04:26.497Z" }, + { url = "https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab", size = 455614, upload-time = "2025-10-14T15:04:27.539Z" }, + { url = "https://files.pythonhosted.org/packages/61/a5/3d782a666512e01eaa6541a72ebac1d3aae191ff4a31274a66b8dd85760c/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82", size = 630690, upload-time = "2025-10-14T15:04:28.495Z" }, + { url = "https://files.pythonhosted.org/packages/9b/73/bb5f38590e34687b2a9c47a244aa4dd50c56a825969c92c9c5fc7387cea1/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4", size = 622459, upload-time = "2025-10-14T15:04:29.491Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ac/c9bb0ec696e07a20bd58af5399aeadaef195fb2c73d26baf55180fe4a942/watchfiles-1.1.1-cp310-cp310-win32.whl", hash = "sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844", size = 272663, upload-time = "2025-10-14T15:04:30.435Z" }, + { url = "https://files.pythonhosted.org/packages/11/a0/a60c5a7c2ec59fa062d9a9c61d02e3b6abd94d32aac2d8344c4bdd033326/watchfiles-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e", size = 287453, upload-time = "2025-10-14T15:04:31.53Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f8/2c5f479fb531ce2f0564eda479faecf253d886b1ab3630a39b7bf7362d46/watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5", size = 406529, upload-time = "2025-10-14T15:04:32.899Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cd/f515660b1f32f65df671ddf6f85bfaca621aee177712874dc30a97397977/watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741", size = 394384, upload-time = "2025-10-14T15:04:33.761Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c3/28b7dc99733eab43fca2d10f55c86e03bd6ab11ca31b802abac26b23d161/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6", size = 448789, upload-time = "2025-10-14T15:04:34.679Z" }, + { url = "https://files.pythonhosted.org/packages/4a/24/33e71113b320030011c8e4316ccca04194bf0cbbaeee207f00cbc7d6b9f5/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b", size = 460521, 
upload-time = "2025-10-14T15:04:35.963Z" }, + { url = "https://files.pythonhosted.org/packages/f4/c3/3c9a55f255aa57b91579ae9e98c88704955fa9dac3e5614fb378291155df/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14", size = 488722, upload-time = "2025-10-14T15:04:37.091Z" }, + { url = "https://files.pythonhosted.org/packages/49/36/506447b73eb46c120169dc1717fe2eff07c234bb3232a7200b5f5bd816e9/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d", size = 596088, upload-time = "2025-10-14T15:04:38.39Z" }, + { url = "https://files.pythonhosted.org/packages/82/ab/5f39e752a9838ec4d52e9b87c1e80f1ee3ccdbe92e183c15b6577ab9de16/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff", size = 472923, upload-time = "2025-10-14T15:04:39.666Z" }, + { url = "https://files.pythonhosted.org/packages/af/b9/a419292f05e302dea372fa7e6fda5178a92998411f8581b9830d28fb9edb/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606", size = 456080, upload-time = "2025-10-14T15:04:40.643Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c3/d5932fd62bde1a30c36e10c409dc5d54506726f08cb3e1d8d0ba5e2bc8db/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701", size = 629432, upload-time = "2025-10-14T15:04:41.789Z" }, + { url = "https://files.pythonhosted.org/packages/f7/77/16bddd9779fafb795f1a94319dc965209c5641db5bf1edbbccace6d1b3c0/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10", size = 623046, upload-time = 
"2025-10-14T15:04:42.718Z" }, + { url = "https://files.pythonhosted.org/packages/46/ef/f2ecb9a0f342b4bfad13a2787155c6ee7ce792140eac63a34676a2feeef2/watchfiles-1.1.1-cp311-cp311-win32.whl", hash = "sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849", size = 271473, upload-time = "2025-10-14T15:04:43.624Z" }, + { url = "https://files.pythonhosted.org/packages/94/bc/f42d71125f19731ea435c3948cad148d31a64fccde3867e5ba4edee901f9/watchfiles-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4", size = 287598, upload-time = "2025-10-14T15:04:44.516Z" }, + { url = "https://files.pythonhosted.org/packages/57/c9/a30f897351f95bbbfb6abcadafbaca711ce1162f4db95fc908c98a9165f3/watchfiles-1.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e", size = 277210, upload-time = "2025-10-14T15:04:45.883Z" }, + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = 
"2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = 
"2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, + { url = "https://files.pythonhosted.org/packages/ba/4c/a888c91e2e326872fa4705095d64acd8aa2fb9c1f7b9bd0588f33850516c/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3", size = 409611, upload-time = "2025-10-14T15:06:05.809Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c7/5420d1943c8e3ce1a21c0a9330bcf7edafb6aa65d26b21dbb3267c9e8112/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2", size = 396889, upload-time = "2025-10-14T15:06:07.035Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e5/0072cef3804ce8d3aaddbfe7788aadff6b3d3f98a286fdbee9fd74ca59a7/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d", size = 451616, upload-time = "2025-10-14T15:06:08.072Z" }, + { url = "https://files.pythonhosted.org/packages/83/4e/b87b71cbdfad81ad7e83358b3e447fedd281b880a03d64a760fe0a11fc2e/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b", size = 458413, upload-time = "2025-10-14T15:06:09.209Z" }, + { 
url = "https://files.pythonhosted.org/packages/d3/8e/e500f8b0b77be4ff753ac94dc06b33d8f0d839377fee1b78e8c8d8f031bf/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88", size = 408250, upload-time = "2025-10-14T15:06:10.264Z" }, + { url = "https://files.pythonhosted.org/packages/bd/95/615e72cd27b85b61eec764a5ca51bd94d40b5adea5ff47567d9ebc4d275a/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336", size = 396117, upload-time = "2025-10-14T15:06:11.28Z" }, + { url = "https://files.pythonhosted.org/packages/c9/81/e7fe958ce8a7fb5c73cc9fb07f5aeaf755e6aa72498c57d760af760c91f8/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24", size = 450493, upload-time = "2025-10-14T15:06:12.321Z" }, + { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, +] + [[package]] name = "wcwidth" version = "0.2.6" From 5a7f3958c33d1d108202706a1e653154cf247a82 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 02:29:38 +0530 Subject: [PATCH 03/49] feat(ai-chat): add dashboard chat and ai context schema foundation --- .../0152_orgdbt_docs_generated_at_and_more.py | 101 +++++++++ ddpui/models/__init__.py | 6 + ddpui/models/dashboard_chat.py | 100 +++++++++ ddpui/models/org.py | 2 + ddpui/models/org_preferences.py | 9 + .../models/test_dashboard_chat_models.py | 208 ++++++++++++++++++ 6 files changed, 426 insertions(+) create mode 100644 ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py create mode 
100644 ddpui/models/dashboard_chat.py create mode 100644 ddpui/tests/models/test_dashboard_chat_models.py diff --git a/ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py b/ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py new file mode 100644 index 000000000..fa153eaa6 --- /dev/null +++ b/ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py @@ -0,0 +1,101 @@ +# Generated by Django 4.2 on 2026-03-19 20:58 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + ('ddpui', '0151_alter_org_queue_config'), + ] + + operations = [ + migrations.AddField( + model_name='orgdbt', + name='docs_generated_at', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AddField( + model_name='orgdbt', + name='vector_last_ingested_at', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AddField( + model_name='orgpreferences', + name='ai_data_sharing_consented_at', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AddField( + model_name='orgpreferences', + name='ai_data_sharing_consented_by', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='ai_data_sharing_consents', to='ddpui.orguser'), + ), + migrations.AddField( + model_name='orgpreferences', + name='ai_data_sharing_enabled', + field=models.BooleanField(default=False), + ), + migrations.CreateModel( + name='OrgAIContext', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('markdown', models.TextField(blank=True, default='')), + ('updated_at', models.DateTimeField(blank=True, null=True)), + ('org', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='ai_context', to='ddpui.org')), + ('updated_by', models.ForeignKey(blank=True, null=True, 
on_delete=django.db.models.deletion.SET_NULL, related_name='org_ai_context_updates', to='ddpui.orguser')), + ], + ), + migrations.CreateModel( + name='DashboardChatSession', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(auto_created=True, default=django.utils.timezone.now)), + ('session_id', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('dashboard', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='ddpui.dashboard')), + ('org', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ddpui.org')), + ('orguser', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='ddpui.orguser')), + ], + options={ + 'db_table': 'dashboard_chat_session', + 'ordering': ['-updated_at'], + }, + ), + migrations.CreateModel( + name='DashboardChatMessage', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(auto_created=True, default=django.utils.timezone.now)), + ('sequence_number', models.PositiveIntegerField()), + ('role', models.CharField(choices=[('user', 'USER'), ('assistant', 'ASSISTANT')], max_length=20)), + ('content', models.TextField(blank=True, default='')), + ('client_message_id', models.CharField(blank=True, max_length=100, null=True)), + ('payload', models.JSONField(blank=True, null=True)), + ('session', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='messages', to='ddpui.dashboardchatsession')), + ], + options={ + 'ordering': ['sequence_number'], + }, + ), + migrations.CreateModel( + name='DashboardAIContext', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('markdown', models.TextField(blank=True, default='')), + ('updated_at', 
models.DateTimeField(blank=True, null=True)), + ('dashboard', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='ai_context', to='ddpui.dashboard')), + ('updated_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='dashboard_ai_context_updates', to='ddpui.orguser')), + ], + ), + migrations.AddIndex( + model_name='dashboardchatsession', + index=models.Index(fields=['org', 'dashboard', 'created_at'], name='dchat_sess_org_dash_idx'), + ), + migrations.AddConstraint( + model_name='dashboardchatmessage', + constraint=models.UniqueConstraint(fields=('session', 'sequence_number'), name='dchat_message_session_seq_unique'), + ), + ] diff --git a/ddpui/models/__init__.py b/ddpui/models/__init__.py index e8a95f5d0..e22781973 100644 --- a/ddpui/models/__init__.py +++ b/ddpui/models/__init__.py @@ -6,6 +6,12 @@ from ddpui.models.org_wren import OrgWren from ddpui.models.visualization import Chart from ddpui.models.dashboard import Dashboard, DashboardFilter, DashboardLock +from ddpui.models.dashboard_chat import ( + OrgAIContext, + DashboardAIContext, + DashboardChatSession, + DashboardChatMessage, +) from ddpui.models.admin_user import AdminUser from ddpui.models.georegion import GeoRegion from ddpui.models.geojson import GeoJSON diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py new file mode 100644 index 000000000..f6f1561d1 --- /dev/null +++ b/ddpui/models/dashboard_chat.py @@ -0,0 +1,100 @@ +import uuid +from enum import Enum + +from django.db import models +from django.utils import timezone + +from ddpui.models.dashboard import Dashboard +from ddpui.models.org import Org +from ddpui.models.org_user import OrgUser + + +class DashboardChatMessageRole(str, Enum): + """Supported chat message roles for dashboard chat.""" + + USER = "user" + ASSISTANT = "assistant" + + @classmethod + def choices(cls): + return [(key.value, key.name) for key in cls] + + +class 
OrgAIContext(models.Model): + """Organization-level markdown context used by dashboard chat.""" + + org = models.OneToOneField(Org, on_delete=models.CASCADE, related_name="ai_context") + markdown = models.TextField(blank=True, default="") + updated_by = models.ForeignKey( + OrgUser, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="org_ai_context_updates", + ) + updated_at = models.DateTimeField(null=True, blank=True) + + +class DashboardAIContext(models.Model): + """Dashboard-level markdown context used by dashboard chat.""" + + dashboard = models.OneToOneField( + Dashboard, + on_delete=models.CASCADE, + related_name="ai_context", + ) + markdown = models.TextField(blank=True, default="") + updated_by = models.ForeignKey( + OrgUser, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="dashboard_ai_context_updates", + ) + updated_at = models.DateTimeField(null=True, blank=True) + + +class DashboardChatSession(models.Model): + """Groups dashboard chat messages under one org/dashboard conversation.""" + + session_id = models.UUIDField(editable=False, unique=True, default=uuid.uuid4) + org = models.ForeignKey(Org, on_delete=models.CASCADE) + orguser = models.ForeignKey(OrgUser, null=True, on_delete=models.SET_NULL) + dashboard = models.ForeignKey(Dashboard, on_delete=models.SET_NULL, null=True) + created_at = models.DateTimeField(auto_created=True, default=timezone.now) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + db_table = "dashboard_chat_session" + ordering = ["-updated_at"] + indexes = [ + models.Index( + fields=["org", "dashboard", "created_at"], + name="dchat_sess_org_dash_idx", + ), + ] + + +class DashboardChatMessage(models.Model): + """One user or assistant message within a dashboard chat session.""" + + session = models.ForeignKey( + DashboardChatSession, + on_delete=models.CASCADE, + related_name="messages", + ) + sequence_number = models.PositiveIntegerField() + role = 
models.CharField(max_length=20, choices=DashboardChatMessageRole.choices()) + content = models.TextField(blank=True, default="") + client_message_id = models.CharField(max_length=100, null=True, blank=True) + payload = models.JSONField(null=True, blank=True) + created_at = models.DateTimeField(auto_created=True, default=timezone.now) + + class Meta: + ordering = ["sequence_number"] + constraints = [ + models.UniqueConstraint( + fields=["session", "sequence_number"], + name="dchat_message_session_seq_unique", + ), + ] diff --git a/ddpui/models/org.py b/ddpui/models/org.py index 2770fefb4..56f0fea44 100644 --- a/ddpui/models/org.py +++ b/ddpui/models/org.py @@ -82,6 +82,8 @@ class OrgDbt(models.Model): null=True, related_name="dbtcloud_creds_block", ) + docs_generated_at = models.DateTimeField(null=True, blank=True) + vector_last_ingested_at = models.DateTimeField(null=True, blank=True) created_at = models.DateTimeField(auto_created=True, default=timezone.now) updated_at = models.DateTimeField(auto_now=True) diff --git a/ddpui/models/org_preferences.py b/ddpui/models/org_preferences.py index 826497248..c8adcf70d 100644 --- a/ddpui/models/org_preferences.py +++ b/ddpui/models/org_preferences.py @@ -17,6 +17,15 @@ class OrgPreferences(models.Model): enable_llm_requested_by = models.ForeignKey( OrgUser, on_delete=models.CASCADE, related_name="llm_request", null=True, blank=True ) + ai_data_sharing_enabled = models.BooleanField(default=False) + ai_data_sharing_consented_by = models.ForeignKey( + OrgUser, + on_delete=models.SET_NULL, + related_name="ai_data_sharing_consents", + null=True, + blank=True, + ) + ai_data_sharing_consented_at = models.DateTimeField(null=True, blank=True) enable_discord_notifications = models.BooleanField(default=False) discord_webhook = models.URLField(blank=True, null=True) created_at = models.DateTimeField(default=timezone.now) diff --git a/ddpui/tests/models/test_dashboard_chat_models.py b/ddpui/tests/models/test_dashboard_chat_models.py new 
file mode 100644 index 000000000..b0f5d282a --- /dev/null +++ b/ddpui/tests/models/test_dashboard_chat_models.py @@ -0,0 +1,208 @@ +import os +from datetime import timedelta + +import django +import pytest + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +from django.contrib.auth.models import User +from django.db import IntegrityError +from django.db import transaction +from django.utils import timezone + +from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import ( + DashboardAIContext, + DashboardChatMessage, + DashboardChatSession, + OrgAIContext, +) +from ddpui.models.org import Org, OrgDbt +from ddpui.models.org_preferences import OrgPreferences +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.tests.api_tests.test_user_org_api import seed_db + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def authuser(): + user = User.objects.create( + username="dashchatmodeluser", + email="dashchatmodeluser@test.com", + password="testpassword", + ) + yield user + user.delete() + + +@pytest.fixture +def org_dbt(): + dbt = OrgDbt.objects.create( + gitrepo_url="https://github.com/example/dbt.git", + project_dir="/tmp/dbt", + target_type="bigquery", + default_schema="analytics", + ) + yield dbt + dbt.delete() + + +@pytest.fixture +def org(org_dbt): + org = Org.objects.create( + name="Dashboard Chat Org", + slug="dash-chat-org", + airbyte_workspace_id="workspace-id", + dbt=org_dbt, + ) + yield org + org.delete() + + +@pytest.fixture +def orguser(authuser, org, seed_db): + orguser = OrgUser.objects.create( + user=authuser, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield orguser + orguser.delete() + + +@pytest.fixture +def dashboard(org, orguser): + dashboard = Dashboard.objects.create( + title="Chat Dashboard", + 
dashboard_type="native", + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield dashboard + dashboard.delete() + + +def test_org_preferences_ai_consent_defaults(org): + preferences = OrgPreferences.objects.create(org=org) + + assert preferences.ai_data_sharing_enabled is False + assert preferences.ai_data_sharing_consented_by is None + assert preferences.ai_data_sharing_consented_at is None + + +def test_org_preferences_ai_consent_persists(org, orguser): + consented_at = timezone.now() + preferences = OrgPreferences.objects.create( + org=org, + ai_data_sharing_enabled=True, + ai_data_sharing_consented_by=orguser, + ai_data_sharing_consented_at=consented_at, + ) + + assert preferences.ai_data_sharing_enabled is True + assert preferences.ai_data_sharing_consented_by == orguser + assert preferences.ai_data_sharing_consented_at == consented_at + + +def test_org_dbt_ai_freshness_fields(org_dbt): + generated_at = timezone.now() + ingested_at = generated_at + timedelta(minutes=5) + + org_dbt.docs_generated_at = generated_at + org_dbt.vector_last_ingested_at = ingested_at + org_dbt.save() + + org_dbt.refresh_from_db() + assert org_dbt.docs_generated_at == generated_at + assert org_dbt.vector_last_ingested_at == ingested_at + + +def test_org_ai_context_one_to_one(org, orguser): + context = OrgAIContext.objects.create( + org=org, + markdown="## Org context", + updated_by=orguser, + updated_at=timezone.now(), + ) + + assert context.org == org + assert context.markdown == "## Org context" + assert org.ai_context == context + + +def test_dashboard_ai_context_one_to_one(dashboard, orguser): + context = DashboardAIContext.objects.create( + dashboard=dashboard, + markdown="## Dashboard context", + updated_by=orguser, + updated_at=timezone.now(), + ) + + assert context.dashboard == dashboard + assert context.markdown == "## Dashboard context" + assert dashboard.ai_context == context + + +def test_dashboard_chat_session_defaults(org, orguser, dashboard): + session 
= DashboardChatSession.objects.create( + org=org, + orguser=orguser, + dashboard=dashboard, + ) + + assert session.session_id is not None + assert session.org == org + assert session.orguser == orguser + assert session.dashboard == dashboard + + +def test_dashboard_chat_message_sequence_uniqueness(org, orguser, dashboard): + session = DashboardChatSession.objects.create( + org=org, + orguser=orguser, + dashboard=dashboard, + ) + DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="What changed this quarter?", + ) + + with pytest.raises(IntegrityError): + with transaction.atomic(): + DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="assistant", + content="Here is the answer.", + ) + + +def test_dashboard_chat_message_payload(org, orguser, dashboard): + session = DashboardChatSession.objects.create( + org=org, + orguser=orguser, + dashboard=dashboard, + ) + message = DashboardChatMessage.objects.create( + session=session, + sequence_number=2, + role="assistant", + content="Funding dropped because Q4 grants closed early.", + payload={ + "citations": [{"source_type": "dashboard_export", "title": "Funding by quarter"}], + "warnings": [], + "sql": "SELECT quarter, SUM(amount) FROM funding GROUP BY 1", + }, + ) + + assert message.payload["citations"][0]["source_type"] == "dashboard_export" + assert message.payload["sql"].startswith("SELECT") From a90f6cb3e807b1d3217843a30398b4de1538f256 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 02:41:02 +0530 Subject: [PATCH 04/49] feat(ai-chat): add consent status and context management APIs --- ddpui/api/dashboard_native_api.py | 67 +++++ ddpui/api/org_preferences_api.py | 129 +++++++++ ddpui/schemas/dashboard_schema.py | 17 ++ ddpui/schemas/org_preferences_schema.py | 33 +++ .../test_dashboard_chat_settings_api.py | 262 ++++++++++++++++++ 5 files changed, 508 insertions(+) create mode 100644 
ddpui/tests/api_tests/test_dashboard_chat_settings_api.py diff --git a/ddpui/api/dashboard_native_api.py b/ddpui/api/dashboard_native_api.py index acaad3f8a..8d01610e8 100644 --- a/ddpui/api/dashboard_native_api.py +++ b/ddpui/api/dashboard_native_api.py @@ -16,6 +16,7 @@ DashboardLock, DashboardFilterType, ) +from ddpui.models.dashboard_chat import DashboardAIContext from ddpui.models.org_user import OrgUser from ddpui.auth import has_permission from ddpui.utils.custom_logger import CustomLogger @@ -47,6 +48,8 @@ DashboardShareStatus, LandingPageResponse, LandingPageResolveResponse, + DashboardAIContextResponse, + UpdateDashboardAIContextSchema, ) logger = CustomLogger("ddpui") @@ -54,6 +57,23 @@ dashboard_native_router = Router() +def _get_or_create_dashboard_ai_context(dashboard: Dashboard): + context, _ = DashboardAIContext.objects.get_or_create(dashboard=dashboard) + return context + + +def _serialize_dashboard_ai_context(dashboard: Dashboard, context: DashboardAIContext): + org_dbt = dashboard.org.dbt + return DashboardAIContextResponse( + dashboard_id=dashboard.id, + dashboard_title=dashboard.title, + dashboard_context_markdown=context.markdown, + dashboard_context_updated_by=context.updated_by.user.email if context.updated_by else None, + dashboard_context_updated_at=context.updated_at, + vector_last_ingested_at=org_dbt.vector_last_ingested_at if org_dbt else None, + ) + + # Endpoints @dashboard_native_router.get("/", response=List[DashboardResponse]) @has_permission(["can_view_dashboards"]) @@ -102,6 +122,53 @@ def export_dashboard(request, dashboard_id: int): raise HttpError(404, "Dashboard not found") from err +@dashboard_native_router.get( + "/{dashboard_id}/ai-context/", + response=DashboardAIContextResponse, +) +@has_permission(["can_manage_org_settings"]) +def get_dashboard_ai_context(request, dashboard_id: int): + """Load dashboard-level AI context settings for settings management.""" + orguser: OrgUser = request.orguser + + try: + dashboard = 
DashboardService.get_dashboard(dashboard_id, orguser.org) + except DashboardNotFoundError as err: + raise HttpError(404, "Dashboard not found") from err + + context = _get_or_create_dashboard_ai_context(dashboard) + + return _serialize_dashboard_ai_context(dashboard, context) + + +@dashboard_native_router.put( + "/{dashboard_id}/ai-context/", + response=DashboardAIContextResponse, +) +@has_permission(["can_manage_org_settings"]) +@transaction.atomic +def update_dashboard_ai_context( + request, + dashboard_id: int, + payload: UpdateDashboardAIContextSchema, +): + """Update dashboard-level AI context markdown for settings management.""" + orguser: OrgUser = request.orguser + + try: + dashboard = DashboardService.get_dashboard(dashboard_id, orguser.org) + except DashboardNotFoundError as err: + raise HttpError(404, "Dashboard not found") from err + + context = _get_or_create_dashboard_ai_context(dashboard) + context.markdown = payload.dashboard_context_markdown + context.updated_by = orguser + context.updated_at = timezone.now() + context.save() + + return _serialize_dashboard_ai_context(dashboard, context) + + @dashboard_native_router.post("/", response=DashboardResponse) @has_permission(["can_create_dashboards"]) def create_dashboard(request, payload: DashboardCreate): diff --git a/ddpui/api/org_preferences_api.py b/ddpui/api/org_preferences_api.py index 898e7142d..dee146b20 100644 --- a/ddpui/api/org_preferences_api.py +++ b/ddpui/api/org_preferences_api.py @@ -8,10 +8,14 @@ from ddpui.models.org_supersets import OrgSupersets from ddpui.models.org_plans import OrgPlans from ddpui.models.userpreferences import UserPreferences +from ddpui.models.dashboard_chat import OrgAIContext from ddpui.schemas.org_preferences_schema import ( CreateOrgPreferencesSchema, UpdateLLMOptinSchema, UpdateDiscordNotificationsSchema, + OrgAIDashboardChatSettingsResponse, + UpdateOrgAIDashboardChatSchema, + OrgAIDashboardChatStatusResponse, ) from 
ddpui.core.notifications.notifications_functions import create_notification from ddpui.schemas.notifications_api_schemas import NotificationDataSchema @@ -25,10 +29,73 @@ ) from ddpui.utils.awsses import send_text_message from ddpui.utils.redis_client import RedisClient +from ddpui.utils.feature_flags import get_all_feature_flags_for_org orgpreference_router = Router() +def _get_or_create_org_preferences(org): + org_preferences = OrgPreferences.objects.filter(org=org).first() + if org_preferences is None: + org_preferences = OrgPreferences.objects.create(org=org) + return org_preferences + + +def _get_or_create_org_ai_context(org): + context, _ = OrgAIContext.objects.get_or_create(org=org) + return context + + +def _is_dashboard_chat_feature_enabled(org) -> bool: + return get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) + + +def _is_dbt_configured(org) -> bool: + return org.dbt is not None + + +def _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context): + org_dbt = org.dbt + return OrgAIDashboardChatSettingsResponse( + feature_flag_enabled=_is_dashboard_chat_feature_enabled(org), + ai_data_sharing_enabled=bool(org_preferences.ai_data_sharing_enabled), + ai_data_sharing_consented_by=( + org_preferences.ai_data_sharing_consented_by.user.email + if org_preferences.ai_data_sharing_consented_by + else None + ), + ai_data_sharing_consented_at=org_preferences.ai_data_sharing_consented_at, + org_context_markdown=org_context.markdown, + org_context_updated_by=org_context.updated_by.user.email if org_context.updated_by else None, + org_context_updated_at=org_context.updated_at, + dbt_configured=_is_dbt_configured(org), + docs_generated_at=org_dbt.docs_generated_at if org_dbt else None, + vector_last_ingested_at=org_dbt.vector_last_ingested_at if org_dbt else None, + ) + + +def _serialize_ai_dashboard_chat_status(org, org_preferences): + org_dbt = org.dbt + feature_flag_enabled = _is_dashboard_chat_feature_enabled(org) + 
ai_data_sharing_enabled = bool(org_preferences.ai_data_sharing_enabled) + dbt_configured = _is_dbt_configured(org) + vector_last_ingested_at = org_dbt.vector_last_ingested_at if org_dbt else None + + return OrgAIDashboardChatStatusResponse( + feature_flag_enabled=feature_flag_enabled, + ai_data_sharing_enabled=ai_data_sharing_enabled, + chat_available=( + feature_flag_enabled + and ai_data_sharing_enabled + and dbt_configured + and vector_last_ingested_at is not None + ), + dbt_configured=dbt_configured, + docs_generated_at=org_dbt.docs_generated_at if org_dbt else None, + vector_last_ingested_at=vector_last_ingested_at, + ) + + @orgpreference_router.post("/") def create_org_preferences(request, payload: CreateOrgPreferencesSchema): """Creates preferences for an organization""" @@ -139,6 +206,68 @@ def get_org_preferences(request): return {"success": True, "res": org_preferences.to_json()} +@orgpreference_router.get("/ai-dashboard-chat") +@has_permission(["can_manage_org_settings"]) +def get_ai_dashboard_chat_settings(request): + """Load org-level dashboard chat settings and org AI context.""" + orguser: OrgUser = request.orguser + org = orguser.org + + org_preferences = _get_or_create_org_preferences(org) + org_context = _get_or_create_org_ai_context(org) + + return { + "success": True, + "res": _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context).dict(), + } + + +@orgpreference_router.put("/ai-dashboard-chat") +@has_permission(["can_manage_org_settings"]) +@transaction.atomic +def update_ai_dashboard_chat_settings(request, payload: UpdateOrgAIDashboardChatSchema): + """Update org-level dashboard chat consent and org AI context.""" + orguser: OrgUser = request.orguser + org = orguser.org + + org_preferences = _get_or_create_org_preferences(org) + org_context = _get_or_create_org_ai_context(org) + + if ( + payload.ai_data_sharing_enabled is True + and org_preferences.ai_data_sharing_enabled is False + ): + 
org_preferences.ai_data_sharing_consented_by = orguser + org_preferences.ai_data_sharing_consented_at = timezone.now() + + if payload.ai_data_sharing_enabled is not None: + org_preferences.ai_data_sharing_enabled = payload.ai_data_sharing_enabled + + if payload.org_context_markdown is not None: + org_context.markdown = payload.org_context_markdown + org_context.updated_by = orguser + org_context.updated_at = timezone.now() + org_context.save() + + org_preferences.updated_at = timezone.now() + org_preferences.save() + + return { + "success": True, + "res": _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context).dict(), + } + +@orgpreference_router.get("/ai-dashboard-chat/status") +def get_ai_dashboard_chat_status(request): + """Return feature readiness for dashboard chat for the current org.""" + orguser: OrgUser = request.orguser + org = orguser.org + + org_preferences = _get_or_create_org_preferences(org) + + return {"success": True, "res": _serialize_ai_dashboard_chat_status(org, org_preferences).dict()} + + @orgpreference_router.get("/toolinfo") def get_tools_versions(request): """get versions of the tools used in the system""" diff --git a/ddpui/schemas/dashboard_schema.py b/ddpui/schemas/dashboard_schema.py index 230d80292..ae61fa9bf 100644 --- a/ddpui/schemas/dashboard_schema.py +++ b/ddpui/schemas/dashboard_schema.py @@ -193,3 +193,20 @@ class DashboardExportResponse(Schema): dashboard: DashboardResponse charts: List[dict] + + +class DashboardAIContextResponse(Schema): + """Response schema for dashboard-level AI context settings.""" + + dashboard_id: int + dashboard_title: str + dashboard_context_markdown: str + dashboard_context_updated_by: Optional[str] + dashboard_context_updated_at: Optional[datetime] + vector_last_ingested_at: Optional[datetime] + + +class UpdateDashboardAIContextSchema(Schema): + """Request schema for dashboard-level AI context updates.""" + + dashboard_context_markdown: str diff --git 
a/ddpui/schemas/org_preferences_schema.py b/ddpui/schemas/org_preferences_schema.py index c28b52bcf..7f08b80da 100644 --- a/ddpui/schemas/org_preferences_schema.py +++ b/ddpui/schemas/org_preferences_schema.py @@ -41,6 +41,39 @@ class UpdateDiscordNotificationsSchema(Schema): discord_webhook: Optional[str] +class OrgAIDashboardChatSettingsResponse(Schema): + """Response schema for org-level dashboard chat settings.""" + + feature_flag_enabled: bool + ai_data_sharing_enabled: bool + ai_data_sharing_consented_by: Optional[str] + ai_data_sharing_consented_at: Optional[datetime] + org_context_markdown: str + org_context_updated_by: Optional[str] + org_context_updated_at: Optional[datetime] + dbt_configured: bool + docs_generated_at: Optional[datetime] + vector_last_ingested_at: Optional[datetime] + + +class UpdateOrgAIDashboardChatSchema(Schema): + """Request schema for org-level dashboard chat settings updates.""" + + ai_data_sharing_enabled: Optional[bool] = None + org_context_markdown: Optional[str] = None + + +class OrgAIDashboardChatStatusResponse(Schema): + """Response schema for dashboard chat readiness.""" + + feature_flag_enabled: bool + ai_data_sharing_enabled: bool + chat_available: bool + dbt_configured: bool + docs_generated_at: Optional[datetime] + vector_last_ingested_at: Optional[datetime] + + class CreateOrgSupersetDetailsSchema(Schema): """Schema for creating organization superset details.""" diff --git a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py new file mode 100644 index 000000000..25f9846eb --- /dev/null +++ b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py @@ -0,0 +1,262 @@ +import os +import django + +import pytest +from ninja.errors import HttpError +from django.contrib.auth.models import User +from django.utils import timezone + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() 
+ +from ddpui.api.org_preferences_api import ( + get_ai_dashboard_chat_settings, + update_ai_dashboard_chat_settings, + get_ai_dashboard_chat_status, +) +from ddpui.api.dashboard_native_api import ( + get_dashboard_ai_context, + update_dashboard_ai_context, +) +from ddpui.auth import ACCOUNT_MANAGER_ROLE, GUEST_ROLE +from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import DashboardAIContext, OrgAIContext +from ddpui.models.org import Org, OrgDbt +from ddpui.models.org_preferences import OrgPreferences +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.schemas.dashboard_schema import UpdateDashboardAIContextSchema +from ddpui.schemas.org_preferences_schema import UpdateOrgAIDashboardChatSchema +from ddpui.tests.api_tests.test_user_org_api import seed_db, mock_request +from ddpui.utils.feature_flags import enable_feature_flag + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def authuser(): + user = User.objects.create( + username="chatsettingsuser", + email="chatsettingsuser@test.com", + password="testpassword", + ) + yield user + user.delete() + + +@pytest.fixture +def org(): + org = Org.objects.create( + name="Dashboard Chat Settings Org", + slug="chat-settings-org", + airbyte_workspace_id="workspace-id", + ) + yield org + org.delete() + + +@pytest.fixture +def orguser(authuser, org): + orguser = OrgUser.objects.create( + user=authuser, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield orguser + orguser.delete() + + +@pytest.fixture +def guest_orguser(org): + guest_user = User.objects.create( + username="chatsettingsguest", + email="chatsettingsguest@test.com", + password="testpassword", + ) + orguser = OrgUser.objects.create( + user=guest_user, + org=org, + new_role=Role.objects.filter(slug=GUEST_ROLE).first(), + ) + yield orguser + orguser.delete() + guest_user.delete() + + +@pytest.fixture +def other_org_dashboard(): + other_org = 
Org.objects.create( + name="Other Dashboard Chat Org", + slug="other-chat-org", + airbyte_workspace_id="other-workspace-id", + ) + other_user = User.objects.create( + username="otherchatsettingsuser", + email="otherchatsettingsuser@test.com", + password="testpassword", + ) + other_orguser = OrgUser.objects.create( + user=other_user, + org=other_org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + dashboard = Dashboard.objects.create( + title="Other Org Dashboard", + description="Should not be accessible", + created_by=other_orguser, + org=other_org, + ) + yield dashboard + dashboard.delete() + other_orguser.delete() + other_user.delete() + other_org.delete() + + +@pytest.fixture +def dashboard(orguser, org): + dashboard = Dashboard.objects.create( + title="Donor Report", + description="Dashboard description", + created_by=orguser, + org=org, + ) + yield dashboard + dashboard.delete() + + +def test_get_ai_dashboard_chat_settings_returns_enveloped_response(orguser, seed_db): + request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) + + response = get_ai_dashboard_chat_settings(request) + + assert response["success"] is True + assert response["res"]["feature_flag_enabled"] is True + assert response["res"]["ai_data_sharing_enabled"] is False + assert response["res"]["org_context_markdown"] == "" + assert response["res"]["dbt_configured"] is False + + preferences = OrgPreferences.objects.get(org=orguser.org) + context = OrgAIContext.objects.get(org=orguser.org) + assert preferences.ai_data_sharing_enabled is False + assert context.markdown == "" + + +def test_update_ai_dashboard_chat_settings_stamps_consent_and_context(orguser, seed_db): + request = mock_request(orguser) + payload = UpdateOrgAIDashboardChatSchema( + ai_data_sharing_enabled=True, + org_context_markdown="## Org context", + ) + + response = update_ai_dashboard_chat_settings(request, payload) + + assert response["success"] is True + assert 
response["res"]["ai_data_sharing_enabled"] is True + assert response["res"]["ai_data_sharing_consented_by"] == orguser.user.email + assert response["res"]["org_context_markdown"] == "## Org context" + assert response["res"]["org_context_updated_by"] == orguser.user.email + + preferences = OrgPreferences.objects.get(org=orguser.org) + context = OrgAIContext.objects.get(org=orguser.org) + assert preferences.ai_data_sharing_enabled is True + assert preferences.ai_data_sharing_consented_by == orguser + assert preferences.ai_data_sharing_consented_at is not None + assert context.markdown == "## Org context" + assert context.updated_by == orguser + assert context.updated_at is not None + + +def test_get_ai_dashboard_chat_status_reports_chat_available(orguser, seed_db): + request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) + + generated_at = timezone.now() + ingested_at = timezone.now() + org_dbt = OrgDbt.objects.create( + project_dir="dbt/project", + target_type="postgres", + default_schema="analytics", + docs_generated_at=generated_at, + vector_last_ingested_at=ingested_at, + ) + org = orguser.org + org.dbt = org_dbt + org.save(update_fields=["dbt"]) + + OrgPreferences.objects.create( + org=org, + ai_data_sharing_enabled=True, + ai_data_sharing_consented_by=orguser, + ai_data_sharing_consented_at=timezone.now(), + ) + + response = get_ai_dashboard_chat_status(request) + + assert response["success"] is True + assert response["res"]["feature_flag_enabled"] is True + assert response["res"]["ai_data_sharing_enabled"] is True + assert response["res"]["dbt_configured"] is True + assert response["res"]["chat_available"] is True + assert response["res"]["docs_generated_at"] == generated_at + assert response["res"]["vector_last_ingested_at"] == ingested_at + + +def test_get_ai_dashboard_chat_settings_requires_permission(guest_orguser, seed_db): + request = mock_request(guest_orguser) + + with pytest.raises(HttpError) as excinfo: + 
get_ai_dashboard_chat_settings(request) + + assert excinfo.value.status_code == 404 + assert str(excinfo.value) == "unauthorized" + + +def test_get_dashboard_ai_context_returns_direct_payload(orguser, dashboard, seed_db): + request = mock_request(orguser) + + response = get_dashboard_ai_context(request, dashboard.id) + + assert response.dashboard_id == dashboard.id + assert response.dashboard_title == dashboard.title + assert response.dashboard_context_markdown == "" + assert response.dashboard_context_updated_by is None + assert response.vector_last_ingested_at is None + + +def test_update_dashboard_ai_context_persists_context(orguser, dashboard, seed_db): + request = mock_request(orguser) + payload = UpdateDashboardAIContextSchema(dashboard_context_markdown="## Dashboard context") + + response = update_dashboard_ai_context(request, dashboard.id, payload) + + assert response.dashboard_id == dashboard.id + assert response.dashboard_context_markdown == "## Dashboard context" + assert response.dashboard_context_updated_by == orguser.user.email + + context = DashboardAIContext.objects.get(dashboard=dashboard) + assert context.markdown == "## Dashboard context" + assert context.updated_by == orguser + assert context.updated_at is not None + + +def test_get_dashboard_ai_context_requires_permission(guest_orguser, dashboard, seed_db): + request = mock_request(guest_orguser) + + with pytest.raises(HttpError) as excinfo: + get_dashboard_ai_context(request, dashboard.id) + + assert excinfo.value.status_code == 404 + assert str(excinfo.value) == "unauthorized" + + +def test_get_dashboard_ai_context_is_org_scoped(orguser, other_org_dashboard, seed_db): + request = mock_request(orguser) + + with pytest.raises(HttpError) as excinfo: + get_dashboard_ai_context(request, other_org_dashboard.id) + + assert excinfo.value.status_code == 404 + assert str(excinfo.value) == "Dashboard not found" From 4061101b330d1be9bde09045c6c966fece6572f7 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao 
Date: Fri, 20 Mar 2026 02:51:52 +0530 Subject: [PATCH 05/49] feat(ai-chat): add chroma sidecar and vector store primitives --- .env.template | 13 +- Docker/docker-compose.dev.yml | 52 ++- Docker/docker-compose.yml | 33 ++ ddpui/core/dashboard_chat/__init__.py | 1 + ddpui/core/dashboard_chat/config.py | 36 +++ ddpui/core/dashboard_chat/vector_documents.py | 104 ++++++ ddpui/core/dashboard_chat/vector_store.py | 300 ++++++++++++++++++ .../core/dashboard_chat/test_vector_store.py | 299 +++++++++++++++++ 8 files changed, 827 insertions(+), 11 deletions(-) create mode 100644 ddpui/core/dashboard_chat/__init__.py create mode 100644 ddpui/core/dashboard_chat/config.py create mode 100644 ddpui/core/dashboard_chat/vector_documents.py create mode 100644 ddpui/core/dashboard_chat/vector_store.py create mode 100644 ddpui/tests/core/dashboard_chat/test_vector_store.py diff --git a/.env.template b/.env.template index 16a071550..8dfe46ce7 100644 --- a/.env.template +++ b/.env.template @@ -141,6 +141,17 @@ ESTIMATE_TIME_FOR_QUEUE_RUNS="false" LLM_SERVICE_API_URL="http://127.0.0.1:7001" LLM_SERVICE_API_KEY="" LLM_SERVICE_API_VER="" # can be empty or v1 +OPENAI_API_KEY="" + +#################################################################################################### +# AI DASHBOARD CHAT VECTOR STORE +#################################################################################################### + +AI_DASHBOARD_CHAT_CHROMA_HOST="localhost" +AI_DASHBOARD_CHAT_CHROMA_PORT=8003 +AI_DASHBOARD_CHAT_CHROMA_SSL=False +AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX="org_" +AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL="text-embedding-3-small" #################################################################################################### # MONITORING & LOGGING @@ -178,4 +189,4 @@ LOGS_MOUNT= SCHEMA_CHANGE_DETECTION_INTER_ORG_DELAY=60 SCHEMA_CHANGE_DETECTION_SCHEDULE_HOUR=18 -SCHEMA_CHANGE_DETECTION_SCHEDULE_MINUTE=30 \ No newline at end of file 
+SCHEMA_CHANGE_DETECTION_SCHEDULE_MINUTE=30 diff --git a/Docker/docker-compose.dev.yml b/Docker/docker-compose.dev.yml index 78b26464b..26ff0f5ba 100644 --- a/Docker/docker-compose.dev.yml +++ b/Docker/docker-compose.dev.yml @@ -29,6 +29,19 @@ services: networks: - dalgo-network + chroma: + image: chromadb/chroma:0.6.3 + ports: + - "8003:8000" + environment: + - IS_PERSISTENT=TRUE + - ALLOW_RESET=TRUE + - ANONYMIZED_TELEMETRY=FALSE + volumes: + - chroma_data:/chroma/chroma + networks: + - dalgo-network + backend: image: dalgo_backend:0.1 restart: always @@ -37,6 +50,8 @@ services: condition: service_healthy redis_server: condition: service_started + chroma: + condition: service_started initdb: condition: service_completed_successfully ports: @@ -94,6 +109,12 @@ services: - DEMO_SUPERSET_PASSWORD=${DEMO_SUPERSET_PASSWORD} - FIRST_USER_PASSWORD=${FIRST_USER_PASSWORD} - FIRST_USER_ROLE=${FIRST_USER_ROLE} + - OPENAI_API_KEY=${OPENAI_API_KEY} + - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma + - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 + - AI_DASHBOARD_CHAT_CHROMA_SSL=False + - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} + - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} command: backend networks: - dalgo-network @@ -102,6 +123,17 @@ services: celery_worker: image: dalgo_backend:0.1 command: celery + depends_on: + backend: + condition: service_started + redis_server: + condition: service_started + chroma: + condition: service_started + initdb: + condition: service_completed_successfully + networks: + - dalgo-network environment: - REDIS_HOST=${REDIS_HOST} - REDIS_PORT=${REDIS_PORT} @@ -112,15 +144,12 @@ services: - DBPASSWORD=${DBPASSWORD} - DBADMINUSER=${DBADMINUSER} - DBADMINPASSWORD=${DBADMINPASSWORD} - depends_on: - backend: - condition: service_started - redis_server: - condition: service_started - initdb: - condition: service_completed_successfully - networks: - - dalgo-network + - 
OPENAI_API_KEY=${OPENAI_API_KEY} + - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma + - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 + - AI_DASHBOARD_CHAT_CHROMA_SSL=False + - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} + - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} celery_beat: image: dalgo_backend:0.1 command: beat @@ -129,6 +158,8 @@ services: condition: service_started redis_server: condition: service_started + chroma: + condition: service_started initdb: condition: service_completed_successfully @@ -161,8 +192,9 @@ services: volumes: redis_data: + chroma_data: celerybeat_volume: networks: dalgo-network: - driver: bridge \ No newline at end of file + driver: bridge diff --git a/Docker/docker-compose.yml b/Docker/docker-compose.yml index d29623b93..28a7f1481 100644 --- a/Docker/docker-compose.yml +++ b/Docker/docker-compose.yml @@ -8,6 +8,20 @@ services: - redis_data:/data networks: - dalgo-network + + chroma: + image: chromadb/chroma:0.6.3 + ports: + - "8003:8000" + environment: + - IS_PERSISTENT=TRUE + - ALLOW_RESET=TRUE + - ANONYMIZED_TELEMETRY=FALSE + volumes: + - chroma_data:/chroma/chroma + networks: + - dalgo-network + backend: image: dalgo_backend:latest command: backend @@ -16,10 +30,20 @@ services: - "8002:8002" env_file: - .env.docker + depends_on: + - redis_server + - chroma volumes: - ${CLIENTS_DBT_MOUNT}:/data/clients_dbt - ${DEV_SECRETS_MOUNT}:/data/secrets - ${LOGS_MOUNT}:/usr/src/backend/ddpui/logs + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma + - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 + - AI_DASHBOARD_CHAT_CHROMA_SSL=False + - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} + - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} networks: - dalgo-network celery_beat: @@ -41,17 +65,26 @@ services: depends_on: - backend - redis_server + - chroma env_file: - 
.env.docker volumes: - ${CLIENTS_DBT_MOUNT}:/data/clients_dbt - ${DEV_SECRETS_MOUNT}:/data/secrets - ${LOGS_MOUNT}:/usr/src/backend/ddpui/logs + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma + - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 + - AI_DASHBOARD_CHAT_CHROMA_SSL=False + - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} + - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} networks: - dalgo-network volumes: redis_data: + chroma_data: celerybeat_volume: networks: diff --git a/ddpui/core/dashboard_chat/__init__.py b/ddpui/core/dashboard_chat/__init__.py new file mode 100644 index 000000000..b409e250c --- /dev/null +++ b/ddpui/core/dashboard_chat/__init__.py @@ -0,0 +1 @@ +"""Dashboard chat backend primitives.""" diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py new file mode 100644 index 000000000..0fc102dbf --- /dev/null +++ b/ddpui/core/dashboard_chat/config.py @@ -0,0 +1,36 @@ +"""Configuration helpers for dashboard chat infrastructure.""" + +from dataclasses import dataclass +import os + + +def _parse_bool(value: str | None, default: bool) -> bool: + """Parse a boolean env var using Dalgo's common truthy values.""" + if value is None: + return default + return value.strip().lower() in {"1", "true", "yes", "on"} + + +@dataclass(frozen=True) +class DashboardChatVectorStoreConfig: + """Environment-backed configuration for the Chroma sidecar and embeddings.""" + + chroma_host: str = "localhost" + chroma_port: int = 8003 + chroma_ssl: bool = False + collection_prefix: str = "org_" + embedding_model: str = "text-embedding-3-small" + + @classmethod + def from_env(cls) -> "DashboardChatVectorStoreConfig": + """Build vector store config from environment variables.""" + return cls( + chroma_host=os.getenv("AI_DASHBOARD_CHAT_CHROMA_HOST", "localhost"), + chroma_port=int(os.getenv("AI_DASHBOARD_CHAT_CHROMA_PORT", 
"8003")), + chroma_ssl=_parse_bool(os.getenv("AI_DASHBOARD_CHAT_CHROMA_SSL"), False), + collection_prefix=os.getenv("AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX", "org_"), + embedding_model=os.getenv( + "AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL", + "text-embedding-3-small", + ), + ) diff --git a/ddpui/core/dashboard_chat/vector_documents.py b/ddpui/core/dashboard_chat/vector_documents.py new file mode 100644 index 000000000..2f6c2dd27 --- /dev/null +++ b/ddpui/core/dashboard_chat/vector_documents.py @@ -0,0 +1,104 @@ +"""Deterministic vector document models for dashboard chat retrieval.""" + +from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from hashlib import sha256 +from typing import Any + + +class DashboardChatSourceType(str, Enum): + """Context source types supported by dashboard chat retrieval.""" + + ORG_CONTEXT = "org_context" + DASHBOARD_CONTEXT = "dashboard_context" + DASHBOARD_EXPORT = "dashboard_export" + DBT_MANIFEST = "dbt_manifest" + DBT_CATALOG = "dbt_catalog" + + +def build_dashboard_chat_collection_name(org_id: int, prefix: str = "org_") -> str: + """Build the per-org Chroma collection name.""" + return f"{prefix}{org_id}" + + +def compute_dashboard_chat_document_hash(content: str) -> str: + """Compute a stable content hash used in vector metadata and IDs.""" + return sha256(content.encode("utf-8")).hexdigest() + + +def build_dashboard_chat_document_id( + org_id: int, + source_type: str, + source_identifier: str, + chunk_index: int, + content_hash: str, +) -> str: + """Build a deterministic document ID for Chroma upserts.""" + raw_identifier = ":".join( + [ + str(org_id), + source_type, + source_identifier, + str(chunk_index), + content_hash, + ] + ) + return sha256(raw_identifier.encode("utf-8")).hexdigest() + + +@dataclass(frozen=True) +class DashboardChatVectorDocument: + """Single retrieval chunk stored in the dashboard chat vector store.""" + + org_id: int + source_type: DashboardChatSourceType | str + 
source_identifier: str + content: str + dashboard_id: int | None = None + chart_id: int | None = None + title: str | None = None + chunk_index: int = 0 + updated_at: datetime | None = None + + @property + def source_type_value(self) -> str: + """Return the string form of the source type.""" + if isinstance(self.source_type, DashboardChatSourceType): + return self.source_type.value + return self.source_type + + @property + def document_hash(self) -> str: + """Return a stable content hash for this document.""" + return compute_dashboard_chat_document_hash(self.content) + + @property + def document_id(self) -> str: + """Return the deterministic Chroma ID for this document.""" + return build_dashboard_chat_document_id( + org_id=self.org_id, + source_type=self.source_type_value, + source_identifier=self.source_identifier, + chunk_index=self.chunk_index, + content_hash=self.document_hash, + ) + + def metadata(self) -> dict[str, Any]: + """Return Chroma-safe metadata for this document.""" + metadata: dict[str, Any] = { + "org_id": self.org_id, + "source_type": self.source_type_value, + "source_identifier": self.source_identifier, + "chunk_index": self.chunk_index, + "document_hash": self.document_hash, + } + if self.dashboard_id is not None: + metadata["dashboard_id"] = self.dashboard_id + if self.chart_id is not None: + metadata["chart_id"] = self.chart_id + if self.title is not None: + metadata["title"] = self.title + if self.updated_at is not None: + metadata["updated_at"] = self.updated_at.isoformat() + return metadata diff --git a/ddpui/core/dashboard_chat/vector_store.py b/ddpui/core/dashboard_chat/vector_store.py new file mode 100644 index 000000000..1c33e539c --- /dev/null +++ b/ddpui/core/dashboard_chat/vector_store.py @@ -0,0 +1,300 @@ +"""Chroma-backed vector store wrapper for dashboard chat.""" + +from dataclasses import dataclass +import os +from typing import Any, Protocol + +from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig +from 
ddpui.core.dashboard_chat.vector_documents import ( + DashboardChatSourceType, + DashboardChatVectorDocument, + build_dashboard_chat_collection_name, +) + + +class DashboardChatEmbeddingProvider(Protocol): + """Embedding provider interface used by the vector store wrapper.""" + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """Embed a batch of texts.""" + + def embed_query(self, text: str) -> list[float]: + """Embed a single query.""" + + +class OpenAIEmbeddingProvider: + """OpenAI embeddings adapter for dashboard chat retrieval.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "text-embedding-3-small", + client: Any = None, + ): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.model = model + if client is None: + if not self.api_key: + raise ValueError("OPENAI_API_KEY must be set for dashboard chat embeddings") + from openai import OpenAI + + client = OpenAI(api_key=self.api_key) + self.client = client + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """Embed a batch of documents using OpenAI.""" + if not texts: + return [] + response = self.client.embeddings.create(model=self.model, input=texts) + return [item.embedding for item in response.data] + + def embed_query(self, text: str) -> list[float]: + """Embed a single query using the document embedding path.""" + return self.embed_documents([text])[0] + + +@dataclass(frozen=True) +class DashboardChatVectorQueryResult: + """Single query result returned from Chroma.""" + + document_id: str + content: str + metadata: dict[str, Any] + distance: float | None = None + + +@dataclass(frozen=True) +class DashboardChatStoredDocument: + """Stored document metadata returned from Chroma collection reads.""" + + document_id: str + metadata: dict[str, Any] + content: str | None = None + + +class ChromaDashboardChatVectorStore: + """Thin wrapper around the Chroma HTTP client with Dalgo-specific conventions.""" + + def __init__( + self, + config: 
DashboardChatVectorStoreConfig | None = None, + embedding_provider: DashboardChatEmbeddingProvider | None = None, + client: Any = None, + ): + self.config = config or DashboardChatVectorStoreConfig.from_env() + self.embedding_provider = embedding_provider or OpenAIEmbeddingProvider( + model=self.config.embedding_model + ) + self.client = client or self._build_client() + + def _build_client(self) -> Any: + """Build the real Chroma HTTP client lazily.""" + from chromadb import HttpClient + + return HttpClient( + host=self.config.chroma_host, + port=self.config.chroma_port, + ssl=self.config.chroma_ssl, + ) + + def collection_name(self, org_id: int) -> str: + """Return the Chroma collection name for an org.""" + return build_dashboard_chat_collection_name(org_id, self.config.collection_prefix) + + def create_collection(self, org_id: int) -> Any: + """Create or load the Chroma collection for an org.""" + return self.client.get_or_create_collection( + name=self.collection_name(org_id), + metadata={"org_id": str(org_id)}, + ) + + def load_collection(self, org_id: int) -> Any | None: + """Load an existing Chroma collection for an org.""" + from chromadb.errors import InvalidCollectionException + + try: + return self.client.get_collection(name=self.collection_name(org_id)) + except (InvalidCollectionException, ValueError): + return None + + def delete_collection(self, org_id: int) -> bool: + """Delete the Chroma collection for an org if it exists.""" + if self.load_collection(org_id) is None: + return False + self.client.delete_collection(name=self.collection_name(org_id)) + return True + + def get_documents( + self, + org_id: int, + source_types: list[DashboardChatSourceType | str] | None = None, + dashboard_id: int | None = None, + include_documents: bool = False, + ) -> list[DashboardChatStoredDocument]: + """Load stored documents for an org using metadata filters.""" + collection = self.load_collection(org_id) + if collection is None: + return [] + + include = 
["metadatas"] + if include_documents: + include.append("documents") + + result = collection.get( + where=self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id), + include=include, + ) + return self._parse_get_result(result, include_documents=include_documents) + + def delete_documents( + self, + org_id: int, + ids: list[str] | None = None, + source_types: list[DashboardChatSourceType | str] | None = None, + dashboard_id: int | None = None, + ) -> int: + """Delete matching documents from an org collection.""" + collection = self.load_collection(org_id) + if collection is None: + return 0 + + where = self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id) + if ids is None and where is None: + return 0 + + deleted_count = ( + len(ids) + if ids is not None + else len( + self.get_documents( + org_id, + source_types=source_types, + dashboard_id=dashboard_id, + include_documents=False, + ) + ) + ) + collection.delete(ids=ids, where=where) + return deleted_count + + def upsert_documents( + self, + org_id: int, + documents: list[DashboardChatVectorDocument], + ) -> list[str]: + """Upsert documents into the org-specific Chroma collection.""" + if not documents: + return [] + + collection = self.create_collection(org_id) + contents = [document.content for document in documents] + document_ids = [document.document_id for document in documents] + metadatas = [document.metadata() for document in documents] + embeddings = self.embedding_provider.embed_documents(contents) + + collection.upsert( + ids=document_ids, + documents=contents, + metadatas=metadatas, + embeddings=embeddings, + ) + return document_ids + + def query( + self, + org_id: int, + query_text: str, + n_results: int = 5, + source_types: list[DashboardChatSourceType | str] | None = None, + dashboard_id: int | None = None, + ) -> list[DashboardChatVectorQueryResult]: + """Query the org-specific Chroma collection.""" + collection = self.load_collection(org_id) + if 
collection is None: + return [] + + where = self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id) + result = collection.query( + query_embeddings=[self.embedding_provider.embed_query(query_text)], + n_results=n_results, + where=where, + include=["documents", "metadatas", "distances"], + ) + return self._parse_query_result(result) + + @staticmethod + def _build_where_clause( + source_types: list[DashboardChatSourceType | str] | None = None, + dashboard_id: int | None = None, + ) -> dict[str, Any] | None: + """Build the metadata filter used for Chroma queries.""" + filters: list[dict[str, Any]] = [] + + if source_types: + normalized_types = [ + source_type.value + if isinstance(source_type, DashboardChatSourceType) + else source_type + for source_type in source_types + ] + if len(normalized_types) == 1: + filters.append({"source_type": normalized_types[0]}) + else: + filters.append({"source_type": {"$in": normalized_types}}) + + if dashboard_id is not None: + filters.append({"dashboard_id": dashboard_id}) + + if not filters: + return None + if len(filters) == 1: + return filters[0] + return {"$and": filters} + + @staticmethod + def _parse_query_result(result: dict[str, Any]) -> list[DashboardChatVectorQueryResult]: + """Parse Chroma's nested result shape into flat typed rows.""" + ids = result.get("ids", [[]]) + documents = result.get("documents", [[]]) + metadatas = result.get("metadatas", [[]]) + distances = result.get("distances", [[]]) + + parsed_results: list[DashboardChatVectorQueryResult] = [] + for document_id, content, metadata, distance in zip( + ids[0] if ids else [], + documents[0] if documents else [], + metadatas[0] if metadatas else [], + distances[0] if distances else [], + ): + parsed_results.append( + DashboardChatVectorQueryResult( + document_id=document_id, + content=content, + metadata=metadata, + distance=distance, + ) + ) + return parsed_results + + @staticmethod + def _parse_get_result( + result: dict[str, Any], + 
include_documents: bool = False, + ) -> list[DashboardChatStoredDocument]: + """Parse Chroma's get result into typed stored-document rows.""" + ids = result.get("ids", []) + metadatas = result.get("metadatas", []) + documents = result.get("documents", []) if include_documents else [] + + parsed_results: list[DashboardChatStoredDocument] = [] + for index, document_id in enumerate(ids): + parsed_results.append( + DashboardChatStoredDocument( + document_id=document_id, + metadata=metadatas[index] if index < len(metadatas) else {}, + content=documents[index] + if include_documents and index < len(documents) + else None, + ) + ) + return parsed_results diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py b/ddpui/tests/core/dashboard_chat/test_vector_store.py new file mode 100644 index 000000000..287bef6bd --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -0,0 +1,299 @@ +"""Tests for dashboard chat vector document and store primitives.""" + +from datetime import datetime, timezone +from unittest.mock import patch + +from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig +from ddpui.core.dashboard_chat.vector_documents import ( + DashboardChatSourceType, + DashboardChatVectorDocument, + build_dashboard_chat_collection_name, +) +from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore + + +class FakeEmbeddingProvider: + """Deterministic embedding provider for vector store tests.""" + + def embed_documents(self, texts): + return [[float(index), float(len(text))] for index, text in enumerate(texts, start=1)] + + def embed_query(self, text): + return [99.0, float(len(text))] + + +class FakeCollection: + """In-memory collection stub used by the vector store tests.""" + + def __init__(self): + self.upsert_calls = [] + self.query_calls = [] + self.get_calls = [] + self.delete_calls = [] + self.documents = {} + self.query_response = { + "ids": [["doc-1"]], + "documents": [["matched content"]], + 
"metadatas": [[{"source_type": "org_context"}]], + "distances": [[0.12]], + } + + def upsert(self, **kwargs): + self.upsert_calls.append(kwargs) + for document_id, document, metadata in zip( + kwargs["ids"], + kwargs["documents"], + kwargs["metadatas"], + ): + self.documents[document_id] = { + "id": document_id, + "document": document, + "metadata": metadata, + } + + def query(self, **kwargs): + self.query_calls.append(kwargs) + return self.query_response + + def get(self, **kwargs): + self.get_calls.append(kwargs) + rows = list(self.documents.values()) + where = kwargs.get("where") + if where: + rows = [row for row in rows if _matches_where(row["metadata"], where)] + return { + "ids": [row["id"] for row in rows], + "documents": [row["document"] for row in rows], + "metadatas": [row["metadata"] for row in rows], + } + + def delete(self, ids=None, where=None): + self.delete_calls.append({"ids": ids, "where": where}) + if ids is not None: + for document_id in ids: + self.documents.pop(document_id, None) + return + + for document_id, row in list(self.documents.items()): + if _matches_where(row["metadata"], where): + self.documents.pop(document_id, None) + + +def _matches_where(metadata, where): + """Minimal Chroma where-clause matcher for test stubs.""" + if where is None: + return True + if "$and" in where: + return all(_matches_where(metadata, condition) for condition in where["$and"]) + for key, value in where.items(): + if isinstance(value, dict) and "$in" in value: + if metadata.get(key) not in value["$in"]: + return False + continue + if metadata.get(key) != value: + return False + return True + + +class FakeChromaClient: + """In-memory Chroma HTTP client stub.""" + + def __init__(self): + self.collections = {} + self.deleted_collections = [] + + def get_or_create_collection(self, name, metadata=None): + if name not in self.collections: + self.collections[name] = FakeCollection() + return self.collections[name] + + def get_collection(self, name): + if name not 
in self.collections: + raise ValueError("collection does not exist") + return self.collections[name] + + def delete_collection(self, name): + self.deleted_collections.append(name) + del self.collections[name] + + +def test_dashboard_chat_vector_store_config_reads_env(): + """Vector store config should read the dedicated dashboard chat env vars.""" + with patch.dict( + "os.environ", + { + "AI_DASHBOARD_CHAT_CHROMA_HOST": "chroma.internal", + "AI_DASHBOARD_CHAT_CHROMA_PORT": "8100", + "AI_DASHBOARD_CHAT_CHROMA_SSL": "true", + "AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX": "tenant_", + "AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL": "text-embedding-3-large", + }, + ): + config = DashboardChatVectorStoreConfig.from_env() + + assert config.chroma_host == "chroma.internal" + assert config.chroma_port == 8100 + assert config.chroma_ssl is True + assert config.collection_prefix == "tenant_" + assert config.embedding_model == "text-embedding-3-large" + + +def test_collection_name_uses_org_prefix(): + """Collections should be split by org using the configured prefix.""" + assert build_dashboard_chat_collection_name(42) == "org_42" + assert build_dashboard_chat_collection_name(42, prefix="tenant_") == "tenant_42" + + +def test_vector_document_has_stable_id_and_required_metadata(): + """Document IDs should be deterministic and metadata should include required keys.""" + updated_at = datetime(2026, 3, 17, 1, 0, tzinfo=timezone.utc) + document = DashboardChatVectorDocument( + org_id=7, + source_type=DashboardChatSourceType.DBT_MANIFEST, + source_identifier="model.public.fact_enrollments", + content="manifest chunk", + dashboard_id=9, + chart_id=12, + title="Fact Enrollments", + chunk_index=3, + updated_at=updated_at, + ) + + assert document.document_id == document.document_id + assert document.metadata() == { + "org_id": 7, + "source_type": "dbt_manifest", + "source_identifier": "model.public.fact_enrollments", + "chunk_index": 3, + "document_hash": document.document_hash, + 
"dashboard_id": 9, + "chart_id": 12, + "title": "Fact Enrollments", + "updated_at": updated_at.isoformat(), + } + + +def test_upsert_documents_uses_embeddings_and_metadata(): + """Upserts should use deterministic IDs, embeddings, and per-org collections.""" + fake_client = FakeChromaClient() + store = ChromaDashboardChatVectorStore( + config=DashboardChatVectorStoreConfig(collection_prefix="org_"), + embedding_provider=FakeEmbeddingProvider(), + client=fake_client, + ) + documents = [ + DashboardChatVectorDocument( + org_id=11, + source_type=DashboardChatSourceType.ORG_CONTEXT, + source_identifier="org_context", + content="organization context chunk", + ), + DashboardChatVectorDocument( + org_id=11, + source_type=DashboardChatSourceType.DASHBOARD_CONTEXT, + source_identifier="dashboard:5:context", + content="dashboard context chunk", + dashboard_id=5, + title="Impact Overview", + chunk_index=1, + ), + ] + + document_ids = store.upsert_documents(11, documents) + collection = fake_client.collections["org_11"] + upsert_call = collection.upsert_calls[0] + + assert document_ids == [documents[0].document_id, documents[1].document_id] + assert upsert_call["ids"] == document_ids + assert upsert_call["documents"] == ["organization context chunk", "dashboard context chunk"] + assert upsert_call["metadatas"][0]["source_type"] == "org_context" + assert upsert_call["metadatas"][1]["dashboard_id"] == 5 + assert upsert_call["metadatas"][1]["title"] == "Impact Overview" + assert upsert_call["embeddings"] == [[1.0, 26.0], [2.0, 23.0]] + + +def test_query_scopes_to_org_collection_and_where_filters(): + """Queries should stay inside the org collection and forward source/dashboard filters.""" + fake_client = FakeChromaClient() + fake_client.get_or_create_collection("org_3") + store = ChromaDashboardChatVectorStore( + config=DashboardChatVectorStoreConfig(), + embedding_provider=FakeEmbeddingProvider(), + client=fake_client, + ) + + results = store.query( + 3, + query_text="what 
changed?", + source_types=[DashboardChatSourceType.DBT_CATALOG, DashboardChatSourceType.ORG_CONTEXT], + dashboard_id=9, + ) + + query_call = fake_client.collections["org_3"].query_calls[0] + assert query_call["query_embeddings"] == [[99.0, 13.0]] + assert query_call["where"] == { + "$and": [ + {"source_type": {"$in": ["dbt_catalog", "org_context"]}}, + {"dashboard_id": 9}, + ] + } + assert results[0].document_id == "doc-1" + assert results[0].content == "matched content" + assert results[0].distance == 0.12 + + +def test_delete_collection_returns_false_for_missing_org(): + """Deleting a missing collection should be a no-op.""" + store = ChromaDashboardChatVectorStore( + config=DashboardChatVectorStoreConfig(), + embedding_provider=FakeEmbeddingProvider(), + client=FakeChromaClient(), + ) + + assert store.delete_collection(404) is False + + +def test_get_documents_and_delete_documents_respect_where_filters(): + """Collection reads and deletes should honor source and dashboard scoping.""" + fake_client = FakeChromaClient() + store = ChromaDashboardChatVectorStore( + config=DashboardChatVectorStoreConfig(), + embedding_provider=FakeEmbeddingProvider(), + client=fake_client, + ) + documents = [ + DashboardChatVectorDocument( + org_id=23, + source_type=DashboardChatSourceType.ORG_CONTEXT, + source_identifier="org:23:context", + content="org chunk", + ), + DashboardChatVectorDocument( + org_id=23, + source_type=DashboardChatSourceType.DASHBOARD_CONTEXT, + source_identifier="dashboard:7:context", + content="dashboard chunk", + dashboard_id=7, + ), + ] + store.upsert_documents(23, documents) + + stored_documents = store.get_documents( + 23, + source_types=[DashboardChatSourceType.DASHBOARD_CONTEXT], + dashboard_id=7, + include_documents=True, + ) + + assert len(stored_documents) == 1 + assert stored_documents[0].content == "dashboard chunk" + assert stored_documents[0].metadata["dashboard_id"] == 7 + + deleted_count = store.delete_documents( + 23, + 
"""dbt docs helpers for dashboard chat context builds."""

from dataclasses import dataclass
import json
from pathlib import Path

import yaml
from django.utils import timezone

from ddpui.core.orgdbt_manager import DbtProjectManager
from ddpui.ddpprefect import prefect_service
from ddpui.models.org import Org, OrgDbt
from ddpui.utils.custom_logger import CustomLogger

logger = CustomLogger("ddpui.dashboard_chat.dbt_docs")


class DashboardChatDbtDocsError(Exception):
    """Raised when dbt docs generation for dashboard chat fails."""


@dataclass(frozen=True)
class DashboardChatDbtDocsArtifacts:
    """dbt docs artifacts required to build dashboard chat context."""

    manifest_json: dict
    catalog_json: dict
    generated_at: timezone.datetime
    target_dir: Path


def _write_profiles_file(org: Org, orgdbt: OrgDbt) -> Path:
    """Materialize the profiles.yml the dbt CLI needs for this org.

    Returns the path of the written file. Raises DashboardChatDbtDocsError
    when the CLI profile block is missing or cannot be loaded from Prefect.
    """
    if orgdbt.cli_profile_block is None:
        raise DashboardChatDbtDocsError("dbt CLI profile block not found")

    try:
        project_params = DbtProjectManager.gather_dbt_project_params(org, orgdbt)
        block_name = orgdbt.cli_profile_block.block_name
        profile = prefect_service.get_dbt_cli_profile_block(block_name)["profile"]
    except Exception as error:
        raise DashboardChatDbtDocsError(
            f"Failed to load dbt CLI profile for dashboard chat: {error}"
        ) from error

    # NOTE(review): the rendered profile contains warehouse credentials and
    # stays on disk after the run — confirm this matches existing dbt flows.
    profiles_dir = Path(project_params.project_dir) / "profiles"
    profiles_dir.mkdir(parents=True, exist_ok=True)
    profile_path = profiles_dir / "profiles.yml"
    with open(profile_path, "w", encoding="utf-8") as profile_file:
        yaml.safe_dump(profile, profile_file)
    return profile_path


def generate_dashboard_chat_dbt_docs_artifacts(
    org: Org,
    orgdbt: OrgDbt,
) -> DashboardChatDbtDocsArtifacts:
    """Run `dbt deps` + `dbt docs generate` and load manifest/catalog JSON.

    Also stamps `orgdbt.docs_generated_at`. Raises DashboardChatDbtDocsError
    when the workspace is missing, a dbt command fails, or the expected
    artifacts are not produced.
    """
    if orgdbt is None:
        raise DashboardChatDbtDocsError("dbt workspace not configured")

    _write_profiles_file(org, orgdbt)

    try:
        # deps must run first so docs generate sees all installed packages.
        logger.info("running dbt deps for dashboard chat org=%s", org.id)
        DbtProjectManager.run_dbt_command(
            org,
            orgdbt,
            command=["deps"],
            keyword_args={"profiles-dir": "profiles"},
        )
        logger.info("running dbt docs generate for dashboard chat org=%s", org.id)
        DbtProjectManager.run_dbt_command(
            org,
            orgdbt,
            command=["docs", "generate"],
            keyword_args={"profiles-dir": "profiles"},
        )
    except Exception as error:
        raise DashboardChatDbtDocsError(
            f"dbt docs generate failed for dashboard chat: {error}"
        ) from error

    target_dir = Path(DbtProjectManager.get_dbt_project_dir(orgdbt)) / "target"
    manifest_path = target_dir / "manifest.json"
    catalog_path = target_dir / "catalog.json"

    # Validate both artifacts exist before reading either one.
    for artifact_path, artifact_name in (
        (manifest_path, "manifest.json"),
        (catalog_path, "catalog.json"),
    ):
        if not artifact_path.exists():
            raise DashboardChatDbtDocsError(
                f"dbt docs generate did not produce {artifact_name}"
            )

    manifest_json = json.loads(manifest_path.read_text(encoding="utf-8"))
    catalog_json = json.loads(catalog_path.read_text(encoding="utf-8"))

    generated_at = timezone.now()
    orgdbt.docs_generated_at = generated_at
    orgdbt.save(update_fields=["docs_generated_at", "updated_at"])

    return DashboardChatDbtDocsArtifacts(
        manifest_json=manifest_json,
        catalog_json=catalog_json,
        generated_at=generated_at,
        target_dir=target_dir,
    )
DashboardChatIngestionResult: + """Summary of one completed org context build.""" + + org_id: int + docs_generated_at: timezone.datetime + vector_ingested_at: timezone.datetime + source_document_counts: dict[str, int] + upserted_document_ids: list[str] + deleted_document_ids: list[str] + + +def _normalize_text(value: str) -> str: + """Normalize text before chunking so document IDs stay deterministic.""" + return "\n".join( + line.rstrip() for line in (value or "").replace("\r\n", "\n").split("\n") + ).strip() + + +def chunk_dashboard_chat_text(text: str, max_chars: int = MARKDOWN_CHUNK_MAX_CHARS) -> list[str]: + """Chunk text by paragraph blocks and hard-wrap only when needed.""" + normalized = _normalize_text(text) + if not normalized: + return [] + + blocks = [block.strip() for block in normalized.split("\n\n") if block.strip()] + if not blocks: + return [] + + chunks: list[str] = [] + current_blocks: list[str] = [] + current_length = 0 + + for block in blocks: + block_length = len(block) + if block_length > max_chars: + if current_blocks: + chunks.append("\n\n".join(current_blocks)) + current_blocks = [] + current_length = 0 + for start_index in range(0, block_length, max_chars): + chunks.append(block[start_index : start_index + max_chars].strip()) + continue + + separator_length = 2 if current_blocks else 0 + if current_length + separator_length + block_length > max_chars: + chunks.append("\n\n".join(current_blocks)) + current_blocks = [block] + current_length = block_length + continue + + current_blocks.append(block) + current_length += separator_length + block_length + + if current_blocks: + chunks.append("\n\n".join(current_blocks)) + return chunks + + +class DashboardChatIngestionService: + """Build and ingest org-scoped retrieval documents for dashboard chat.""" + + def __init__( + self, + vector_store: ChromaDashboardChatVectorStore | None = None, + dbt_docs_generator: Callable[[Org, object], DashboardChatDbtDocsArtifacts] | None = None, + ): + 
self.vector_store = vector_store or ChromaDashboardChatVectorStore() + self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts + + def ingest_org(self, org: Org) -> DashboardChatIngestionResult: + """Run dbt docs generation and rebuild the desired vector documents for an org.""" + if org.dbt is None: + raise DashboardChatIngestionError("dbt workspace not configured") + + dbt_docs = self.dbt_docs_generator(org, org.dbt) + documents_by_source = self._build_documents(org, dbt_docs) + desired_documents = [ + document + for source_type in INGEST_SOURCE_ORDER + for document in documents_by_source[source_type.value] + ] + + existing_documents = self.vector_store.get_documents(org.id) + existing_document_ids = {document.document_id for document in existing_documents} + desired_document_ids = {document.document_id for document in desired_documents} + + new_documents = [ + document + for document in desired_documents + if document.document_id not in existing_document_ids + ] + upserted_document_ids: list[str] = [] + if new_documents: + upserted_document_ids = sorted(self.vector_store.upsert_documents(org.id, new_documents)) + + stale_document_ids = sorted(existing_document_ids - desired_document_ids) + if stale_document_ids: + self.vector_store.delete_documents(org.id, ids=stale_document_ids) + + vector_ingested_at = timezone.now() + org.dbt.vector_last_ingested_at = vector_ingested_at + org.dbt.save(update_fields=["vector_last_ingested_at", "updated_at"]) + + return DashboardChatIngestionResult( + org_id=org.id, + docs_generated_at=dbt_docs.generated_at, + vector_ingested_at=vector_ingested_at, + source_document_counts={ + source_type.value: len(documents_by_source[source_type.value]) + for source_type in INGEST_SOURCE_ORDER + }, + upserted_document_ids=upserted_document_ids, + deleted_document_ids=stale_document_ids, + ) + + def _build_documents( + self, + org: Org, + dbt_docs: DashboardChatDbtDocsArtifacts, + ) -> dict[str, 
list[DashboardChatVectorDocument]]: + """Build the full desired vector document set for an org.""" + documents_by_source: dict[str, list[DashboardChatVectorDocument]] = defaultdict(list) + + org_context = OrgAIContext.objects.filter(org=org).first() + if org_context and org_context.markdown: + documents_by_source[DashboardChatSourceType.ORG_CONTEXT.value].extend( + self._build_markdown_documents( + org_id=org.id, + source_type=DashboardChatSourceType.ORG_CONTEXT, + source_identifier=f"org:{org.id}:context", + markdown=org_context.markdown, + title=f"{org.name} organization context", + updated_at=org_context.updated_at, + ) + ) + + dashboard_contexts = { + context.dashboard_id: context + for context in DashboardAIContext.objects.filter( + dashboard__org=org, + ).select_related("dashboard") + } + dashboards = list(Dashboard.objects.filter(org=org).order_by("id")) + + for dashboard in dashboards: + dashboard_context = dashboard_contexts.get(dashboard.id) + if dashboard_context and dashboard_context.markdown: + documents_by_source[DashboardChatSourceType.DASHBOARD_CONTEXT.value].extend( + self._build_markdown_documents( + org_id=org.id, + source_type=DashboardChatSourceType.DASHBOARD_CONTEXT, + source_identifier=f"dashboard:{dashboard.id}:context", + markdown=dashboard_context.markdown, + dashboard_id=dashboard.id, + title=f"{dashboard.title} dashboard context", + updated_at=dashboard_context.updated_at, + ) + ) + + export_payload = DashboardService.export_dashboard_context(dashboard.id, org) + documents_by_source[DashboardChatSourceType.DASHBOARD_EXPORT.value].extend( + self._build_dashboard_export_documents(org.id, dashboard.id, export_payload) + ) + + documents_by_source[DashboardChatSourceType.DBT_MANIFEST.value].extend( + self._build_manifest_documents(org.id, dbt_docs) + ) + documents_by_source[DashboardChatSourceType.DBT_CATALOG.value].extend( + self._build_catalog_documents(org.id, dbt_docs) + ) + + return { + source_type.value: 
documents_by_source.get(source_type.value, []) + for source_type in INGEST_SOURCE_ORDER + } + + def _build_markdown_documents( + self, + org_id: int, + source_type: DashboardChatSourceType, + source_identifier: str, + markdown: str, + title: str, + dashboard_id: int | None = None, + updated_at: timezone.datetime | None = None, + ) -> list[DashboardChatVectorDocument]: + """Chunk a markdown source into deterministic vector documents.""" + return [ + DashboardChatVectorDocument( + org_id=org_id, + source_type=source_type, + source_identifier=source_identifier, + content=chunk, + dashboard_id=dashboard_id, + title=title, + chunk_index=chunk_index, + updated_at=updated_at, + ) + for chunk_index, chunk in enumerate(chunk_dashboard_chat_text(markdown)) + ] + + def _build_dashboard_export_documents( + self, + org_id: int, + dashboard_id: int, + export_payload: dict, + ) -> list[DashboardChatVectorDocument]: + """Build dashboard summary and chart documents from the export contract.""" + documents: list[DashboardChatVectorDocument] = [] + dashboard_payload = export_payload["dashboard"] + dashboard_title = dashboard_payload.get("title") or f"Dashboard {dashboard_id}" + + summary_lines = [ + f"Dashboard title: {dashboard_title}", + f"Dashboard id: {dashboard_payload.get('id')}", + f"Dashboard type: {dashboard_payload.get('dashboard_type')}", + ] + if dashboard_payload.get("description"): + summary_lines.append(f"Description: {dashboard_payload['description']}") + + filters = dashboard_payload.get("filters") or [] + if filters: + summary_lines.append("Filters:") + for dashboard_filter in filters: + summary_lines.append( + "- {name} ({filter_type}) from {schema}.{table}.{column}".format( + name=dashboard_filter.get("name") or dashboard_filter.get("column_name"), + filter_type=dashboard_filter.get("filter_type"), + schema=dashboard_filter.get("schema_name"), + table=dashboard_filter.get("table_name"), + column=dashboard_filter.get("column_name"), + ) + ) + + charts = 
export_payload.get("charts") or [] + if charts: + summary_lines.append("Charts:") + for chart in charts: + summary_lines.append( + "- {title} [{chart_type}] from {schema}.{table}".format( + title=chart.get("title"), + chart_type=chart.get("chart_type"), + schema=chart.get("schema_name"), + table=chart.get("table_name"), + ) + ) + + documents.extend( + self._build_markdown_documents( + org_id=org_id, + source_type=DashboardChatSourceType.DASHBOARD_EXPORT, + source_identifier=f"dashboard:{dashboard_id}:summary", + markdown="\n\n".join(summary_lines), + dashboard_id=dashboard_id, + title=dashboard_title, + ) + ) + + for chart in charts: + chart_title = chart.get("title") or f"Chart {chart.get('id')}" + chart_lines = [ + f"Dashboard id: {dashboard_id}", + f"Chart id: {chart.get('id')}", + f"Chart title: {chart_title}", + f"Chart type: {chart.get('chart_type')}", + "Data source: {schema}.{table}".format( + schema=chart.get("schema_name"), + table=chart.get("table_name"), + ), + ] + if chart.get("description"): + chart_lines.append(f"Description: {chart['description']}") + if chart.get("extra_config"): + chart_lines.append( + "Extra config: " + + json.dumps(chart["extra_config"], sort_keys=True, separators=(",", ":")) + ) + + documents.extend( + self._build_markdown_documents( + org_id=org_id, + source_type=DashboardChatSourceType.DASHBOARD_EXPORT, + source_identifier=f"dashboard:{dashboard_id}:chart:{chart['id']}", + markdown="\n\n".join(chart_lines), + dashboard_id=dashboard_id, + title=chart_title, + ) + ) + + return documents + + def _build_manifest_documents( + self, + org_id: int, + dbt_docs: DashboardChatDbtDocsArtifacts, + ) -> list[DashboardChatVectorDocument]: + """Build vector documents from manifest.json models and sources.""" + manifest_json = dbt_docs.manifest_json + project_name = manifest_json.get("metadata", {}).get("project_name") + documents: list[DashboardChatVectorDocument] = [] + + for unique_id, source in sorted((manifest_json.get("sources") or 
{}).items()): + if not self._include_dbt_unique_id(unique_id, project_name): + continue + source_name = source.get("name") or unique_id + documents.extend( + self._build_markdown_documents( + org_id=org_id, + source_type=DashboardChatSourceType.DBT_MANIFEST, + source_identifier=f"manifest:{unique_id}", + markdown=self._format_manifest_source(unique_id, source), + title=source_name, + updated_at=dbt_docs.generated_at, + ) + ) + + for unique_id, node in sorted((manifest_json.get("nodes") or {}).items()): + if node.get("resource_type") != "model": + continue + if not self._include_dbt_unique_id(unique_id, project_name): + continue + model_name = node.get("name") or unique_id + documents.extend( + self._build_markdown_documents( + org_id=org_id, + source_type=DashboardChatSourceType.DBT_MANIFEST, + source_identifier=f"manifest:{unique_id}", + markdown=self._format_manifest_model(unique_id, node), + title=model_name, + updated_at=dbt_docs.generated_at, + ) + ) + + return documents + + def _build_catalog_documents( + self, + org_id: int, + dbt_docs: DashboardChatDbtDocsArtifacts, + ) -> list[DashboardChatVectorDocument]: + """Build vector documents from catalog.json models and sources.""" + catalog_json = dbt_docs.catalog_json + project_name = dbt_docs.manifest_json.get("metadata", {}).get("project_name") + documents: list[DashboardChatVectorDocument] = [] + + for unique_id, source in sorted((catalog_json.get("sources") or {}).items()): + if not self._include_dbt_unique_id(unique_id, project_name): + continue + source_name = ((source.get("metadata") or {}).get("name")) or unique_id + documents.extend( + self._build_markdown_documents( + org_id=org_id, + source_type=DashboardChatSourceType.DBT_CATALOG, + source_identifier=f"catalog:{unique_id}", + markdown=self._format_catalog_entry(unique_id, source, entry_type="source"), + title=source_name, + updated_at=dbt_docs.generated_at, + ) + ) + + for unique_id, node in sorted((catalog_json.get("nodes") or {}).items()): + if not 
self._include_dbt_unique_id(unique_id, project_name): + continue + model_name = ((node.get("metadata") or {}).get("name")) or unique_id + documents.extend( + self._build_markdown_documents( + org_id=org_id, + source_type=DashboardChatSourceType.DBT_CATALOG, + source_identifier=f"catalog:{unique_id}", + markdown=self._format_catalog_entry(unique_id, node, entry_type="model"), + title=model_name, + updated_at=dbt_docs.generated_at, + ) + ) + + return documents + + @staticmethod + def _include_dbt_unique_id(unique_id: str, project_name: str | None) -> bool: + """Exclude package docs that do not belong to the org project.""" + parts = unique_id.split(".") + if len(parts) < 2: + return True + package_name = parts[1] + if package_name in DBT_IGNORE_PACKAGES: + return False + if project_name and package_name != project_name: + return False + return True + + @staticmethod + def _format_manifest_source(unique_id: str, source: dict) -> str: + """Format a manifest source entry into stable text.""" + column_lines = DashboardChatIngestionService._format_columns(source.get("columns") or {}) + blocks = [ + f"dbt manifest source: {source.get('schema')}.{source.get('name')}", + f"Unique id: {unique_id}", + f"Source name: {source.get('source_name')}", + f"Database: {source.get('database')}", + ] + if column_lines: + blocks.append("Columns:\n" + "\n".join(column_lines)) + return "\n\n".join(block for block in blocks if block and block != "Database: None") + + @staticmethod + def _format_manifest_model(unique_id: str, node: dict) -> str: + """Format a manifest model entry into stable text.""" + blocks = [ + f"dbt manifest model: {node.get('schema')}.{node.get('name')}", + f"Unique id: {unique_id}", + f"Path: {node.get('original_file_path') or node.get('path')}", + f"Description: {node.get('description')}", + f"Database: {node.get('database')}", + ] + depends_on_nodes = sorted(node.get("depends_on", {}).get("nodes") or []) + if depends_on_nodes: + blocks.append( + "Depends on:\n" + 
"\n".join(f"- {dependency}" for dependency in depends_on_nodes) + ) + column_lines = DashboardChatIngestionService._format_columns(node.get("columns") or {}) + if column_lines: + blocks.append("Columns:\n" + "\n".join(column_lines)) + return "\n\n".join( + block + for block in blocks + if block and block not in {"Database: None", "Description: None"} + ) + + @staticmethod + def _format_catalog_entry(unique_id: str, entry: dict, entry_type: str) -> str: + """Format a catalog source/model entry into stable text.""" + metadata = entry.get("metadata") or {} + blocks = [ + f"dbt catalog {entry_type}: {metadata.get('schema')}.{metadata.get('name')}", + f"Unique id: {unique_id}", + f"Database: {metadata.get('database')}", + f"Type: {metadata.get('type')}", + ] + column_lines = DashboardChatIngestionService._format_catalog_columns( + entry.get("columns") or {} + ) + if column_lines: + blocks.append("Columns:\n" + "\n".join(column_lines)) + return "\n\n".join(block for block in blocks if block and block != "Database: None") + + @staticmethod + def _format_columns(columns: dict) -> list[str]: + """Format manifest column metadata into stable bullet lines.""" + formatted_columns: list[str] = [] + for column_key, column in sorted(columns.items()): + column_name = column.get("name") or column_key + line = f"- {column_name}" + if column.get("data_type"): + line += f" ({column['data_type']})" + if column.get("description"): + line += f": {column['description']}" + formatted_columns.append(line) + return formatted_columns + + @staticmethod + def _format_catalog_columns(columns: dict) -> list[str]: + """Format catalog column metadata into stable bullet lines.""" + formatted_columns: list[str] = [] + for column_name, column in sorted(columns.items()): + line = f"- {column_name}" + if column.get("type"): + line += f" ({column['type']})" + if column.get("comment"): + line += f": {column['comment']}" + formatted_columns.append(line) + return formatted_columns diff --git 
a/ddpui/tests/core/dashboard_chat/test_ingestion.py b/ddpui/tests/core/dashboard_chat/test_ingestion.py new file mode 100644 index 000000000..e0aa5a50b --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_ingestion.py @@ -0,0 +1,438 @@ +import os +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from unittest.mock import Mock, patch +import json + +import django +import pytest + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +from django.contrib.auth.models import User +from django.utils import timezone + +from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.core.dashboard_chat.dbt_docs import ( + DashboardChatDbtDocsArtifacts, + generate_dashboard_chat_dbt_docs_artifacts, +) +from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService +from ddpui.core.dashboard_chat.vector_store import DashboardChatStoredDocument +from ddpui.ddpdbt.schema import DbtProjectParams +from ddpui.ddpprefect import DBTCLIPROFILE +from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import DashboardAIContext, OrgAIContext +from ddpui.models.org import Org, OrgDbt, OrgPrefectBlockv1 +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.models.visualization import Chart +from ddpui.tests.api_tests.test_user_org_api import seed_db + +pytestmark = pytest.mark.django_db + + +class FakeDashboardChatVectorStore: + """In-memory vector store used to exercise ingest diffing logic.""" + + def __init__(self): + self.documents_by_org = {} + self.upsert_calls = [] + self.delete_calls = [] + + def get_documents( + self, + org_id, + source_types=None, + dashboard_id=None, + include_documents=False, + ): + rows = list(self.documents_by_org.get(org_id, {}).values()) + if source_types: + allowed = { + source_type.value if hasattr(source_type, "value") else source_type + for 
source_type in source_types + } + rows = [row for row in rows if row.metadata["source_type"] in allowed] + if dashboard_id is not None: + rows = [row for row in rows if row.metadata.get("dashboard_id") == dashboard_id] + return [ + DashboardChatStoredDocument( + document_id=row.document_id, + metadata=row.metadata, + content=row.content if include_documents else None, + ) + for row in rows + ] + + def upsert_documents(self, org_id, documents): + self.upsert_calls.append([document.document_id for document in documents]) + org_documents = self.documents_by_org.setdefault(org_id, {}) + for document in documents: + org_documents[document.document_id] = DashboardChatStoredDocument( + document_id=document.document_id, + metadata=document.metadata(), + content=document.content, + ) + return [document.document_id for document in documents] + + def delete_documents(self, org_id, ids=None, source_types=None, dashboard_id=None): + self.delete_calls.append( + { + "org_id": org_id, + "ids": list(ids) if ids is not None else None, + "source_types": source_types, + "dashboard_id": dashboard_id, + } + ) + org_documents = self.documents_by_org.setdefault(org_id, {}) + if ids is None: + return 0 + for document_id in ids: + org_documents.pop(document_id, None) + return len(ids) + + +@dataclass(frozen=True) +class StoredArtifacts: + """Factory payload for deterministic dbt docs test fixtures.""" + + manifest_json: dict + catalog_json: dict + generated_at: datetime + + def to_artifacts(self): + return DashboardChatDbtDocsArtifacts( + manifest_json=self.manifest_json, + catalog_json=self.catalog_json, + generated_at=self.generated_at, + target_dir=Path("/tmp"), + ) + + +@pytest.fixture +def org(): + organization = Org.objects.create( + name="Dashboard Chat Org", + slug="dashchat", + airbyte_workspace_id="ws-1", + ) + yield organization + organization.delete() + + +@pytest.fixture +def orguser(org, seed_db): + user = User.objects.create( + username="dashchat-user", + 
email="dashchat-user@test.com", + password="testpassword", + ) + org_user = OrgUser.objects.create( + user=user, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield org_user + org_user.delete() + user.delete() + + +@pytest.fixture +def orgdbt(org): + cli_block = OrgPrefectBlockv1.objects.create( + org=org, + block_type=DBTCLIPROFILE, + block_name="dashboard-chat-profile", + block_id="profile-block-id", + ) + dbt = OrgDbt.objects.create( + project_dir="dashchat/dbtrepo", + dbt_venv="dbt-1.8.7", + target_type="postgres", + default_schema="analytics", + cli_profile_block=cli_block, + ) + org.dbt = dbt + org.save(update_fields=["dbt"]) + yield dbt + cli_block.delete() + dbt.delete() + + +@pytest.fixture +def chart(org, orguser): + instance = Chart.objects.create( + title="Program Reach", + description="Program reach over time", + chart_type="line", + schema_name="analytics", + table_name="program_reach", + extra_config={"metric": "beneficiaries"}, + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield instance + instance.delete() + + +@pytest.fixture +def dashboard(org, orguser, chart): + instance = Dashboard.objects.create( + title="Impact Overview", + description="Program KPI dashboard", + dashboard_type="native", + components={ + "chart-1": { + "id": "chart-1", + "type": "chart", + "config": {"chartId": chart.id}, + } + }, + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield instance + instance.delete() + + +def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdbt, tmp_path): + """dbt docs generation should write profiles, load artifacts, and persist the timestamp.""" + project_dir = tmp_path / "dashchat" / "dbtrepo" + target_dir = project_dir / "target" + target_dir.mkdir(parents=True) + manifest_json = {"metadata": {"project_name": "dashchat"}, "nodes": {}, "sources": {}} + catalog_json = {"nodes": {}, "sources": {}} + (target_dir / 
"manifest.json").write_text(json.dumps(manifest_json), encoding="utf-8") + (target_dir / "catalog.json").write_text(json.dumps(catalog_json), encoding="utf-8") + + with patch( + "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.gather_dbt_project_params", + return_value=DbtProjectParams( + dbt_binary="/mock/dbt", + dbt_env_dir="/mock/env", + venv_binary="/mock/bin", + target="analytics", + project_dir=str(project_dir), + org_project_dir=str(project_dir.parent), + ), + ), patch( + "ddpui.core.dashboard_chat.dbt_docs.prefect_service.get_dbt_cli_profile_block", + return_value={"profile": {"dashchat": {"outputs": {"dev": {"type": "postgres"}}}}}, + ), patch( + "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.run_dbt_command", + return_value=Mock(stdout="ok", returncode=0), + ) as mock_run_dbt, patch( + "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.get_dbt_project_dir", + return_value=str(project_dir), + ): + artifacts = generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) + + orgdbt.refresh_from_db() + assert (project_dir / "profiles" / "profiles.yml").exists() + assert mock_run_dbt.call_count == 2 + assert mock_run_dbt.call_args_list[0].kwargs["command"] == ["deps"] + assert mock_run_dbt.call_args_list[1].kwargs["command"] == ["docs", "generate"] + assert artifacts.manifest_json == manifest_json + assert artifacts.catalog_json == catalog_json + assert orgdbt.docs_generated_at is not None + + +def test_ingest_org_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, dashboard): + """A repeated identical build should skip writes, and a removed source should be deleted.""" + OrgAIContext.objects.create( + org=org, + markdown="# Org context\n\nImportant org notes.", + updated_by=orguser, + updated_at=timezone.now(), + ) + dashboard_context = DashboardAIContext.objects.create( + dashboard=dashboard, + markdown="## Dashboard context\n\nThis dashboard tracks monthly reach.", + updated_by=orguser, + updated_at=timezone.now(), + ) + vector_store = 
FakeDashboardChatVectorStore() + artifacts = StoredArtifacts( + manifest_json={ + "metadata": {"project_name": "dashchat"}, + "sources": { + "source.dashchat.raw.program_reach": { + "source_name": "raw", + "schema": "raw", + "name": "program_reach", + "columns": { + "id": {"name": "id", "data_type": "integer"}, + "period": {"name": "period", "data_type": "date"}, + }, + } + }, + "nodes": { + "model.dashchat.fact_program_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "fact_program_reach", + "description": "Monthly reach facts", + "depends_on": {"nodes": ["source.dashchat.raw.program_reach"]}, + "columns": {"beneficiaries": {"name": "beneficiaries", "data_type": "integer"}}, + } + }, + }, + catalog_json={ + "sources": { + "source.dashchat.raw.program_reach": { + "metadata": { + "database": "warehouse", + "schema": "raw", + "name": "program_reach", + "type": "table", + }, + "columns": {"id": {"type": "integer"}}, + } + }, + "nodes": { + "model.dashchat.fact_program_reach": { + "metadata": { + "database": "warehouse", + "schema": "analytics", + "name": "fact_program_reach", + "type": "table", + }, + "columns": {"beneficiaries": {"type": "integer"}}, + } + }, + }, + generated_at=timezone.now(), + ) + service = DashboardChatIngestionService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), + ) + + first_result = service.ingest_org(org) + upsert_count_after_first_ingest = len(vector_store.upsert_calls) + second_result = service.ingest_org(org) + + dashboard_context.markdown = "" + dashboard_context.updated_at = timezone.now() + dashboard_context.save(update_fields=["markdown", "updated_at"]) + third_result = service.ingest_org(org) + + stored_source_types = { + document.metadata["source_type"] + for document in vector_store.get_documents(org.id, include_documents=False) + } + + assert first_result.source_document_counts["dashboard_context"] == 1 + assert 
second_result.upserted_document_ids == [] + assert second_result.deleted_document_ids == [] + assert len(vector_store.upsert_calls) == upsert_count_after_first_ingest + assert third_result.source_document_counts["dashboard_context"] == 0 + assert third_result.deleted_document_ids + assert "dashboard_context" not in stored_source_types + + +def test_ingest_org_keeps_collections_isolated_per_org(org, orgdbt, orguser, dashboard, seed_db): + """The context build should never mix documents between org collections.""" + other_org = Org.objects.create( + name="Dashboard Chat Org 2", + slug="dashch2", + airbyte_workspace_id="ws-2", + ) + other_user = User.objects.create( + username="dashchat-user-2", + email="dashchat-user-2@test.com", + password="testpassword", + ) + other_orguser = OrgUser.objects.create( + user=other_user, + org=other_org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + other_orgdbt = OrgDbt.objects.create( + project_dir="dashch2/dbtrepo", + dbt_venv="dbt-1.8.7", + target_type="postgres", + default_schema="analytics", + ) + other_org.dbt = other_orgdbt + other_org.save(update_fields=["dbt"]) + Dashboard.objects.create( + title="Other Dashboard", + dashboard_type="native", + created_by=other_orguser, + last_modified_by=other_orguser, + org=other_org, + ) + OrgAIContext.objects.create(org=org, markdown="Org one") + OrgAIContext.objects.create(org=other_org, markdown="Org two") + + artifacts = StoredArtifacts( + manifest_json={"metadata": {"project_name": "dashchat"}, "sources": {}, "nodes": {}}, + catalog_json={"sources": {}, "nodes": {}}, + generated_at=timezone.now(), + ) + vector_store = FakeDashboardChatVectorStore() + service = DashboardChatIngestionService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), + ) + + service.ingest_org(org) + service.ingest_org(other_org) + + assert set(vector_store.documents_by_org.keys()) == {org.id, other_org.id} + assert 
vector_store.documents_by_org[org.id] + assert vector_store.documents_by_org[other_org.id] + + other_orguser.delete() + other_user.delete() + other_orgdbt.delete() + other_org.delete() + + +def test_ingest_org_keeps_last_good_context_when_upsert_fails(org, orgdbt, orguser, dashboard): + """A failed rebuild should not delete the previously indexed documents.""" + OrgAIContext.objects.create( + org=org, + markdown="# Org context\n\nOriginal context.", + updated_by=orguser, + updated_at=timezone.now(), + ) + vector_store = FakeDashboardChatVectorStore() + artifacts = StoredArtifacts( + manifest_json={"metadata": {"project_name": "dashchat"}, "sources": {}, "nodes": {}}, + catalog_json={"sources": {}, "nodes": {}}, + generated_at=timezone.now(), + ) + service = DashboardChatIngestionService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), + ) + + first_result = service.ingest_org(org) + original_ids = set(first_result.upserted_document_ids) + assert original_ids + + org.ai_context.markdown = "# Org context\n\nUpdated context." 
+ org.ai_context.updated_at = timezone.now() + org.ai_context.save(update_fields=["markdown", "updated_at"]) + + def _raise_on_upsert(org_id, documents): + raise RuntimeError("upsert failed") + + vector_store.upsert_documents = _raise_on_upsert + + with pytest.raises(RuntimeError, match="upsert failed"): + service.ingest_org(org) + + remaining_ids = { + document.document_id for document in vector_store.get_documents(org.id, include_documents=False) + } + assert remaining_ids == original_ids + assert vector_store.delete_calls == [] From dc6a70d61590d578ba571f705fec96b635ae56e4 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 03:05:05 +0530 Subject: [PATCH 07/49] feat(ai-chat): schedule per-org dashboard chat context builds --- ddpui/celeryworkers/tasks.py | 89 +++++++++++ ddpui/tests/core/dashboard_chat/test_tasks.py | 150 ++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 ddpui/tests/core/dashboard_chat/test_tasks.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index e46bdb07f..c9360d6b8 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -42,6 +42,7 @@ from ddpui.models.airbyte import AirbyteJob from ddpui.models.org_user import OrgUser +from ddpui.models.org_preferences import OrgPreferences from ddpui.models.tasks import ( TaskLock, OrgTask, @@ -96,13 +97,17 @@ ) from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory from ddpui.core import llm_service +from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService from ddpui.utils.helpers import ( find_key_in_dictionary, convert_sqlalchemy_rows_to_csv_string, ) +from ddpui.utils.redis_client import RedisClient +from ddpui.utils.feature_flags import get_all_feature_flags_for_org logger = CustomLogger("ddpui") UTC = timezone.UTC +DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS = 3 * 60 * 60 @app.task(bind=True) @@ -1254,6 +1259,84 @@ def clear_stuck_locks(): return processed_count 
+def _get_dashboard_chat_context_build_orgs(): + """Return orgs that are eligible for scheduled dashboard chat context builds.""" + candidate_orgs = ( + Org.objects.select_related("dbt", "preferences") + .filter( + dbt__isnull=False, + preferences__ai_data_sharing_enabled=True, + ) + .order_by("id") + ) + return [ + org + for org in candidate_orgs + if get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) + ] + + +def _dashboard_chat_context_build_lock_key(org_id: int) -> str: + """Build the Redis lock key for an org's scheduled context build.""" + return f"dashboard_chat_context_build:{org_id}" + + +@app.task +def schedule_dashboard_chat_context_builds(): + """Fan out one dashboard chat context-build task per eligible org.""" + enqueued_org_ids: list[int] = [] + for org in _get_dashboard_chat_context_build_orgs(): + build_dashboard_chat_context_for_org.delay(org.id) + enqueued_org_ids.append(org.id) + + logger.info("enqueued dashboard chat context builds for org ids=%s", enqueued_org_ids) + return {"enqueued_org_ids": enqueued_org_ids} + + +@app.task(bind=True) +def build_dashboard_chat_context_for_org(self, org_id: int): + """Build dashboard chat retrieval context for one org if the org is eligible.""" + org = ( + Org.objects.select_related("dbt", "preferences") + .filter(id=org_id, dbt__isnull=False) + .first() + ) + if org is None: + logger.warning("dashboard chat context build skipped: org %s not found or missing dbt", org_id) + return {"status": "skipped_missing_org", "org_id": org_id} + + preferences = OrgPreferences.objects.filter(org=org).first() + feature_enabled = get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) + if not feature_enabled or preferences is None or not preferences.ai_data_sharing_enabled: + logger.info("dashboard chat context build skipped for org=%s because it is not eligible", org_id) + return {"status": "skipped_ineligible", "org_id": org_id} + + redis_client = RedisClient.get_instance() + lock = 
redis_client.lock( + _dashboard_chat_context_build_lock_key(org_id), + timeout=DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS, + ) + if not lock.acquire(blocking=False): + logger.info("dashboard chat context build skipped for org=%s because a rebuild is already running", org_id) + return {"status": "skipped_locked", "org_id": org_id} + + try: + result = DashboardChatIngestionService().ingest_org(org) + return { + "status": "completed", + "org_id": org_id, + "docs_generated_at": result.docs_generated_at.isoformat(), + "vector_last_ingested_at": result.vector_ingested_at.isoformat(), + "source_document_counts": result.source_document_counts, + } + finally: + try: + if lock.owned(): + lock.release() + except Exception: + logger.exception("failed to release dashboard chat context build lock for org=%s", org_id) + + @app.on_after_finalize.connect def setup_periodic_tasks(sender: Celery, **kwargs): """periodic celery tasks""" @@ -1282,6 +1365,12 @@ def setup_periodic_tasks(sender: Celery, **kwargs): name="sync flow runs of deployments into our db", ) + sender.add_periodic_task( + crontab(minute=0, hour="*/3"), + schedule_dashboard_chat_context_builds.s(), + name="build dashboard chat context", + ) + if os.getenv("ADMIN_EMAIL"): # check for long running flow runs; every 3600 seconds or 1 hour sender.add_periodic_task( diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py new file mode 100644 index 000000000..9feef826c --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -0,0 +1,150 @@ +import os +from unittest.mock import Mock, patch + +import django +import pytest + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +from django.contrib.auth.models import User +from django.utils import timezone + +from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.celeryworkers.tasks import ( + 
build_dashboard_chat_context_for_org, + schedule_dashboard_chat_context_builds, +) +from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionResult +from ddpui.models.org import Org, OrgDbt +from ddpui.models.org_preferences import OrgPreferences +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.tests.api_tests.test_user_org_api import seed_db +from ddpui.utils.feature_flags import enable_feature_flag + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def orguser(seed_db): + org = Org.objects.create( + name="Dashboard Chat Org", + slug="dashchat", + airbyte_workspace_id="workspace-id", + ) + user = User.objects.create( + username="dashchat-task-user", + email="dashchat-task-user@test.com", + password="testpassword", + ) + org_user = OrgUser.objects.create( + user=user, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield org_user + org_user.delete() + user.delete() + org.delete() + + +def _create_org_dbt(org: Org) -> OrgDbt: + dbt = OrgDbt.objects.create( + project_dir=f"{org.slug}/dbtrepo", + dbt_venv="dbt-1.8.7", + target_type="postgres", + default_schema="analytics", + ) + org.dbt = dbt + org.save(update_fields=["dbt"]) + return dbt + + +def test_schedule_dashboard_chat_context_builds_enqueues_only_eligible_orgs(orguser): + eligible_org = orguser.org + _create_org_dbt(eligible_org) + OrgPreferences.objects.create(org=eligible_org, ai_data_sharing_enabled=True) + enable_feature_flag("AI_DASHBOARD_CHAT", org=eligible_org) + + missing_flag_org = Org.objects.create( + name="Missing Flag", + slug="missing-flag", + airbyte_workspace_id="ws-2", + ) + _create_org_dbt(missing_flag_org) + OrgPreferences.objects.create(org=missing_flag_org, ai_data_sharing_enabled=True) + + missing_consent_org = Org.objects.create( + name="Missing Consent", + slug="missing-consent", + airbyte_workspace_id="ws-3", + ) + _create_org_dbt(missing_consent_org) + 
OrgPreferences.objects.create(org=missing_consent_org, ai_data_sharing_enabled=False) + enable_feature_flag("AI_DASHBOARD_CHAT", org=missing_consent_org) + + with patch( + "ddpui.celeryworkers.tasks.build_dashboard_chat_context_for_org.delay" + ) as delay_mock: + result = schedule_dashboard_chat_context_builds() + + delay_mock.assert_called_once_with(eligible_org.id) + assert result == {"enqueued_org_ids": [eligible_org.id]} + + +def test_build_dashboard_chat_context_for_org_skips_when_locked(orguser): + org = orguser.org + _create_org_dbt(org) + OrgPreferences.objects.create(org=org, ai_data_sharing_enabled=True) + enable_feature_flag("AI_DASHBOARD_CHAT", org=org) + + redis_lock = Mock() + redis_lock.acquire.return_value = False + redis_client = Mock() + redis_client.lock.return_value = redis_lock + + with patch("ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client), patch( + "ddpui.celeryworkers.tasks.DashboardChatIngestionService" + ) as ingestion_service: + result = build_dashboard_chat_context_for_org.run(org.id) + + assert result == {"status": "skipped_locked", "org_id": org.id} + ingestion_service.assert_not_called() + + +def test_build_dashboard_chat_context_for_org_runs_ingestion(orguser): + org = orguser.org + _create_org_dbt(org) + OrgPreferences.objects.create(org=org, ai_data_sharing_enabled=True) + enable_feature_flag("AI_DASHBOARD_CHAT", org=org) + + redis_lock = Mock() + redis_lock.acquire.return_value = True + redis_lock.owned.return_value = True + redis_client = Mock() + redis_client.lock.return_value = redis_lock + + result_payload = DashboardChatIngestionResult( + org_id=org.id, + docs_generated_at=timezone.now(), + vector_ingested_at=timezone.now(), + source_document_counts={"dashboard_export": 2}, + upserted_document_ids=["abc"], + deleted_document_ids=[], + ) + ingestion_service = Mock() + ingestion_service.ingest_org.return_value = result_payload + + with patch("ddpui.celeryworkers.tasks.RedisClient.get_instance", 
return_value=redis_client), patch( + "ddpui.celeryworkers.tasks.DashboardChatIngestionService", + return_value=ingestion_service, + ): + result = build_dashboard_chat_context_for_org.run(org.id) + + assert result["status"] == "completed" + assert result["org_id"] == org.id + assert result["source_document_counts"] == {"dashboard_export": 2} + ingestion_service.ingest_org.assert_called_once() + redis_lock.release.assert_called_once() From b1caa900b0600682c11587060eeb3a9e66d472af Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 03:13:17 +0530 Subject: [PATCH 08/49] feat(ai-chat): add langgraph dashboard chat runtime support --- ddpui/core/dashboard_chat/allowlist.py | 187 ++++ ddpui/core/dashboard_chat/config.py | 30 + ddpui/core/dashboard_chat/llm_client.py | 295 ++++++ ddpui/core/dashboard_chat/runtime.py | 945 ++++++++++++++++++ ddpui/core/dashboard_chat/runtime_types.py | 176 ++++ ddpui/core/dashboard_chat/sql_guard.py | 130 +++ ddpui/core/dashboard_chat/warehouse_tools.py | 154 +++ .../tests/core/dashboard_chat/test_runtime.py | 474 +++++++++ 8 files changed, 2391 insertions(+) create mode 100644 ddpui/core/dashboard_chat/allowlist.py create mode 100644 ddpui/core/dashboard_chat/llm_client.py create mode 100644 ddpui/core/dashboard_chat/runtime.py create mode 100644 ddpui/core/dashboard_chat/runtime_types.py create mode 100644 ddpui/core/dashboard_chat/sql_guard.py create mode 100644 ddpui/core/dashboard_chat/warehouse_tools.py create mode 100644 ddpui/tests/core/dashboard_chat/test_runtime.py diff --git a/ddpui/core/dashboard_chat/allowlist.py b/ddpui/core/dashboard_chat/allowlist.py new file mode 100644 index 000000000..b70ca2f55 --- /dev/null +++ b/ddpui/core/dashboard_chat/allowlist.py @@ -0,0 +1,187 @@ +"""Dashboard table allowlist derived from dashboard exports and dbt lineage.""" + +import json +from dataclasses import dataclass, field +from pathlib import Path + +from ddpui.core.orgdbt_manager import DbtProjectManager +from 
ddpui.models.org import OrgDbt + + +def normalize_dashboard_chat_table_name(table_name: str | None) -> str | None: + """Normalize schema-qualified table names for matching and policy checks.""" + if not table_name: + return None + normalized = table_name.strip().strip('"').strip("`").strip() + if not normalized: + return None + normalized = normalized.replace('"."', ".").replace("`.", ".").replace(".`", ".") + return normalized.lower() + + +def build_dashboard_chat_table_name(schema_name: str | None, table_name: str | None) -> str | None: + """Build a normalized schema.table identifier from separate pieces.""" + if not schema_name or not table_name: + return None + return normalize_dashboard_chat_table_name(f"{schema_name}.{table_name}") + + +@dataclass +class DashboardChatAllowlist: + """Allowed tables and dbt nodes for the current dashboard context.""" + + chart_tables: set[str] = field(default_factory=set) + upstream_tables: set[str] = field(default_factory=set) + allowed_tables: set[str] = field(default_factory=set) + allowed_unique_ids: set[str] = field(default_factory=set) + unique_id_to_table: dict[str, str] = field(default_factory=dict) + table_to_unique_ids: dict[str, set[str]] = field(default_factory=dict) + + def is_allowed(self, table_name: str | None) -> bool: + """Return whether the table is inside the dashboard allowlist.""" + normalized = normalize_dashboard_chat_table_name(table_name) + if not normalized or not self.allowed_tables: + return False + if normalized in self.allowed_tables: + return True + table_only = normalized.split(".")[-1] + return any( + allowed_table == table_only or allowed_table.endswith(f".{table_only}") + for allowed_table in self.allowed_tables + ) + + def is_unique_id_allowed(self, unique_id: str | None) -> bool: + """Return whether the dbt node belongs to the current dashboard lineage.""" + return bool(unique_id and unique_id in self.allowed_unique_ids) + + def prioritized_tables(self, limit: int | None = None) -> 
list[str]: + """Return chart tables first, then lineage tables.""" + ordered_tables = sorted(self.chart_tables) + sorted(self.upstream_tables - self.chart_tables) + deduped_tables = list(dict.fromkeys(ordered_tables)) + if limit is None: + return deduped_tables + return deduped_tables[:limit] + + +class DashboardChatAllowlistBuilder: + """Build the allowlist for a dashboard using chart metadata and dbt lineage.""" + + @classmethod + def build( + cls, + export_payload: dict, + manifest_json: dict | None = None, + ) -> DashboardChatAllowlist: + """Build the allowlist from the dashboard export contract and manifest lineage.""" + allowlist = DashboardChatAllowlist() + + for chart in export_payload.get("charts") or []: + table_name = build_dashboard_chat_table_name( + chart.get("schema_name"), + chart.get("table_name"), + ) + if not table_name: + continue + allowlist.chart_tables.add(table_name) + allowlist.allowed_tables.add(table_name) + + if not manifest_json: + return allowlist + + nodes_by_unique_id = cls._manifest_nodes_by_unique_id(manifest_json) + table_to_unique_ids = cls._table_to_unique_ids(nodes_by_unique_id) + allowlist.table_to_unique_ids = table_to_unique_ids + allowlist.unique_id_to_table = { + unique_id: table_name + for table_name, unique_ids in table_to_unique_ids.items() + for unique_id in unique_ids + } + + for chart_table in list(allowlist.chart_tables): + for unique_id in table_to_unique_ids.get(chart_table, set()): + cls._add_unique_id_and_upstreams( + unique_id=unique_id, + allowlist=allowlist, + nodes_by_unique_id=nodes_by_unique_id, + visited=set(), + ) + + return allowlist + + @staticmethod + def load_manifest_json(orgdbt: OrgDbt | None) -> dict | None: + """Load the current manifest.json from the dbt target directory if it exists.""" + if orgdbt is None: + return None + + target_dir = Path(DbtProjectManager.get_dbt_project_dir(orgdbt)) / "target" + manifest_path = target_dir / "manifest.json" + if not manifest_path.exists(): + return None + 
+ with open(manifest_path, "r", encoding="utf-8") as manifest_file: + return json.load(manifest_file) + + @staticmethod + def _manifest_nodes_by_unique_id(manifest_json: dict) -> dict[str, dict]: + """Collect manifest nodes and sources that can participate in lineage traversal.""" + nodes_by_unique_id: dict[str, dict] = {} + for unique_id, node in (manifest_json.get("nodes") or {}).items(): + if node.get("resource_type") not in {"model", "seed"}: + continue + nodes_by_unique_id[unique_id] = node + for unique_id, source in (manifest_json.get("sources") or {}).items(): + nodes_by_unique_id[unique_id] = source + return nodes_by_unique_id + + @staticmethod + def _table_to_unique_ids(nodes_by_unique_id: dict[str, dict]) -> dict[str, set[str]]: + """Build a schema.table lookup for manifest nodes and sources.""" + table_to_unique_ids: dict[str, set[str]] = {} + for unique_id, node in nodes_by_unique_id.items(): + table_name = DashboardChatAllowlistBuilder._table_name_for_node(node) + if not table_name: + continue + table_to_unique_ids.setdefault(table_name, set()).add(unique_id) + return table_to_unique_ids + + @staticmethod + def _table_name_for_node(node: dict) -> str | None: + """Resolve a normalized schema.table identifier for a dbt node.""" + schema_name = node.get("schema") + table_name = node.get("alias") or node.get("identifier") or node.get("name") + if node.get("resource_type") == "source": + table_name = node.get("identifier") or node.get("name") + return build_dashboard_chat_table_name(schema_name, table_name) + + @classmethod + def _add_unique_id_and_upstreams( + cls, + unique_id: str, + allowlist: DashboardChatAllowlist, + nodes_by_unique_id: dict[str, dict], + visited: set[str], + ) -> None: + """Recursively add a dbt node and its upstream lineage into the allowlist.""" + if unique_id in visited: + return + visited.add(unique_id) + + node = nodes_by_unique_id.get(unique_id) + if node is None: + return + + allowlist.allowed_unique_ids.add(unique_id) + 
table_name = cls._table_name_for_node(node) + if table_name: + allowlist.allowed_tables.add(table_name) + if table_name not in allowlist.chart_tables: + allowlist.upstream_tables.add(table_name) + + for dependency_unique_id in node.get("depends_on", {}).get("nodes") or []: + cls._add_unique_id_and_upstreams( + unique_id=dependency_unique_id, + allowlist=allowlist, + nodes_by_unique_id=nodes_by_unique_id, + visited=visited, + ) diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index 0fc102dbf..37c69a373 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -34,3 +34,33 @@ def from_env(cls) -> "DashboardChatVectorStoreConfig": "text-embedding-3-small", ), ) + + +@dataclass(frozen=True) +class DashboardChatRuntimeConfig: + """Environment-backed configuration for dashboard chat orchestration.""" + + llm_model: str = "gpt-4o-mini" + llm_timeout_ms: int = 45000 + retrieval_limit: int = 6 + related_dashboard_limit: int = 3 + max_query_rows: int = 200 + max_distinct_values: int = 50 + max_schema_tables: int = 4 + + @classmethod + def from_env(cls) -> "DashboardChatRuntimeConfig": + """Build runtime config from environment variables.""" + return cls( + llm_model=os.getenv("AI_DASHBOARD_CHAT_LLM_MODEL", "gpt-4o-mini"), + llm_timeout_ms=int(os.getenv("AI_DASHBOARD_CHAT_LLM_TIMEOUT_MS", "45000")), + retrieval_limit=int(os.getenv("AI_DASHBOARD_CHAT_RETRIEVAL_LIMIT", "6")), + related_dashboard_limit=int( + os.getenv("AI_DASHBOARD_CHAT_RELATED_DASHBOARD_LIMIT", "3") + ), + max_query_rows=int(os.getenv("AI_DASHBOARD_CHAT_MAX_QUERY_ROWS", "200")), + max_distinct_values=int( + os.getenv("AI_DASHBOARD_CHAT_MAX_DISTINCT_VALUES", "50") + ), + max_schema_tables=int(os.getenv("AI_DASHBOARD_CHAT_MAX_SCHEMA_TABLES", "4")), + ) diff --git a/ddpui/core/dashboard_chat/llm_client.py b/ddpui/core/dashboard_chat/llm_client.py new file mode 100644 index 000000000..9c48e1be9 --- /dev/null +++ 
b/ddpui/core/dashboard_chat/llm_client.py @@ -0,0 +1,295 @@ +"""Direct OpenAI client wrapper for dashboard chat runtime.""" + +from collections.abc import Sequence +import json +import os +from typing import Any, Protocol + +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatConversationMessage, + DashboardChatIntent, + DashboardChatIntentDecision, + DashboardChatPlanMode, + DashboardChatQueryPlan, + DashboardChatRetrievedDocument, + DashboardChatSqlDraft, + DashboardChatTextFilterPlan, +) + + +class DashboardChatLlmClient(Protocol): + """LLM contract used by the dashboard chat LangGraph runtime.""" + + def classify_intent( + self, + user_query: str, + conversation_history: Sequence[DashboardChatConversationMessage], + dashboard_summary: str, + ) -> DashboardChatIntentDecision: + """Classify the incoming query.""" + + def plan_query( + self, + user_query: str, + conversation_history: Sequence[DashboardChatConversationMessage], + dashboard_summary: str, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + schema_prompt: str, + allowlisted_tables: Sequence[str], + ) -> DashboardChatQueryPlan: + """Build a structured plan for the query.""" + + def generate_sql( + self, + user_query: str, + dashboard_summary: str, + query_plan: DashboardChatQueryPlan, + schema_prompt: str, + distinct_values: dict[str, list[str]], + allowlisted_tables: Sequence[str], + ) -> DashboardChatSqlDraft: + """Generate SQL from the structured plan.""" + + def compose_answer( + self, + user_query: str, + dashboard_summary: str, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + sql: str | None, + sql_results: list[dict[str, Any]] | None, + warnings: Sequence[str], + related_dashboard_titles: Sequence[str], + ) -> str: + """Compose the final answer text.""" + + +class OpenAIDashboardChatLlmClient: + """Direct OpenAI SDK adapter with JSON-mode helpers.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "gpt-4o-mini", + 
timeout_ms: int = 45000, + client: Any = None, + ): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.model = model + self.timeout_ms = timeout_ms + if client is None: + if not self.api_key: + raise ValueError("OPENAI_API_KEY must be set for dashboard chat runtime") + from openai import OpenAI + + client = OpenAI(api_key=self.api_key, timeout=timeout_ms / 1000) + self.client = client + + def classify_intent( + self, + user_query: str, + conversation_history: Sequence[DashboardChatConversationMessage], + dashboard_summary: str, + ) -> DashboardChatIntentDecision: + """Classify intent with lightweight conversation awareness.""" + prompt = { + "dashboard_summary": dashboard_summary, + "conversation_history": [message.__dict__ for message in conversation_history[-6:]], + "user_query": user_query, + } + result = self._complete_json( + system_prompt=( + "Classify the user query for an NGO dashboard assistant. " + "Return JSON with keys intent, reason, force_sql_path, clarification_question. " + "Allowed intents: data_query, context_query, needs_clarification, small_talk, irrelevant. " + "Set force_sql_path=true for any query that asks for counts, trends, breakdowns, comparisons, " + "filters, or tabular data." 
+ ), + user_prompt=json.dumps(prompt, ensure_ascii=True), + ) + intent_value = result.get("intent", DashboardChatIntent.CONTEXT_QUERY.value) + try: + intent = DashboardChatIntent(intent_value) + except ValueError: + intent = DashboardChatIntent.CONTEXT_QUERY + return DashboardChatIntentDecision( + intent=intent, + reason=str(result.get("reason") or "LLM classification"), + force_sql_path=bool(result.get("force_sql_path", intent == DashboardChatIntent.DATA_QUERY)), + clarification_question=result.get("clarification_question"), + ) + + def plan_query( + self, + user_query: str, + conversation_history: Sequence[DashboardChatConversationMessage], + dashboard_summary: str, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + schema_prompt: str, + allowlisted_tables: Sequence[str], + ) -> DashboardChatQueryPlan: + """Generate a structured execution plan.""" + prompt = { + "dashboard_summary": dashboard_summary, + "conversation_history": [message.__dict__ for message in conversation_history[-6:]], + "retrieved_documents": [ + { + "source_type": document.source_type, + "source_identifier": document.source_identifier, + "content": document.content[:500], + } + for document in retrieved_documents[:8] + ], + "schema_prompt": schema_prompt, + "allowlisted_tables": list(allowlisted_tables), + "user_query": user_query, + } + result = self._complete_json( + system_prompt=( + "Plan how to answer the dashboard question. " + "Return JSON with keys mode, reason, relevant_tables, schema_lookup_tables, text_filters, " + "answer_strategy, clarification_question. " + "Allowed modes: sql, context, clarify. " + "text_filters must be an array of objects with table_name, column_name, requested_value. " + "If the question can be answered from context or retrieved docs without SQL, choose context. " + "If the question needs row-level or aggregate data, choose sql." 
+ ), + user_prompt=json.dumps(prompt, ensure_ascii=True), + ) + mode_value = result.get("mode", DashboardChatPlanMode.CONTEXT.value) + try: + mode = DashboardChatPlanMode(mode_value) + except ValueError: + mode = DashboardChatPlanMode.CONTEXT + return DashboardChatQueryPlan( + mode=mode, + reason=str(result.get("reason") or "LLM plan"), + relevant_tables=_normalize_table_list(result.get("relevant_tables")), + schema_lookup_tables=_normalize_table_list(result.get("schema_lookup_tables")), + text_filters=[ + DashboardChatTextFilterPlan( + table_name=str(item.get("table_name") or "").lower(), + column_name=str(item.get("column_name") or ""), + requested_value=str(item.get("requested_value") or ""), + ) + for item in result.get("text_filters", []) + if item.get("table_name") and item.get("column_name") and item.get("requested_value") + ], + answer_strategy=result.get("answer_strategy"), + clarification_question=result.get("clarification_question"), + ) + + def generate_sql( + self, + user_query: str, + dashboard_summary: str, + query_plan: DashboardChatQueryPlan, + schema_prompt: str, + distinct_values: dict[str, list[str]], + allowlisted_tables: Sequence[str], + ) -> DashboardChatSqlDraft: + """Generate a single read-only SQL statement.""" + prompt = { + "dashboard_summary": dashboard_summary, + "query_plan": { + "mode": query_plan.mode.value, + "reason": query_plan.reason, + "relevant_tables": query_plan.relevant_tables, + "schema_lookup_tables": query_plan.schema_lookup_tables, + "text_filters": [text_filter.__dict__ for text_filter in query_plan.text_filters], + "answer_strategy": query_plan.answer_strategy, + }, + "schema_prompt": schema_prompt, + "distinct_values": distinct_values, + "allowlisted_tables": list(allowlisted_tables), + "user_query": user_query, + } + result = self._complete_json( + system_prompt=( + "Generate one safe read-only SQL query. " + "Return JSON with keys sql, reason, warnings, clarification_question. 
" + "The SQL must be a single SELECT or WITH...SELECT statement that only references allowlisted tables. " + "Use exact values from the provided distinct_values map for text filters when available. " + "If the question cannot be answered safely, return sql as null and provide clarification_question." + ), + user_prompt=json.dumps(prompt, ensure_ascii=True), + ) + sql = result.get("sql") + if sql is not None: + sql = str(sql).strip() + return DashboardChatSqlDraft( + sql=sql or None, + reason=str(result.get("reason") or "LLM SQL draft"), + warnings=[str(warning) for warning in result.get("warnings", [])], + clarification_question=result.get("clarification_question"), + ) + + def compose_answer( + self, + user_query: str, + dashboard_summary: str, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + sql: str | None, + sql_results: list[dict[str, Any]] | None, + warnings: Sequence[str], + related_dashboard_titles: Sequence[str], + ) -> str: + """Compose the final user-facing answer.""" + response = self.client.chat.completions.create( + model=self.model, + temperature=0, + messages=[ + { + "role": "system", + "content": ( + "You answer NGO dashboard questions. " + "Use plain language, cite data-backed claims carefully, and avoid exposing hidden reasoning. " + "If SQL results are empty, say that no matching rows were found." 
+ ), + }, + { + "role": "user", + "content": json.dumps( + { + "dashboard_summary": dashboard_summary, + "user_query": user_query, + "retrieved_documents": [ + { + "source_type": document.source_type, + "source_identifier": document.source_identifier, + "content": document.content[:400], + } + for document in retrieved_documents[:8] + ], + "sql": sql, + "sql_results": sql_results, + "warnings": list(warnings), + "related_dashboards": list(related_dashboard_titles), + }, + ensure_ascii=True, + ), + }, + ], + ) + answer = response.choices[0].message.content or "" + return answer.strip() + + def _complete_json(self, system_prompt: str, user_prompt: str) -> dict[str, Any]: + """Run a JSON-mode chat completion and parse the result.""" + response = self.client.chat.completions.create( + model=self.model, + temperature=0, + response_format={"type": "json_object"}, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + ) + content = response.choices[0].message.content or "{}" + return json.loads(content) + + +def _normalize_table_list(value: Any) -> list[str]: + """Normalize a JSON value into a lowercased table list.""" + if not isinstance(value, list): + return [] + return [str(table_name).lower() for table_name in value if table_name] diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py new file mode 100644 index 000000000..30a20f8d2 --- /dev/null +++ b/ddpui/core/dashboard_chat/runtime.py @@ -0,0 +1,945 @@ +"""LangGraph runtime for dashboard chat orchestration.""" + +from collections.abc import Callable, Sequence +import json +from typing import Any, TypedDict + +from langgraph.graph import END, START, StateGraph + +from ddpui.core.dashboard_chat.allowlist import ( + DashboardChatAllowlist, + DashboardChatAllowlistBuilder, +) +from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig +from ddpui.core.dashboard_chat.llm_client import ( + DashboardChatLlmClient, + 
OpenAIDashboardChatLlmClient, +) +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatCitation, + DashboardChatConversationMessage, + DashboardChatIntent, + DashboardChatIntentDecision, + DashboardChatPlanMode, + DashboardChatQueryPlan, + DashboardChatRelatedDashboard, + DashboardChatResponse, + DashboardChatRetrievedDocument, + DashboardChatSqlDraft, + DashboardChatSqlValidationResult, +) +from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.warehouse_tools import ( + DashboardChatWarehouseTools, + DashboardChatWarehouseToolsError, +) +from ddpui.models.dashboard import Dashboard +from ddpui.models.org import Org +from ddpui.services.dashboard_service import DashboardService + +SIMPLE_GREETINGS = {"hi", "hey", "hello", "thanks", "thank you", "gm", "good morning"} +DATA_QUERY_KEYWORDS = { + "count", + "counts", + "trend", + "compare", + "breakdown", + "how many", + "total", + "sum", + "average", + "avg", + "top", + "bottom", + "show me", + "list", + "split by", + "group by", +} +CONTEXT_QUERY_KEYWORDS = { + "what does", + "explain", + "definition", + "metric", + "why", + "how is", + "which chart", + "which dataset", + "context", +} + + +class DashboardChatRuntimeState(TypedDict, total=False): + """LangGraph state for dashboard chat.""" + + org: Org + dashboard_id: int + user_query: str + conversation_history: list[DashboardChatConversationMessage] + dashboard_export: dict[str, Any] + dashboard_summary: str + allowlist: DashboardChatAllowlist + intent_decision: DashboardChatIntentDecision + retrieved_documents: list[DashboardChatRetrievedDocument] + citations: list[DashboardChatCitation] + related_dashboards: list[DashboardChatRelatedDashboard] + schema_prompt: str + schema_snippets: dict[str, Any] + query_plan: DashboardChatQueryPlan + 
distinct_values: dict[str, list[str]] + sql_draft: DashboardChatSqlDraft | None + sql_validation: DashboardChatSqlValidationResult | None + sql_results: list[dict[str, Any]] | None + warnings: list[str] + response: DashboardChatResponse + + +class DashboardChatRuntime: + """Run dashboard chat queries through a LangGraph workflow.""" + + def __init__( + self, + vector_store: ChromaDashboardChatVectorStore | None = None, + llm_client: DashboardChatLlmClient | None = None, + warehouse_tools_factory: Callable[[Org], DashboardChatWarehouseTools] | None = None, + runtime_config: DashboardChatRuntimeConfig | None = None, + ): + self.runtime_config = runtime_config or DashboardChatRuntimeConfig.from_env() + self.vector_store = vector_store or ChromaDashboardChatVectorStore() + self.llm_client = llm_client or OpenAIDashboardChatLlmClient( + model=self.runtime_config.llm_model, + timeout_ms=self.runtime_config.llm_timeout_ms, + ) + self.warehouse_tools_factory = warehouse_tools_factory or ( + lambda org: DashboardChatWarehouseTools( + org=org, + max_rows=self.runtime_config.max_query_rows, + ) + ) + self.graph = self._build_graph() + + def run( + self, + org: Org, + dashboard_id: int, + user_query: str, + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, str]] | None = None, + ) -> DashboardChatResponse: + """Run a single dashboard chat turn and return the structured response.""" + initial_state: DashboardChatRuntimeState = { + "org": org, + "dashboard_id": dashboard_id, + "user_query": user_query, + "conversation_history": self._normalize_conversation_history(conversation_history), + "warnings": [], + } + final_state = self.graph.invoke(initial_state) + return final_state["response"] + + def _build_graph(self): + """Create the LangGraph state machine.""" + graph = StateGraph(DashboardChatRuntimeState) + graph.add_node("load_context", self._node_load_context) + graph.add_node("route_intent", self._node_route_intent) + 
graph.add_node("build_allowlist", self._node_build_allowlist) + graph.add_node("retrieve_docs", self._node_retrieve_docs) + graph.add_node("load_schema_snippets", self._node_load_schema_snippets) + graph.add_node("plan_query", self._node_plan_query) + graph.add_node("lookup_distinct_values", self._node_lookup_distinct_values) + graph.add_node("generate_sql", self._node_generate_sql) + graph.add_node("validate_sql", self._node_validate_sql) + graph.add_node("execute_sql", self._node_execute_sql) + graph.add_node("compose_answer", self._node_compose_answer) + graph.add_node("finalize", self._node_finalize_response) + + graph.add_edge(START, "load_context") + graph.add_edge("load_context", "route_intent") + graph.add_conditional_edges( + "route_intent", + self._route_after_intent, + { + "compose_answer": "compose_answer", + "build_allowlist": "build_allowlist", + }, + ) + graph.add_edge("build_allowlist", "retrieve_docs") + graph.add_edge("retrieve_docs", "load_schema_snippets") + graph.add_edge("load_schema_snippets", "plan_query") + graph.add_conditional_edges( + "plan_query", + self._route_after_plan, + { + "compose_answer": "compose_answer", + "lookup_distinct_values": "lookup_distinct_values", + }, + ) + graph.add_edge("lookup_distinct_values", "generate_sql") + graph.add_edge("generate_sql", "validate_sql") + graph.add_conditional_edges( + "validate_sql", + self._route_after_sql_validation, + { + "compose_answer": "compose_answer", + "execute_sql": "execute_sql", + }, + ) + graph.add_edge("execute_sql", "compose_answer") + graph.add_edge("compose_answer", "finalize") + graph.add_edge("finalize", END) + return graph.compile() + + def _node_load_context(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Load dashboard context and summary.""" + dashboard_export = DashboardService.export_dashboard_context( + state["dashboard_id"], + state["org"], + ) + state["dashboard_export"] = dashboard_export + state["dashboard_summary"] = 
self._build_dashboard_summary(dashboard_export) + return state + + def _node_route_intent(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Classify the user query.""" + intent_decision = self._heuristic_intent_decision( + user_query=state["user_query"], + conversation_history=state["conversation_history"], + ) + if intent_decision is None: + intent_decision = self.llm_client.classify_intent( + user_query=state["user_query"], + conversation_history=state["conversation_history"], + dashboard_summary=state["dashboard_summary"], + ) + state["intent_decision"] = intent_decision + return state + + def _node_build_allowlist(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Build the dashboard table allowlist from export data and dbt lineage.""" + manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(state["org"].dbt) + state["allowlist"] = DashboardChatAllowlistBuilder.build( + state["dashboard_export"], + manifest_json=manifest_json, + ) + return state + + def _node_retrieve_docs(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Retrieve dashboard, org, and dbt context from Chroma.""" + org = state["org"] + dashboard_results = self._query_vector_store( + org=org, + query_text=state["user_query"], + source_types=[ + DashboardChatSourceType.DASHBOARD_EXPORT.value, + DashboardChatSourceType.DASHBOARD_CONTEXT.value, + ], + dashboard_id=state["dashboard_id"], + ) + org_results = self._query_vector_store( + org=org, + query_text=state["user_query"], + source_types=[DashboardChatSourceType.ORG_CONTEXT.value], + ) + dbt_results = self._filter_allowlisted_dbt_results( + self._query_vector_store( + org=org, + query_text=state["user_query"], + source_types=[ + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, + ], + ), + state["allowlist"], + ) + + retrieved_documents = self._merge_retrieval_results( + dashboard_results=dashboard_results, + 
org_results=org_results, + dbt_results=dbt_results, + ) + state["retrieved_documents"] = retrieved_documents + state["citations"] = self._build_citations( + retrieved_documents=retrieved_documents, + dashboard_export=state["dashboard_export"], + allowlist=state["allowlist"], + ) + state["related_dashboards"] = self._build_related_dashboards( + org=org, + current_dashboard_id=state["dashboard_id"], + query_text=state["user_query"], + ) + return state + + def _node_load_schema_snippets( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Load schema snippets for the relevant dashboard tables.""" + intent_decision = state["intent_decision"] + if not intent_decision.force_sql_path and intent_decision.intent != DashboardChatIntent.DATA_QUERY: + state["schema_snippets"] = {} + state["schema_prompt"] = "" + return state + + candidate_tables = state["allowlist"].prioritized_tables( + limit=self.runtime_config.max_schema_tables, + ) + if not candidate_tables: + state["schema_snippets"] = {} + state["schema_prompt"] = "" + state["warnings"] = state.get("warnings", []) + [ + "No dashboard tables were available for schema inspection.", + ] + return state + + try: + warehouse_tools = self.warehouse_tools_factory(state["org"]) + schema_snippets = warehouse_tools.get_schema_snippets(candidate_tables) + except DashboardChatWarehouseToolsError as error: + state["schema_snippets"] = {} + state["schema_prompt"] = "" + state["warnings"] = state.get("warnings", []) + [str(error)] + return state + + state["schema_snippets"] = schema_snippets + state["schema_prompt"] = "\n\n".join( + snippet.to_prompt_text() for snippet in schema_snippets.values() + ) + return state + + def _node_plan_query(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Produce the structured execution plan.""" + intent_decision = state["intent_decision"] + + if intent_decision.intent == DashboardChatIntent.CONTEXT_QUERY and not intent_decision.force_sql_path: + 
state["query_plan"] = DashboardChatQueryPlan( + mode=DashboardChatPlanMode.CONTEXT, + reason=intent_decision.reason, + ) + return state + + if intent_decision.intent in { + DashboardChatIntent.SMALL_TALK, + DashboardChatIntent.IRRELEVANT, + DashboardChatIntent.NEEDS_CLARIFICATION, + }: + state["query_plan"] = DashboardChatQueryPlan( + mode=DashboardChatPlanMode.CLARIFY, + reason=intent_decision.reason, + clarification_question=intent_decision.clarification_question, + ) + return state + + allowlisted_tables = state["allowlist"].prioritized_tables() + if not allowlisted_tables and intent_decision.force_sql_path: + state["query_plan"] = DashboardChatQueryPlan( + mode=DashboardChatPlanMode.CLARIFY, + reason="Current dashboard does not expose any allowlisted tables for SQL.", + clarification_question=( + "I can explain this dashboard, but it does not expose a data source I can query safely." + ), + ) + return state + + query_plan = self.llm_client.plan_query( + user_query=state["user_query"], + conversation_history=state["conversation_history"], + dashboard_summary=state["dashboard_summary"], + retrieved_documents=state.get("retrieved_documents", []), + schema_prompt=state.get("schema_prompt", ""), + allowlisted_tables=allowlisted_tables, + ) + query_plan = self._normalize_query_plan( + query_plan=query_plan, + allowlist=state["allowlist"], + default_tables=allowlisted_tables, + ) + state["query_plan"] = query_plan + return state + + def _node_lookup_distinct_values( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Fetch distinct values for requested text filters.""" + distinct_values: dict[str, list[str]] = {} + query_plan = state["query_plan"] + if not query_plan.text_filters: + state["distinct_values"] = distinct_values + return state + + try: + warehouse_tools = self.warehouse_tools_factory(state["org"]) + except DashboardChatWarehouseToolsError as error: + state["warnings"] = state.get("warnings", []) + [str(error)] + 
state["distinct_values"] = distinct_values + return state + + available_tables = set(state.get("schema_snippets", {}).keys()) + for text_filter in query_plan.text_filters: + table_name = text_filter.table_name.lower() + if not state["allowlist"].is_allowed(table_name) or table_name not in available_tables: + continue + distinct_key = f"{table_name}.{text_filter.column_name}" + distinct_values[distinct_key] = warehouse_tools.get_distinct_values( + table_name=table_name, + column_name=text_filter.column_name, + limit=self.runtime_config.max_distinct_values, + ) + + state["distinct_values"] = distinct_values + return state + + def _node_generate_sql(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Generate SQL from the structured plan.""" + query_plan = state["query_plan"] + if query_plan.mode != DashboardChatPlanMode.SQL: + state["sql_draft"] = None + return state + + sql_draft = self.llm_client.generate_sql( + user_query=state["user_query"], + dashboard_summary=state["dashboard_summary"], + query_plan=query_plan, + schema_prompt=self._schema_prompt_for_plan( + state.get("schema_snippets", {}), + query_plan, + ), + distinct_values=state.get("distinct_values", {}), + allowlisted_tables=state["allowlist"].prioritized_tables(), + ) + state["sql_draft"] = sql_draft + return state + + def _node_validate_sql(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Run SQL guard validation.""" + sql_draft = state.get("sql_draft") + if sql_draft is None or not sql_draft.sql: + state["sql_validation"] = None + return state + + validation = DashboardChatSqlGuard( + allowlist=state["allowlist"], + max_rows=self.runtime_config.max_query_rows, + ).validate(sql_draft.sql) + state["sql_validation"] = validation + return state + + def _node_execute_sql(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Execute the validated SQL.""" + sql_validation = state["sql_validation"] + if sql_validation is None or not 
sql_validation.sanitized_sql: + state["sql_results"] = None + return state + + try: + warehouse_tools = self.warehouse_tools_factory(state["org"]) + state["sql_results"] = warehouse_tools.execute_sql(sql_validation.sanitized_sql) + except DashboardChatWarehouseToolsError as error: + state["warnings"] = state.get("warnings", []) + [str(error)] + state["sql_results"] = None + return state + + def _node_compose_answer(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Assemble the answer text across simple, context, and SQL paths.""" + intent_decision = state["intent_decision"] + query_plan = state.get("query_plan") + sql_draft = state.get("sql_draft") + sql_validation = state.get("sql_validation") + + warnings = list(dict.fromkeys(state.get("warnings", []))) + if sql_draft is not None: + warnings.extend(warning for warning in sql_draft.warnings if warning not in warnings) + if sql_validation is not None: + warnings.extend(warning for warning in sql_validation.warnings if warning not in warnings) + + if intent_decision.intent == DashboardChatIntent.SMALL_TALK: + answer_text = ( + "I can help explain this dashboard or answer questions about the data behind its charts." + ) + elif intent_decision.intent == DashboardChatIntent.IRRELEVANT: + answer_text = ( + "I can help with questions about this dashboard, its charts, and the data behind them." + ) + elif intent_decision.intent == DashboardChatIntent.NEEDS_CLARIFICATION: + answer_text = ( + intent_decision.clarification_question + or "Could you be more specific about the metric, program, or time period you want?" + ) + elif query_plan and query_plan.mode == DashboardChatPlanMode.CLARIFY: + answer_text = query_plan.clarification_question or ( + sql_draft.clarification_question + if sql_draft is not None and sql_draft.clarification_question + else "I need a bit more detail before I can answer that safely." 
+ ) + elif sql_draft is not None and not sql_draft.sql and sql_draft.clarification_question: + answer_text = sql_draft.clarification_question + elif sql_validation is not None and not sql_validation.is_valid: + answer_text = "I couldn't answer that safely from this dashboard context." + if sql_validation.errors: + answer_text += f" {sql_validation.errors[0]}" + else: + try: + answer_text = self.llm_client.compose_answer( + user_query=state["user_query"], + dashboard_summary=state["dashboard_summary"], + retrieved_documents=state.get("retrieved_documents", []), + sql=sql_validation.sanitized_sql if sql_validation else None, + sql_results=state.get("sql_results"), + warnings=warnings, + related_dashboard_titles=[ + related_dashboard.title + for related_dashboard in state.get("related_dashboards", []) + ], + ) + except Exception: + answer_text = self._fallback_answer_text( + retrieved_documents=state.get("retrieved_documents", []), + sql_results=state.get("sql_results"), + ) + + state["response"] = DashboardChatResponse( + answer_text=answer_text.strip(), + intent=intent_decision.intent, + citations=state.get("citations", []), + related_dashboards=state.get("related_dashboards", []), + warnings=warnings, + sql=sql_validation.sanitized_sql if sql_validation else None, + sql_results=state.get("sql_results"), + metadata={}, + ) + return state + + def _node_finalize_response(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Attach metadata and table citations to the final response.""" + response = state["response"] + citations = list(response.citations) + sql_validation = state.get("sql_validation") + if sql_validation is not None: + citations.extend( + DashboardChatCitation( + source_type="warehouse_table", + source_identifier=table_name, + title=f"Warehouse table: {table_name}", + snippet=f"SQL executed against {table_name}.", + table_name=table_name, + ) + for table_name in sql_validation.tables + if table_name + ) + + state["response"] = 
DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=list(dict.fromkeys(citations)), + related_dashboards=response.related_dashboards, + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + metadata={ + "dashboard_id": state["dashboard_id"], + "query_plan_mode": state.get("query_plan").mode.value + if state.get("query_plan") + else None, + "query_plan_tables": state.get("query_plan").relevant_tables + if state.get("query_plan") + else [], + "retrieved_document_ids": [ + document.document_id for document in state.get("retrieved_documents", []) + ], + "allowlisted_tables": sorted(state["allowlist"].allowed_tables), + "sql_guard_errors": state.get("sql_validation").errors + if state.get("sql_validation") + else [], + }, + ) + return state + + @staticmethod + def _route_after_intent(state: DashboardChatRuntimeState) -> str: + """Route simple intents directly to answer composition.""" + intent = state["intent_decision"].intent + if intent in { + DashboardChatIntent.SMALL_TALK, + DashboardChatIntent.IRRELEVANT, + DashboardChatIntent.NEEDS_CLARIFICATION, + }: + return "compose_answer" + return "build_allowlist" + + @staticmethod + def _route_after_plan(state: DashboardChatRuntimeState) -> str: + """Route SQL plans to distinct lookup and all others to answer composition.""" + query_plan = state["query_plan"] + if query_plan.mode == DashboardChatPlanMode.SQL: + return "lookup_distinct_values" + return "compose_answer" + + @staticmethod + def _route_after_sql_validation(state: DashboardChatRuntimeState) -> str: + """Only execute SQL after it passes validation.""" + sql_validation = state.get("sql_validation") + if sql_validation is not None and sql_validation.is_valid and sql_validation.sanitized_sql: + return "execute_sql" + return "compose_answer" + + @staticmethod + def _normalize_conversation_history( + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, str]] | None, + ) 
-> list[DashboardChatConversationMessage]: + """Normalize conversation history into typed messages.""" + normalized_messages: list[DashboardChatConversationMessage] = [] + for item in conversation_history or []: + if isinstance(item, DashboardChatConversationMessage): + normalized_messages.append(item) + else: + normalized_messages.append( + DashboardChatConversationMessage( + role=str(item.get("role") or "user"), + content=str(item.get("content") or ""), + ) + ) + return normalized_messages + + @staticmethod + def _heuristic_intent_decision( + user_query: str, + conversation_history: Sequence[DashboardChatConversationMessage], + ) -> DashboardChatIntentDecision | None: + """Fast path for obvious intents before consulting the LLM.""" + normalized_query = user_query.strip().lower() + if not normalized_query: + return DashboardChatIntentDecision( + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + reason="Empty query", + clarification_question="What would you like to know about this dashboard?", + ) + + if normalized_query in SIMPLE_GREETINGS: + return DashboardChatIntentDecision( + intent=DashboardChatIntent.SMALL_TALK, + reason="Greeting or pleasantry", + ) + + if any(keyword in normalized_query for keyword in DATA_QUERY_KEYWORDS): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.DATA_QUERY, + reason="Contains data-analysis keywords", + force_sql_path=True, + ) + + if any(keyword in normalized_query for keyword in CONTEXT_QUERY_KEYWORDS): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.CONTEXT_QUERY, + reason="Contains definition or explanation keywords", + ) + + if len(normalized_query.split()) <= 2 and conversation_history: + return DashboardChatIntentDecision( + intent=DashboardChatIntent.DATA_QUERY, + reason="Short follow-up treated as a data refinement", + force_sql_path=True, + ) + return None + + def _query_vector_store( + self, + org: Org, + query_text: str, + source_types: Sequence[str], + dashboard_id: int | None = 
None, + ) -> list[DashboardChatRetrievedDocument]: + """Run a vector query and map it into runtime documents.""" + if not source_types: + return [] + + results = self.vector_store.query( + org.id, + query_text=query_text, + n_results=self.runtime_config.retrieval_limit, + source_types=list(source_types), + dashboard_id=dashboard_id, + ) + return [ + DashboardChatRetrievedDocument( + document_id=result.document_id, + source_type=str(result.metadata.get("source_type") or ""), + source_identifier=str(result.metadata.get("source_identifier") or ""), + content=result.content, + dashboard_id=result.metadata.get("dashboard_id"), + distance=result.distance, + ) + for result in results + ] + + @staticmethod + def _filter_allowlisted_dbt_results( + results: Sequence[DashboardChatRetrievedDocument], + allowlist: DashboardChatAllowlist, + ) -> list[DashboardChatRetrievedDocument]: + """Keep only dbt docs that belong to the dashboard lineage.""" + filtered_results: list[DashboardChatRetrievedDocument] = [] + for result in results: + unique_id = DashboardChatRuntime._unique_id_from_source_identifier( + result.source_identifier + ) + if allowlist.is_unique_id_allowed(unique_id): + filtered_results.append(result) + return filtered_results + + @staticmethod + def _merge_retrieval_results( + dashboard_results: Sequence[DashboardChatRetrievedDocument], + org_results: Sequence[DashboardChatRetrievedDocument], + dbt_results: Sequence[DashboardChatRetrievedDocument], + ) -> list[DashboardChatRetrievedDocument]: + """Prioritize current-dashboard docs, then org docs, then dbt docs.""" + scored_results: list[tuple[tuple[int, float], DashboardChatRetrievedDocument]] = [] + for priority, result_group in enumerate([dashboard_results, org_results, dbt_results]): + for result in result_group: + scored_results.append( + ( + (priority, result.distance if result.distance is not None else 999.0), + result, + ) + ) + + merged_results: list[DashboardChatRetrievedDocument] = [] + seen_document_ids: 
set[str] = set() + for _, result in sorted(scored_results, key=lambda item: item[0]): + if result.document_id in seen_document_ids: + continue + merged_results.append(result) + seen_document_ids.add(result.document_id) + return merged_results + + def _build_related_dashboards( + self, + org: Org, + current_dashboard_id: int, + query_text: str, + ) -> list[DashboardChatRelatedDashboard]: + """Suggest other dashboards with matching retrieved context.""" + related_results = self.vector_store.query( + org.id, + query_text=query_text, + n_results=self.runtime_config.related_dashboard_limit * 4, + source_types=[ + DashboardChatSourceType.DASHBOARD_CONTEXT.value, + DashboardChatSourceType.DASHBOARD_EXPORT.value, + ], + ) + candidate_dashboard_ids = [ + result.metadata.get("dashboard_id") + for result in related_results + if result.metadata.get("dashboard_id") + and result.metadata.get("dashboard_id") != current_dashboard_id + ] + if not candidate_dashboard_ids: + return [] + + dashboard_titles = { + dashboard.id: dashboard.title + for dashboard in Dashboard.objects.filter(org=org, id__in=set(candidate_dashboard_ids)) + } + suggestions: list[DashboardChatRelatedDashboard] = [] + seen_dashboard_ids: set[int] = set() + for result in related_results: + dashboard_id = result.metadata.get("dashboard_id") + if ( + dashboard_id in seen_dashboard_ids + or dashboard_id == current_dashboard_id + or dashboard_id not in dashboard_titles + ): + continue + suggestions.append( + DashboardChatRelatedDashboard( + dashboard_id=int(dashboard_id), + title=dashboard_titles[dashboard_id], + reason=self._compact_snippet(result.content), + ) + ) + seen_dashboard_ids.add(int(dashboard_id)) + if len(suggestions) >= self.runtime_config.related_dashboard_limit: + break + return suggestions + + def _build_citations( + self, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + dashboard_export: dict[str, Any], + allowlist: DashboardChatAllowlist, + ) -> list[DashboardChatCitation]: + 
"""Build structured citations from retrieved documents.""" + dashboard_title = dashboard_export["dashboard"].get("title") or "Current dashboard" + chart_lookup = { + chart.get("id"): chart.get("title") or f"Chart {chart.get('id')}" + for chart in dashboard_export.get("charts") or [] + } + citations: list[DashboardChatCitation] = [] + + for document in retrieved_documents[:6]: + chart_id = self._chart_id_from_source_identifier(document.source_identifier) + table_name = None + if document.source_type in { + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, + }: + unique_id = self._unique_id_from_source_identifier(document.source_identifier) + table_name = allowlist.unique_id_to_table.get(unique_id) if unique_id else None + title = self._citation_title( + document=document, + dashboard_title=dashboard_title, + chart_lookup=chart_lookup, + table_name=table_name, + ) + citations.append( + DashboardChatCitation( + source_type=document.source_type, + source_identifier=document.source_identifier, + title=title, + snippet=self._compact_snippet(document.content), + dashboard_id=document.dashboard_id, + table_name=table_name, + ) + ) + + return citations + + @staticmethod + def _citation_title( + document: DashboardChatRetrievedDocument, + dashboard_title: str, + chart_lookup: dict[int, str], + table_name: str | None, + ) -> str: + """Map a retrieved document into a human-readable citation title.""" + if document.source_type == DashboardChatSourceType.ORG_CONTEXT.value: + return "Organization context" + if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: + return f"Dashboard context: {dashboard_title}" + if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: + chart_id = DashboardChatRuntime._chart_id_from_source_identifier(document.source_identifier) + if chart_id is not None and chart_id in chart_lookup: + return f"Chart: {chart_lookup[chart_id]}" + return f"Dashboard export: 
{dashboard_title}" + if document.source_type == DashboardChatSourceType.DBT_MANIFEST.value: + return f"dbt manifest: {table_name or document.source_identifier}" + if document.source_type == DashboardChatSourceType.DBT_CATALOG.value: + return f"dbt catalog: {table_name or document.source_identifier}" + return document.source_identifier + + @staticmethod + def _build_dashboard_summary(dashboard_export: dict[str, Any]) -> str: + """Format the dashboard summary fed into the LLM.""" + dashboard_payload = dashboard_export["dashboard"] + lines = [ + f"Dashboard: {dashboard_payload.get('title')}", + f"Description: {dashboard_payload.get('description') or 'None'}", + ] + for chart in dashboard_export.get("charts") or []: + lines.append( + "{title} uses {schema}.{table} ({chart_type})".format( + title=chart.get("title"), + schema=chart.get("schema_name"), + table=chart.get("table_name"), + chart_type=chart.get("chart_type"), + ) + ) + return "\n".join(lines) + + @staticmethod + def _normalize_query_plan( + query_plan: DashboardChatQueryPlan, + allowlist: DashboardChatAllowlist, + default_tables: Sequence[str], + ) -> DashboardChatQueryPlan: + """Drop out-of-bounds tables and backfill safe defaults.""" + relevant_tables = [ + table_name.lower() + for table_name in query_plan.relevant_tables + if allowlist.is_allowed(table_name) + ] + schema_lookup_tables = [ + table_name.lower() + for table_name in query_plan.schema_lookup_tables + if allowlist.is_allowed(table_name) + ] + if query_plan.mode == DashboardChatPlanMode.SQL and not relevant_tables: + relevant_tables = [table_name.lower() for table_name in default_tables] + if query_plan.mode == DashboardChatPlanMode.SQL and not schema_lookup_tables: + schema_lookup_tables = relevant_tables + return DashboardChatQueryPlan( + mode=query_plan.mode, + reason=query_plan.reason, + relevant_tables=relevant_tables, + schema_lookup_tables=schema_lookup_tables, + text_filters=[ + text_filter + for text_filter in query_plan.text_filters + 
if allowlist.is_allowed(text_filter.table_name) + ], + answer_strategy=query_plan.answer_strategy, + clarification_question=query_plan.clarification_question, + ) + + @staticmethod + def _schema_prompt_for_plan( + schema_snippets: dict[str, Any], + query_plan: DashboardChatQueryPlan, + ) -> str: + """Filter the schema prompt down to the planned tables when possible.""" + if not schema_snippets: + return "" + desired_tables = query_plan.schema_lookup_tables or query_plan.relevant_tables + if not desired_tables: + return "\n\n".join(snippet.to_prompt_text() for snippet in schema_snippets.values()) + return "\n\n".join( + schema_snippets[table_name].to_prompt_text() + for table_name in desired_tables + if table_name in schema_snippets + ) + + @staticmethod + def _fallback_answer_text( + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + sql_results: list[dict[str, Any]] | None, + ) -> str: + """Fallback response when answer composition fails.""" + if sql_results is not None: + if not sql_results: + return "I didn't find any matching rows for that question." + return "Here are the matching results: " + json.dumps(sql_results[:3], default=str) + if retrieved_documents: + return DashboardChatRuntime._compact_snippet(retrieved_documents[0].content) + return "I couldn't find enough context to answer that." + + @staticmethod + def _compact_snippet(content: str, max_length: int = 220) -> str: + """Collapse whitespace and trim long snippets for citations and suggestions.""" + normalized = " ".join(content.split()) + if len(normalized) <= max_length: + return normalized + return normalized[: max_length - 3].rstrip() + "..." 
+ + @staticmethod + def _chart_id_from_source_identifier(source_identifier: str) -> int | None: + """Extract chart ids from dashboard export source identifiers.""" + parts = source_identifier.split(":") + if len(parts) >= 4 and parts[-2] == "chart": + try: + return int(parts[-1]) + except ValueError: + return None + return None + + @staticmethod + def _unique_id_from_source_identifier(source_identifier: str) -> str | None: + """Extract dbt unique ids from manifest/catalog source identifiers.""" + if ":" not in source_identifier: + return None + prefix, unique_id = source_identifier.split(":", 1) + if prefix not in {"manifest", "catalog"}: + return None + return unique_id diff --git a/ddpui/core/dashboard_chat/runtime_types.py b/ddpui/core/dashboard_chat/runtime_types.py new file mode 100644 index 000000000..b6428bd2f --- /dev/null +++ b/ddpui/core/dashboard_chat/runtime_types.py @@ -0,0 +1,176 @@ +"""Typed runtime contracts for dashboard chat orchestration.""" + +from dataclasses import asdict, dataclass, field +from enum import Enum +from typing import Any + + +class DashboardChatIntent(str, Enum): + """Supported high-level intents for dashboard chat.""" + + DATA_QUERY = "data_query" + CONTEXT_QUERY = "context_query" + NEEDS_CLARIFICATION = "needs_clarification" + SMALL_TALK = "small_talk" + IRRELEVANT = "irrelevant" + + +class DashboardChatPlanMode(str, Enum): + """Execution modes chosen after planning.""" + + SQL = "sql" + CONTEXT = "context" + CLARIFY = "clarify" + + +@dataclass(frozen=True) +class DashboardChatConversationMessage: + """Single prior conversation message.""" + + role: str + content: str + + +@dataclass(frozen=True) +class DashboardChatIntentDecision: + """Intent-routing outcome.""" + + intent: DashboardChatIntent + reason: str + force_sql_path: bool = False + clarification_question: str | None = None + + +@dataclass(frozen=True) +class DashboardChatRetrievedDocument: + """Retrieved document returned from the vector store.""" + + document_id: str 
+ source_type: str + source_identifier: str + content: str + dashboard_id: int | None = None + distance: float | None = None + + +@dataclass(frozen=True) +class DashboardChatTextFilterPlan: + """Filter that requires a distinct-values lookup before SQL generation.""" + + table_name: str + column_name: str + requested_value: str + + +@dataclass(frozen=True) +class DashboardChatQueryPlan: + """Structured plan produced before SQL generation.""" + + mode: DashboardChatPlanMode + reason: str + relevant_tables: list[str] = field(default_factory=list) + schema_lookup_tables: list[str] = field(default_factory=list) + text_filters: list[DashboardChatTextFilterPlan] = field(default_factory=list) + answer_strategy: str | None = None + clarification_question: str | None = None + + +@dataclass(frozen=True) +class DashboardChatSqlDraft: + """LLM-produced SQL draft and metadata.""" + + sql: str | None + reason: str + warnings: list[str] = field(default_factory=list) + clarification_question: str | None = None + + +@dataclass(frozen=True) +class DashboardChatSchemaSnippet: + """Schema description for a warehouse table.""" + + table_name: str + columns: list[dict[str, Any]] + + def to_prompt_text(self) -> str: + """Format a compact schema summary for prompts.""" + column_lines = [] + for column in self.columns: + column_lines.append( + "- {name} ({data_type}, nullable={nullable})".format( + name=column.get("name"), + data_type=column.get("data_type"), + nullable=column.get("nullable"), + ) + ) + return f"Table: {self.table_name}\n" + "\n".join(column_lines) + + +@dataclass(frozen=True) +class DashboardChatSqlValidationResult: + """Outcome of SQL guard validation.""" + + is_valid: bool + sanitized_sql: str | None + tables: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + errors: list[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class DashboardChatCitation: + """Citation attached to a chat response.""" + + source_type: 
str + source_identifier: str + title: str + snippet: str + dashboard_id: int | None = None + table_name: str | None = None + + def to_dict(self) -> dict[str, Any]: + """Return a serializable citation payload.""" + return asdict(self) + + +@dataclass(frozen=True) +class DashboardChatRelatedDashboard: + """Dashboard suggestion when the current dashboard is not sufficient.""" + + dashboard_id: int + title: str + reason: str + + def to_dict(self) -> dict[str, Any]: + """Return a serializable suggestion payload.""" + return asdict(self) + + +@dataclass(frozen=True) +class DashboardChatResponse: + """Final runtime response returned by the LangGraph runner.""" + + answer_text: str + intent: DashboardChatIntent + citations: list[DashboardChatCitation] = field(default_factory=list) + related_dashboards: list[DashboardChatRelatedDashboard] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + sql: str | None = None + sql_results: list[dict[str, Any]] | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + """Return a serializable payload.""" + return { + "answer_text": self.answer_text, + "intent": self.intent.value, + "citations": [citation.to_dict() for citation in self.citations], + "related_dashboards": [ + related_dashboard.to_dict() + for related_dashboard in self.related_dashboards + ], + "warnings": self.warnings, + "sql": self.sql, + "sql_results": self.sql_results, + "metadata": self.metadata, + } diff --git a/ddpui/core/dashboard_chat/sql_guard.py b/ddpui/core/dashboard_chat/sql_guard.py new file mode 100644 index 000000000..579da2e8d --- /dev/null +++ b/ddpui/core/dashboard_chat/sql_guard.py @@ -0,0 +1,130 @@ +"""SQL safety guardrails for dashboard chat.""" + +import re + +import sqlparse + +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.runtime_types import DashboardChatSqlValidationResult + +FORBIDDEN_SQL_KEYWORDS = { + 
"INSERT", + "UPDATE", + "DELETE", + "DROP", + "ALTER", + "CREATE", + "TRUNCATE", + "GRANT", + "REVOKE", + "MERGE", + "CALL", + "EXECUTE", + "VACUUM", +} + +PII_PATTERNS = [ + r"\b(name|phone|email|address|national_id|id_number)\b", + r"\b(contact|mobile|telephone|personal|identification)\b", + r"\b(firstname|lastname|full_name|participant_name|survivor_name)\b", +] + + +class DashboardChatSqlGuard: + """Validate SQL before it reaches the warehouse.""" + + def __init__( + self, + allowlist: DashboardChatAllowlist, + max_rows: int = 200, + ): + self.allowlist = allowlist + self.max_rows = max_rows + + def validate(self, sql: str) -> DashboardChatSqlValidationResult: + """Validate a generated SQL statement.""" + errors: list[str] = [] + warnings: list[str] = [] + + sql_without_comments = self._strip_sql_comments(sql) + statements = [ + statement.strip() for statement in sqlparse.split(sql_without_comments) if statement.strip() + ] + if len(statements) != 1: + return DashboardChatSqlValidationResult( + is_valid=False, + sanitized_sql=None, + errors=["Multiple statements are not allowed"], + ) + + sanitized_sql = statements[0].rstrip(";").strip() + sql_upper = sanitized_sql.upper() + + if not (sql_upper.startswith("SELECT") or sql_upper.startswith("WITH")): + errors.append("Query must start with SELECT or WITH") + + for keyword in FORBIDDEN_SQL_KEYWORDS: + if re.search(rf"\b{keyword}\b", sql_upper): + errors.append(f"Forbidden keyword detected: {keyword}") + + limit_match = re.search(r"\bLIMIT\s+(\d+)\b", sql_upper) + if limit_match: + limit_value = int(limit_match.group(1)) + if limit_value > self.max_rows: + errors.append(f"LIMIT {limit_value} exceeds the maximum allowed {self.max_rows}") + else: + sanitized_sql = f"{sanitized_sql}\nLIMIT {self.max_rows}" + warnings.append(f"No LIMIT clause found. Added LIMIT {self.max_rows}.") + + if re.search(r"\bSELECT\s+\*", sql_upper): + warnings.append("SELECT * detected. 
Prefer explicit column lists.") + + for pii_pattern in PII_PATTERNS: + if re.search(pii_pattern, sanitized_sql, re.IGNORECASE): + warnings.append(f"Query may touch PII-like columns matching {pii_pattern}.") + + tables = self._extract_table_names(sanitized_sql) + for table_name in tables: + if not self.allowlist.is_allowed(table_name): + errors.append( + f"Table '{table_name}' is not accessible in the current dashboard context" + ) + + return DashboardChatSqlValidationResult( + is_valid=not errors, + sanitized_sql=sanitized_sql if not errors else None, + tables=tables, + warnings=warnings, + errors=errors, + ) + + @staticmethod + def _strip_sql_comments(sql: str) -> str: + """Remove line and block comments before validation.""" + sql_without_block_comments = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL) + return re.sub(r"--.*", "", sql_without_block_comments) + + @classmethod + def _extract_table_names(cls, sql: str) -> list[str]: + """Extract physical table names from FROM/JOIN clauses.""" + sql_without_quotes = sql.replace('"', "").replace("`", "") + cte_names = set( + cte_name.lower() + for cte_name in re.findall( + r"(?:\bWITH\b|,)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s+AS\s*\(", + sql_without_quotes, + flags=re.IGNORECASE, + ) + ) + + tables: list[str] = [] + for table_name in re.findall( + r"\b(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)?)", + sql_without_quotes, + flags=re.IGNORECASE, + ): + if table_name.upper() in FORBIDDEN_SQL_KEYWORDS or table_name.lower() in cte_names: + continue + tables.append(table_name.lower()) + + return list(dict.fromkeys(tables)) diff --git a/ddpui/core/dashboard_chat/warehouse_tools.py b/ddpui/core/dashboard_chat/warehouse_tools.py new file mode 100644 index 000000000..6afa3ebdf --- /dev/null +++ b/ddpui/core/dashboard_chat/warehouse_tools.py @@ -0,0 +1,154 @@ +"""Warehouse access helpers used by dashboard chat runtime.""" + +from typing import Any + +from ddpui.core.dashboard_chat.runtime_types import 
DashboardChatSchemaSnippet +from ddpui.models.org import Org, OrgWarehouse +from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory + + +class DashboardChatWarehouseToolsError(Exception): + """Raised when a warehouse-backed dashboard chat action cannot complete.""" + + +class DashboardChatWarehouseTools: + """Read-only warehouse helpers for dashboard chat nodes.""" + + def __init__( + self, + org: Org, + org_warehouse: OrgWarehouse | None = None, + warehouse_client: Any = None, + max_rows: int = 200, + ): + self.org = org + self.org_warehouse = org_warehouse or OrgWarehouse.objects.filter(org=org).first() + if self.org_warehouse is None: + raise DashboardChatWarehouseToolsError("Warehouse not configured for dashboard chat") + + self.max_rows = max_rows + self.warehouse_client = warehouse_client or WarehouseFactory.get_warehouse_client( + self.org_warehouse + ) + + def get_schema_snippets(self, tables: list[str]) -> dict[str, DashboardChatSchemaSnippet]: + """Load table column metadata for prompt grounding.""" + snippets: dict[str, DashboardChatSchemaSnippet] = {} + + for table_name in list(dict.fromkeys(tables)): + parsed_table = self._parse_table_name(table_name) + if parsed_table is None: + continue + schema_name, bare_table_name = parsed_table + columns = self.warehouse_client.get_table_columns(schema_name, bare_table_name) + if not columns: + continue + snippets[table_name.lower()] = DashboardChatSchemaSnippet( + table_name=table_name.lower(), + columns=list(columns), + ) + + return snippets + + def get_distinct_values( + self, + table_name: str, + column_name: str, + limit: int = 50, + ) -> list[str]: + """Return distinct, non-empty values for a text filter column.""" + parsed_table = self._parse_table_name(table_name) + if parsed_table is None: + raise DashboardChatWarehouseToolsError( + f"Table '{table_name}' must be schema-qualified for distinct lookups" + ) + schema_name, bare_table_name = parsed_table + + if not 
self.warehouse_client.column_exists(schema_name, bare_table_name, column_name): + return [] + + query = self._build_distinct_values_query( + schema_name=schema_name, + table_name=bare_table_name, + column_name=column_name, + limit=limit, + ) + rows = self.warehouse_client.execute(query) + return [ + str(row.get("value")) + for row in rows + if row.get("value") is not None and str(row.get("value")).strip() + ] + + def execute_sql(self, sql: str) -> list[dict[str, Any]]: + """Execute a validated read-only SQL statement.""" + rows = self.warehouse_client.execute(sql) + return list(rows[: self.max_rows]) + + def _build_distinct_values_query( + self, + schema_name: str, + table_name: str, + column_name: str, + limit: int, + ) -> str: + """Build a warehouse-specific query for distinct values.""" + if self.org_warehouse.wtype == "postgres": + quoted_column = self._quote_postgres_identifier(column_name) + return f""" + SELECT DISTINCT {quoted_column} AS value + FROM {self._quote_table_ref(schema_name, table_name)} + WHERE {quoted_column} IS NOT NULL + AND TRIM(CAST({quoted_column} AS TEXT)) != '' + ORDER BY value + LIMIT {int(limit)} + """ + + if self.org_warehouse.wtype == "bigquery": + quoted_column = self._quote_bigquery_identifier(column_name) + return f""" + SELECT DISTINCT {quoted_column} AS value + FROM {self._quote_bigquery_table_ref(schema_name, table_name)} + WHERE {quoted_column} IS NOT NULL + AND TRIM(CAST({quoted_column} AS STRING)) != '' + ORDER BY value + LIMIT {int(limit)} + """ + + raise DashboardChatWarehouseToolsError( + f"Unsupported warehouse type for dashboard chat: {self.org_warehouse.wtype}" + ) + + def _quote_table_ref(self, schema_name: str, table_name: str) -> str: + """Quote a Postgres schema.table reference.""" + return ( + f"{self._quote_postgres_identifier(schema_name)}." 
+ f"{self._quote_postgres_identifier(table_name)}" + ) + + def _quote_bigquery_table_ref(self, schema_name: str, table_name: str) -> str: + """Quote a BigQuery fully-qualified table reference.""" + project_name = self.org_warehouse.bq_location + if not project_name: + raise DashboardChatWarehouseToolsError("BigQuery location/project not configured") + return f"`{project_name}.{schema_name}.{table_name}`" + + @staticmethod + def _quote_postgres_identifier(identifier: str) -> str: + """Quote a Postgres identifier while preserving its literal value.""" + escaped_identifier = identifier.replace('"', '""') + return f'"{escaped_identifier}"' + + @staticmethod + def _quote_bigquery_identifier(identifier: str) -> str: + """Quote a BigQuery identifier while preserving its literal value.""" + escaped_identifier = identifier.replace("`", "") + return f"`{escaped_identifier}`" + + @staticmethod + def _parse_table_name(table_name: str | None) -> tuple[str, str] | None: + """Parse schema.table into separate pieces.""" + if not table_name or "." 
not in table_name: + return None + schema_name, bare_table_name = table_name.split(".", 1) + return schema_name.strip().strip('"').strip("`"), bare_table_name.strip().strip('"').strip("`") diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py new file mode 100644 index 000000000..f9a24e677 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -0,0 +1,474 @@ +"""Tests for dashboard chat LangGraph runtime, allowlist, and SQL guard.""" + +import os + +import django +import pytest +from django.contrib.auth.models import User +from django.db import transaction + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist, DashboardChatAllowlistBuilder +from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig +from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatConversationMessage, + DashboardChatIntent, + DashboardChatPlanMode, + DashboardChatQueryPlan, + DashboardChatSqlDraft, + DashboardChatTextFilterPlan, +) +from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.vector_store import DashboardChatVectorQueryResult +from ddpui.models.dashboard import Dashboard +from ddpui.models.org import Org +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.models.visualization import Chart +from ddpui.tests.api_tests.test_user_org_api import seed_db + +pytestmark = pytest.mark.django_db(transaction=True) + + +class FakeVectorStore: + """Deterministic vector store used by runtime tests.""" + + def __init__(self, rows): + self.rows = list(rows) + self.calls = [] + + def query(self, org_id, query_text, n_results=5, 
source_types=None, dashboard_id=None): + self.calls.append( + { + "org_id": org_id, + "query_text": query_text, + "n_results": n_results, + "source_types": list(source_types) if source_types else [], + "dashboard_id": dashboard_id, + } + ) + results = [] + for row in self.rows: + if source_types and row.metadata.get("source_type") not in source_types: + continue + if dashboard_id is not None and row.metadata.get("dashboard_id") != dashboard_id: + continue + results.append(row) + return results[:n_results] + + +class ContextOnlyLlm: + """Minimal LLM stub for context-answer tests.""" + + def plan_query(self, *args, **kwargs): + raise AssertionError("plan_query should not be called for context-only heuristics") + + def classify_intent(self, *args, **kwargs): + raise AssertionError("classify_intent should not be called for context-only heuristics") + + def generate_sql(self, *args, **kwargs): + raise AssertionError("generate_sql should not be called for context-only heuristics") + + def compose_answer( + self, + user_query, + dashboard_summary, + retrieved_documents, + sql, + sql_results, + warnings, + related_dashboard_titles, + ): + assert sql is None + assert sql_results is None + return "The reach metric shows how many beneficiaries were served over time." 
+ + +class FakeWarehouseTools: + """Warehouse stub that records schema, distinct, and execution calls.""" + + def __init__(self): + self.schema_requests = [] + self.distinct_requests = [] + self.executed_sql = [] + + def get_schema_snippets(self, tables): + self.schema_requests.append(list(tables)) + return { + "analytics.program_reach": self._schema_snippet( + "analytics.program_reach", + [ + {"name": "program_name", "data_type": "text", "nullable": False}, + {"name": "beneficiaries", "data_type": "integer", "nullable": False}, + ], + ) + } + + def get_distinct_values(self, table_name, column_name, limit=50): + self.distinct_requests.append((table_name, column_name, limit)) + return ["Education", "Health"] + + def execute_sql(self, sql): + self.executed_sql.append(sql) + return [{"program_name": "Education", "beneficiary_count": 120}] + + @staticmethod + def _schema_snippet(table_name, columns): + from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet + + return DashboardChatSchemaSnippet(table_name=table_name, columns=columns) + + +class SqlPathLlm: + """LLM stub that forces the runtime through planning, distinct lookup, and SQL execution.""" + + def classify_intent(self, *args, **kwargs): + raise AssertionError("Heuristic data routing should handle this test case") + + def plan_query( + self, + user_query, + conversation_history, + dashboard_summary, + retrieved_documents, + schema_prompt, + allowlisted_tables, + ): + assert "analytics.program_reach" in allowlisted_tables + assert "program_name" in schema_prompt + return DashboardChatQueryPlan( + mode=DashboardChatPlanMode.SQL, + reason="Needs aggregate data", + relevant_tables=["analytics.program_reach"], + schema_lookup_tables=["analytics.program_reach"], + text_filters=[ + DashboardChatTextFilterPlan( + table_name="analytics.program_reach", + column_name="program_name", + requested_value="Education", + ) + ], + ) + + def generate_sql( + self, + user_query, + dashboard_summary, + 
query_plan, + schema_prompt, + distinct_values, + allowlisted_tables, + ): + assert distinct_values["analytics.program_reach.program_name"] == ["Education", "Health"] + return DashboardChatSqlDraft( + sql=( + "SELECT program_name, COUNT(*) AS beneficiary_count " + "FROM analytics.program_reach " + "WHERE program_name = 'Education' " + "GROUP BY program_name " + "LIMIT 50" + ), + reason="Uses the allowlisted chart table with an exact filter value.", + ) + + def compose_answer( + self, + user_query, + dashboard_summary, + retrieved_documents, + sql, + sql_results, + warnings, + related_dashboard_titles, + ): + assert sql is not None + assert sql_results == [{"program_name": "Education", "beneficiary_count": 120}] + return "Education has 120 beneficiaries on the current dashboard." + + +@pytest.fixture +def org(): + organization = Org.objects.create( + name="Dashboard Chat Org", + slug="dashchat", + airbyte_workspace_id="workspace-1", + ) + yield organization + organization.delete() + + +@pytest.fixture +def orguser(org, seed_db): + user = User.objects.create( + username="dashchat-user", + email="dashchat-user@test.com", + password="testpassword", + ) + org_user = OrgUser.objects.create( + user=user, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield org_user + org_user.delete() + user.delete() + + +@pytest.fixture +def primary_chart(org, orguser): + chart = Chart.objects.create( + title="Program Reach", + description="Monthly reach", + chart_type="line", + schema_name="analytics", + table_name="program_reach", + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield chart + chart.delete() + + +@pytest.fixture +def related_chart(org, orguser): + chart = Chart.objects.create( + title="Funding by Donor", + description="Donor funding mix", + chart_type="bar", + schema_name="analytics", + table_name="donor_funding", + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield chart + chart.delete() + + 
+@pytest.fixture +def primary_dashboard(org, orguser, primary_chart): + dashboard = Dashboard.objects.create( + title="Impact Overview", + description="Program KPIs and reach", + dashboard_type="native", + components={ + "chart-1": { + "id": "chart-1", + "type": "chart", + "config": {"chartId": primary_chart.id}, + } + }, + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield dashboard + dashboard.delete() + + +@pytest.fixture +def related_dashboard(org, orguser, related_chart): + dashboard = Dashboard.objects.create( + title="Funding Overview", + description="Funding KPIs and donor mix", + dashboard_type="native", + components={ + "chart-2": { + "id": "chart-2", + "type": "chart", + "config": {"chartId": related_chart.id}, + } + }, + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield dashboard + dashboard.delete() + + +def test_allowlist_adds_upstream_dbt_tables(): + """Allowlist should include chart tables and their upstream dbt lineage.""" + export_payload = { + "dashboard": {"title": "Impact Overview"}, + "charts": [{"id": 1, "schema_name": "analytics", "table_name": "fact_reach"}], + } + manifest_json = { + "nodes": { + "model.dalgo.fact_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "fact_reach", + "depends_on": {"nodes": ["model.dalgo.dim_program", "source.dalgo.raw_students"]}, + }, + "model.dalgo.dim_program": { + "resource_type": "model", + "schema": "analytics", + "name": "dim_program", + "depends_on": {"nodes": []}, + }, + }, + "sources": { + "source.dalgo.raw_students": { + "resource_type": "source", + "schema": "raw", + "name": "students", + } + }, + } + + allowlist = DashboardChatAllowlistBuilder.build(export_payload, manifest_json=manifest_json) + + assert allowlist.chart_tables == {"analytics.fact_reach"} + assert "analytics.dim_program" in allowlist.upstream_tables + assert "raw.students" in allowlist.allowed_tables + assert allowlist.is_allowed("analytics.fact_reach") is True + 
assert allowlist.is_unique_id_allowed("model.dalgo.dim_program") is True + + +def test_sql_guard_enforces_single_statement_allowlist_and_limit(): + """SQL guard should block unsafe queries and add a row limit when absent.""" + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + + multi_statement = guard.validate( + "SELECT * FROM analytics.program_reach; DELETE FROM analytics.program_reach" + ) + assert multi_statement.is_valid is False + assert multi_statement.errors == ["Multiple statements are not allowed"] + + disallowed_table = guard.validate("SELECT * FROM analytics.other_table") + assert disallowed_table.is_valid is False + assert any("not accessible" in error for error in disallowed_table.errors) + + allowed_query = guard.validate("SELECT COUNT(*) AS beneficiary_count FROM analytics.program_reach") + assert allowed_query.is_valid is True + assert allowed_query.sanitized_sql.endswith("LIMIT 200") + assert any("No LIMIT clause found" in warning for warning in allowed_query.warnings) + + +def test_runtime_context_query_returns_citations_and_related_dashboards( + org, + primary_dashboard, + related_dashboard, +): + """Context questions should return citations and cross-dashboard suggestions.""" + transaction.commit() + vector_store = FakeVectorStore( + [ + DashboardChatVectorQueryResult( + document_id="doc-dashboard-context", + content="This dashboard tracks monthly reach across programs.", + metadata={ + "source_type": "dashboard_context", + "source_identifier": f"dashboard:{primary_dashboard.id}:context", + "dashboard_id": primary_dashboard.id, + }, + distance=0.02, + ), + DashboardChatVectorQueryResult( + document_id="doc-org-context", + content="Dalgo supports NGO dashboards and program reporting.", + metadata={ + "source_type": "org_context", + "source_identifier": f"org:{org.id}:context", + }, + distance=0.04, + ), + DashboardChatVectorQueryResult( + 
document_id="doc-related-dashboard", + content="This dashboard shows donor-wise funding and cashflow trends.", + metadata={ + "source_type": "dashboard_export", + "source_identifier": f"dashboard:{related_dashboard.id}:summary", + "dashboard_id": related_dashboard.id, + }, + distance=0.05, + ), + ] + ) + + runtime = DashboardChatRuntime( + vector_store=vector_store, + llm_client=ContextOnlyLlm(), + runtime_config=DashboardChatRuntimeConfig( + retrieval_limit=6, + related_dashboard_limit=2, + max_query_rows=200, + max_distinct_values=20, + max_schema_tables=4, + ), + ) + + response = runtime.run( + org=org, + dashboard_id=primary_dashboard.id, + user_query="Explain the reach metric", + ) + + assert response.intent == DashboardChatIntent.CONTEXT_QUERY + assert response.sql is None + assert response.metadata["query_plan_mode"] == "context" + assert len(response.citations) >= 2 + assert response.citations[0].source_type == "dashboard_context" + assert response.related_dashboards[0].dashboard_id == related_dashboard.id + assert response.related_dashboards[0].title == "Funding Overview" + + +def test_runtime_data_query_uses_distinct_values_before_sql_execution( + org, + primary_dashboard, +): + """Data questions should fetch distinct values before generating and executing SQL.""" + transaction.commit() + vector_store = FakeVectorStore( + [ + DashboardChatVectorQueryResult( + document_id="doc-dashboard-export", + content="Chart id: 1. 
Data source: analytics.program_reach.", + metadata={ + "source_type": "dashboard_export", + "source_identifier": f"dashboard:{primary_dashboard.id}:chart:1", + "dashboard_id": primary_dashboard.id, + }, + distance=0.01, + ) + ] + ) + fake_warehouse = FakeWarehouseTools() + + runtime = DashboardChatRuntime( + vector_store=vector_store, + llm_client=SqlPathLlm(), + warehouse_tools_factory=lambda org: fake_warehouse, + runtime_config=DashboardChatRuntimeConfig( + retrieval_limit=6, + related_dashboard_limit=2, + max_query_rows=200, + max_distinct_values=20, + max_schema_tables=4, + ), + ) + + response = runtime.run( + org=org, + dashboard_id=primary_dashboard.id, + user_query="How many beneficiaries are in Education?", + conversation_history=[ + DashboardChatConversationMessage(role="user", content="Show me beneficiary data") + ], + ) + + assert fake_warehouse.distinct_requests == [("analytics.program_reach", "program_name", 20)] + assert len(fake_warehouse.executed_sql) == 1 + assert "WHERE program_name = 'Education'" in fake_warehouse.executed_sql[0] + assert response.intent == DashboardChatIntent.DATA_QUERY + assert response.sql is not None + assert response.metadata["query_plan_mode"] == "sql" + assert any(citation.source_type == "warehouse_table" for citation in response.citations) From 58b817feffb7f2785f2d9850a3c421c1be0b349b Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 06:51:35 +0530 Subject: [PATCH 09/49] feat(ai-chat): add dashboard chat websocket transport --- ddpui/celeryworkers/tasks.py | 104 ++++++++++++ ddpui/core/dashboard_chat/events.py | 48 ++++++ ddpui/core/dashboard_chat/session_service.py | 136 ++++++++++++++++ ddpui/tests/core/dashboard_chat/test_tasks.py | 81 ++++++++++ .../test_dashboard_chat_consumer.py | 78 +++++++++ ddpui/urls.py | 2 + ddpui/websockets/dashboard_chat_consumer.py | 148 ++++++++++++++++++ 7 files changed, 597 insertions(+) create mode 100644 ddpui/core/dashboard_chat/events.py create mode 100644 
ddpui/core/dashboard_chat/session_service.py create mode 100644 ddpui/tests/websockets/test_dashboard_chat_consumer.py create mode 100644 ddpui/websockets/dashboard_chat_consumer.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index c9360d6b8..14a002ba3 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -98,6 +98,16 @@ from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory from ddpui.core import llm_service from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService +from ddpui.core.dashboard_chat.events import ( + build_dashboard_chat_event, + publish_dashboard_chat_event, +) +from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime +from ddpui.core.dashboard_chat.session_service import ( + create_dashboard_chat_assistant_message, + list_dashboard_chat_history, + serialize_dashboard_chat_message, +) from ddpui.utils.helpers import ( find_key_in_dictionary, convert_sqlalchemy_rows_to_csv_string, @@ -1337,6 +1347,100 @@ def build_dashboard_chat_context_for_org(self, org_id: int): logger.exception("failed to release dashboard chat context build lock for org=%s", org_id) +@app.task +def run_dashboard_chat_turn(session_id: str, user_message_id: int): + """Run one dashboard chat turn asynchronously and emit websocket events.""" + from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession + + session = ( + DashboardChatSession.objects.select_related("org", "dashboard", "orguser") + .filter(session_id=session_id) + .first() + ) + if session is None or session.dashboard is None: + logger.warning( + "dashboard chat turn skipped because session %s was not found or has no dashboard", + session_id, + ) + return {"status": "skipped_missing_session", "session_id": session_id} + + user_message = ( + DashboardChatMessage.objects.filter( + id=user_message_id, + session=session, + role="user", + ) + .first() + ) + if user_message is None: + 
logger.warning( + "dashboard chat turn skipped because message %s was not found in session %s", + user_message_id, + session_id, + ) + return {"status": "skipped_missing_message", "session_id": session_id} + + try: + response = DashboardChatRuntime().run( + org=session.org, + dashboard_id=session.dashboard.id, + user_query=user_message.content, + conversation_history=list_dashboard_chat_history( + session, + exclude_message_id=user_message.id, + ), + ) + assistant_payload = { + "intent": response.intent.value, + "citations": [citation.to_dict() for citation in response.citations], + "related_dashboards": [ + related_dashboard.to_dict() + for related_dashboard in response.related_dashboards + ], + "warnings": response.warnings, + "sql": response.sql, + "sql_results": response.sql_results, + "metadata": response.metadata, + } + assistant_message = create_dashboard_chat_assistant_message( + session=session, + content=response.answer_text, + payload=assistant_payload, + ) + publish_dashboard_chat_event( + str(session.session_id), + build_dashboard_chat_event( + event_type="assistant_message", + session_id=str(session.session_id), + dashboard_id=session.dashboard.id, + message_id=str(assistant_message.id), + data=serialize_dashboard_chat_message(assistant_message), + ), + ) + return { + "status": "completed", + "session_id": str(session.session_id), + "assistant_message_id": assistant_message.id, + } + except Exception: + logger.exception( + "dashboard chat turn failed for session=%s message_id=%s", + session_id, + user_message_id, + ) + publish_dashboard_chat_event( + str(session.session_id), + build_dashboard_chat_event( + event_type="error", + session_id=str(session.session_id), + dashboard_id=session.dashboard.id, + message_id=str(user_message.id), + data={"message": "Something went wrong while generating the response"}, + ), + ) + raise + + @app.on_after_finalize.connect def setup_periodic_tasks(sender: Celery, **kwargs): """periodic celery tasks""" diff --git 
a/ddpui/core/dashboard_chat/events.py b/ddpui/core/dashboard_chat/events.py new file mode 100644 index 000000000..ec3ddec5f --- /dev/null +++ b/ddpui/core/dashboard_chat/events.py @@ -0,0 +1,48 @@ +"""Websocket event helpers for dashboard chat.""" + +import json + +from asgiref.sync import async_to_sync +from channels.layers import get_channel_layer +from django.utils import timezone + + +def dashboard_chat_group_name(session_id: str) -> str: + """Return the channel-layer group name for a dashboard chat session.""" + return f"dashboard_chat_{session_id}" + + +def build_dashboard_chat_event( + *, + event_type: str, + dashboard_id: int, + data: dict, + session_id: str | None = None, + message_id: str | None = None, +) -> dict: + """Build a dashboard chat websocket event envelope.""" + event = { + "event_type": event_type, + "dashboard_id": dashboard_id, + "occurred_at": timezone.now().isoformat(), + "data": data, + } + if session_id is not None: + event["session_id"] = session_id + if message_id is not None: + event["message_id"] = message_id + return event + + +def publish_dashboard_chat_event(session_id: str, event: dict) -> None: + """Publish a dashboard chat event to the session channel-layer group.""" + channel_layer = get_channel_layer() + if channel_layer is None: + return + async_to_sync(channel_layer.group_send)( + dashboard_chat_group_name(session_id), + { + "type": "dashboard_chat_event", + "event": json.dumps(event), + }, + ) diff --git a/ddpui/core/dashboard_chat/session_service.py b/ddpui/core/dashboard_chat/session_service.py new file mode 100644 index 000000000..3cda6fb11 --- /dev/null +++ b/ddpui/core/dashboard_chat/session_service.py @@ -0,0 +1,136 @@ +"""Session and message persistence helpers for dashboard chat.""" + +from uuid import UUID + +from django.db import transaction +from django.db.models import Max +from django.utils import timezone + +from ddpui.core.dashboard_chat.runtime_types import DashboardChatConversationMessage +from 
ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import ( + DashboardChatMessage, + DashboardChatMessageRole, + DashboardChatSession, +) +from ddpui.models.org_user import OrgUser + + +class DashboardChatSessionError(Exception): + """Raised when a dashboard chat session cannot be created or reused.""" + + +def get_or_create_dashboard_chat_session( + *, + orguser: OrgUser, + dashboard: Dashboard, + session_id: str | None, +) -> DashboardChatSession: + """Create a new session or validate an existing one for the current dashboard.""" + if session_id is None: + return DashboardChatSession.objects.create( + org=orguser.org, + orguser=orguser, + dashboard=dashboard, + ) + + try: + session_uuid = UUID(str(session_id)) + except ValueError as error: + raise DashboardChatSessionError("Invalid session_id") from error + + session = DashboardChatSession.objects.filter( + session_id=session_uuid, + org=orguser.org, + dashboard=dashboard, + ).first() + if session is None: + raise DashboardChatSessionError("Chat session not found for this dashboard") + return session + + +def create_dashboard_chat_user_message( + *, + session: DashboardChatSession, + content: str, + client_message_id: str | None, +) -> DashboardChatMessage: + """Persist one user message and advance the session timestamp.""" + return _create_dashboard_chat_message( + session=session, + role=DashboardChatMessageRole.USER.value, + content=content, + client_message_id=client_message_id, + payload=None, + ) + + +def create_dashboard_chat_assistant_message( + *, + session: DashboardChatSession, + content: str, + payload: dict | None, +) -> DashboardChatMessage: + """Persist one assistant message and advance the session timestamp.""" + return _create_dashboard_chat_message( + session=session, + role=DashboardChatMessageRole.ASSISTANT.value, + content=content, + client_message_id=None, + payload=payload, + ) + + +def list_dashboard_chat_history( + session: DashboardChatSession, + *, + 
exclude_message_id: int | None = None, +) -> list[DashboardChatConversationMessage]: + """Return prior session messages in the format expected by the runtime.""" + query = session.messages.order_by("sequence_number") + if exclude_message_id is not None: + query = query.exclude(id=exclude_message_id) + return [ + DashboardChatConversationMessage(role=message.role, content=message.content) + for message in query + ] + + +def serialize_dashboard_chat_message(message: DashboardChatMessage) -> dict: + """Return the websocket payload shape for one persisted chat message.""" + return { + "id": str(message.id), + "role": message.role, + "content": message.content, + "payload": message.payload or {}, + "created_at": message.created_at.isoformat(), + } + + +def _create_dashboard_chat_message( + *, + session: DashboardChatSession, + role: str, + content: str, + client_message_id: str | None, + payload: dict | None, +) -> DashboardChatMessage: + """Create a session-scoped chat message with a stable next sequence number.""" + with transaction.atomic(): + locked_session = DashboardChatSession.objects.select_for_update().get(id=session.id) + next_sequence_number = ( + locked_session.messages.aggregate(max_sequence_number=Max("sequence_number"))[ + "max_sequence_number" + ] + or 0 + ) + 1 + message = DashboardChatMessage.objects.create( + session=locked_session, + sequence_number=next_sequence_number, + role=role, + content=content, + client_message_id=client_message_id, + payload=payload, + ) + DashboardChatSession.objects.filter(id=locked_session.id).update(updated_at=timezone.now()) + return message diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 9feef826c..60e65d24c 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -14,10 +14,14 @@ from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.celeryworkers.tasks import ( 
build_dashboard_chat_context_for_org, + run_dashboard_chat_turn, schedule_dashboard_chat_context_builds, ) from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionResult +from ddpui.core.dashboard_chat.runtime_types import DashboardChatIntent, DashboardChatResponse from ddpui.models.org import Org, OrgDbt +from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org_preferences import OrgPreferences from ddpui.models.org_user import OrgUser from ddpui.models.role_based_access import Role @@ -62,6 +66,16 @@ def _create_org_dbt(org: Org) -> OrgDbt: return dbt +def _create_dashboard(orguser: OrgUser) -> Dashboard: + return Dashboard.objects.create( + title="Chat Dashboard", + dashboard_type="native", + created_by=orguser, + last_modified_by=orguser, + org=orguser.org, + ) + + def test_schedule_dashboard_chat_context_builds_enqueues_only_eligible_orgs(orguser): eligible_org = orguser.org _create_org_dbt(eligible_org) @@ -148,3 +162,70 @@ def test_build_dashboard_chat_context_for_org_runs_ingestion(orguser): assert result["source_document_counts"] == {"dashboard_export": 2} ingestion_service.ingest_org.assert_called_once() redis_lock.release.assert_called_once() + + +@patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") +@patch("ddpui.celeryworkers.tasks.DashboardChatRuntime") +def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( + runtime_class, + publish_event, + orguser, +): + _create_org_dbt(orguser.org) + dashboard = _create_dashboard(orguser) + session = DashboardChatSession.objects.create( + org=orguser.org, + orguser=orguser, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + runtime_class.return_value.run.return_value = DashboardChatResponse( + answer_text="Funding dropped because donor inflows 
slowed this quarter.", + intent=DashboardChatIntent.DATA_QUERY, + warnings=["Example warning"], + sql="SELECT 1", + sql_results=[{"value": 1}], + ) + + result = run_dashboard_chat_turn(str(session.session_id), user_message.id) + + assistant_message = DashboardChatMessage.objects.get(session=session, role="assistant") + assert assistant_message.sequence_number == 2 + assert assistant_message.content == "Funding dropped because donor inflows slowed this quarter." + assert assistant_message.payload["sql"] == "SELECT 1" + assert result["status"] == "completed" + publish_event.assert_called_once() + + +@patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") +@patch("ddpui.celeryworkers.tasks.DashboardChatRuntime") +def test_run_dashboard_chat_turn_publishes_error_when_runtime_fails( + runtime_class, + publish_event, + orguser, +): + _create_org_dbt(orguser.org) + dashboard = _create_dashboard(orguser) + session = DashboardChatSession.objects.create( + org=orguser.org, + orguser=orguser, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + runtime_class.return_value.run.side_effect = RuntimeError("boom") + + with pytest.raises(RuntimeError, match="boom"): + run_dashboard_chat_turn(str(session.session_id), user_message.id) + + assert DashboardChatMessage.objects.filter(session=session, role="assistant").count() == 0 + publish_event.assert_called_once() diff --git a/ddpui/tests/websockets/test_dashboard_chat_consumer.py b/ddpui/tests/websockets/test_dashboard_chat_consumer.py new file mode 100644 index 000000000..43b596012 --- /dev/null +++ b/ddpui/tests/websockets/test_dashboard_chat_consumer.py @@ -0,0 +1,78 @@ +import json +from unittest.mock import Mock, patch + +import pytest + +from ddpui.websockets.dashboard_chat_consumer import DashboardChatConsumer + + +def test_dashboard_chat_consumer_send_message_requires_message(): + consumer = 
DashboardChatConsumer() + consumer.send = Mock() + consumer.dashboard = Mock(id=42) + consumer.websocket_receive({"text": json.dumps({"action": "send_message"})}) + + payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + assert payload["event_type"] == "error" + assert payload["data"]["message"] == "Message is required" + + +def test_dashboard_chat_consumer_send_message_requires_available_chat(): + consumer = DashboardChatConsumer() + consumer.send = Mock() + consumer.dashboard = Mock(id=42) + consumer._chat_available = Mock(return_value=(False, "Chat unavailable")) + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + } + ) + } + ) + + payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + assert payload["event_type"] == "error" + assert payload["data"]["message"] == "Chat unavailable" + + +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_event") +@patch("ddpui.websockets.dashboard_chat_consumer.run_dashboard_chat_turn.delay") +@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message") +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_send_message_creates_session_and_dispatches_task( + mock_get_or_create_session, + mock_create_user_message, + mock_delay, + mock_publish_event, +): + session = Mock(session_id="session-123") + user_message = Mock(id=17) + mock_get_or_create_session.return_value = session + mock_create_user_message.return_value = user_message + + consumer = DashboardChatConsumer() + consumer.dashboard = Mock(id=42) + consumer.orguser = Mock() + consumer._chat_available = Mock(return_value=(True, "")) + consumer._subscribe_to_session = Mock() + + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + "client_message_id": "ui-1", + } + ) + } + ) + + 
mock_get_or_create_session.assert_called_once() + mock_create_user_message.assert_called_once() + consumer._subscribe_to_session.assert_called_once_with("session-123") + mock_publish_event.assert_called_once() + mock_delay.assert_called_once_with("session-123", 17) diff --git a/ddpui/urls.py b/ddpui/urls.py index 6a8508842..b6123cac5 100644 --- a/ddpui/urls.py +++ b/ddpui/urls.py @@ -10,6 +10,7 @@ from ddpui.core.datainsights.generate_result import DataInsightsConsumer from ddpui.websockets.airbyte_consumer import SchemaCatalogConsumer, SourceCheckConnectionConsumer from ddpui.websockets.airbyte_consumer import DestinationCheckConnectionConsumer +from ddpui.websockets.dashboard_chat_consumer import DashboardChatConsumer def trigger_error(request): # pylint: disable=unused-argument # skipcq PYK-W0612 @@ -42,4 +43,5 @@ def healthcheck(request): # pylint:disable=unused-argument DestinationCheckConnectionConsumer.as_asgi(), ), path("wss/airbyte/connection/schema_catalog", SchemaCatalogConsumer.as_asgi()), + path("wss/dashboards/<int:dashboard_id>/chat/", DashboardChatConsumer.as_asgi()), ] diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py new file mode 100644 index 000000000..4671354c1 --- /dev/null +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -0,0 +1,148 @@ +import json +from urllib.parse import parse_qs + +from asgiref.sync import async_to_sync + +from ddpui.core.dashboard_chat.events import ( + build_dashboard_chat_event, + dashboard_chat_group_name, + publish_dashboard_chat_event, +) +from ddpui.core.dashboard_chat.session_service import ( + DashboardChatSessionError, + create_dashboard_chat_user_message, + get_or_create_dashboard_chat_session, +) +from ddpui.celeryworkers.tasks import run_dashboard_chat_turn +from ddpui.models.dashboard import Dashboard +from ddpui.models.org_preferences import OrgPreferences +from ddpui.models.role_based_access import RolePermission +from ddpui.utils.feature_flags import
get_all_feature_flags_for_org +from ddpui.websockets import BaseConsumer + +class DashboardChatConsumer(BaseConsumer): + """Authenticated websocket for dashboard-level chat.""" + + def connect(self): + query_string = parse_qs(self.scope["query_string"].decode()) + token = query_string.get("token", [None])[0] + orgslug = query_string.get("orgslug", [None])[0] + self.joined_session_groups = set() + + if not self.authenticate_user(token, orgslug): + self.close() + return + + dashboard_id = self.scope.get("url_route", {}).get("kwargs", {}).get("dashboard_id") + self.dashboard = Dashboard.objects.filter(id=dashboard_id, org=self.orguser.org).first() + if self.dashboard is None or not self._has_permission("can_view_dashboards"): + self.close() + return + + self.accept() + + def websocket_receive(self, message): + """Handle incoming dashboard chat websocket actions.""" + try: + payload = json.loads(message["text"]) + except (KeyError, ValueError): + self._respond_error("Invalid websocket payload") + return + + if payload.get("action") != "send_message": + self._respond_error("Unsupported websocket action") + return + + raw_message = str(payload.get("message") or "").strip() + if not raw_message: + self._respond_error("Message is required") + return + + available, unavailable_message = self._chat_available() + if not available: + self._respond_error(unavailable_message) + return + + try: + session = get_or_create_dashboard_chat_session( + orguser=self.orguser, + dashboard=self.dashboard, + session_id=payload.get("session_id"), + ) + except DashboardChatSessionError as error: + self._respond_error(str(error)) + return + + user_message = create_dashboard_chat_user_message( + session=session, + content=raw_message, + client_message_id=payload.get("client_message_id"), + ) + self._subscribe_to_session(str(session.session_id)) + publish_dashboard_chat_event( + str(session.session_id), + build_dashboard_chat_event( + event_type="progress", + session_id=str(session.session_id), + 
dashboard_id=self.dashboard.id, + message_id=str(user_message.id), + data={"label": "thinking"}, + ), + ) + run_dashboard_chat_turn.delay(str(session.session_id), user_message.id) + + def websocket_disconnect(self, message): + """Remove the socket from any joined session groups on disconnect.""" + if getattr(self, "channel_layer", None) is None: + return + for group_name in getattr(self, "joined_session_groups", set()): + async_to_sync(self.channel_layer.group_discard)(group_name, self.channel_name) + + def dashboard_chat_event(self, event): + """Forward dashboard chat events from the channel layer to the browser.""" + self.send(text_data=event["event"]) + + def _subscribe_to_session(self, session_id: str) -> None: + """Join the session-scoped channel-layer group if not already subscribed.""" + group_name = dashboard_chat_group_name(session_id) + if group_name in self.joined_session_groups: + return + async_to_sync(self.channel_layer.group_add)(group_name, self.channel_name) + self.joined_session_groups.add(group_name) + + def _respond_error(self, message: str) -> None: + """Send one direct websocket error event.""" + self.send( + text_data=json.dumps( + build_dashboard_chat_event( + event_type="error", + dashboard_id=self.dashboard.id if getattr(self, "dashboard", None) else None, + data={"message": message}, + ) + ) + ) + + def _chat_available(self) -> tuple[bool, str]: + """Return whether the current org is ready for dashboard chat.""" + feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get("AI_DASHBOARD_CHAT", False) + if not feature_enabled: + return False, "Chat with dashboards is not enabled for this organization" + + org_preferences = OrgPreferences.objects.filter(org=self.orguser.org).first() + if org_preferences is None or not org_preferences.ai_data_sharing_enabled: + return False, "Chat with dashboards is not enabled for this organization" + + if self.orguser.org.dbt is None: + return False, "Chat with dashboards is not available because 
dbt is not configured" + + if self.orguser.org.dbt.vector_last_ingested_at is None: + return False, "Chat with dashboards is still being prepared for this organization" + + return True, "" + + def _has_permission(self, permission_slug: str) -> bool: + """Check the authenticated orguser's role permission directly.""" + return RolePermission.objects.filter( + role=self.orguser.new_role, + permission__slug=permission_slug, + ).exists() From 44b00ea9d7fc8b9009fa723c0b6cf01699e9ea9a Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 07:41:05 +0530 Subject: [PATCH 10/49] fix(ai-chat): allow dashboard chat websocket auth via access token cookie --- ddpui/tests/websockets/test_base_consumer.py | 38 ++++++++++++++++++++ ddpui/websockets/__init__.py | 16 +++++++++ 2 files changed, 54 insertions(+) create mode 100644 ddpui/tests/websockets/test_base_consumer.py diff --git a/ddpui/tests/websockets/test_base_consumer.py b/ddpui/tests/websockets/test_base_consumer.py new file mode 100644 index 000000000..a98ab9cbb --- /dev/null +++ b/ddpui/tests/websockets/test_base_consumer.py @@ -0,0 +1,38 @@ +from unittest.mock import Mock, patch + +import pytest + +from ddpui.websockets import BaseConsumer + + +@patch("ddpui.websockets.AccessToken") +@patch("ddpui.websockets.User.objects.filter") +@patch("ddpui.websockets.OrgUser.objects.filter") +def test_base_consumer_authenticate_user_uses_cookie_token_when_query_token_missing( + mock_orguser_filter, + mock_user_filter, + mock_access_token, +): + user = Mock(email="test@example.com") + orguser = Mock() + mock_access_token.return_value.payload = {"user_id": 42} + mock_user_filter.return_value.first.return_value = user + mock_orguser_filter.return_value.filter.return_value.first.return_value = orguser + + consumer = BaseConsumer() + consumer.scope = { + "headers": [ + (b"cookie", b"csrftoken=test; access_token=cookie-token; refresh_token=refresh-token") + ] + } + + assert consumer.authenticate_user(None, "test-org") is True 
+ mock_access_token.assert_called_once_with("cookie-token") + + +def test_base_consumer_get_cookie_token_returns_none_without_cookie_header(): + consumer = BaseConsumer() + consumer.scope = {"headers": []} + + assert consumer._get_cookie_token() is None + diff --git a/ddpui/websockets/__init__.py b/ddpui/websockets/__init__.py index e263b4792..e918fb5e5 100644 --- a/ddpui/websockets/__init__.py +++ b/ddpui/websockets/__init__.py @@ -1,4 +1,5 @@ import json +from http.cookies import SimpleCookie from channels.generic.websocket import WebsocketConsumer from rest_framework_simplejwt.tokens import AccessToken from urllib.parse import parse_qs @@ -12,10 +13,25 @@ class BaseConsumer(WebsocketConsumer): + def _get_cookie_token(self): + """Return the access token from the websocket cookie header when present.""" + for header_name, header_value in self.scope.get("headers", []): + if header_name != b"cookie": + continue + + cookie = SimpleCookie() + cookie.load(header_value.decode()) + access_token = cookie.get("access_token") + if access_token is not None: + return access_token.value + + return None + def authenticate_user(self, token: str, orgslug: str): """Authenticate user using JWT token""" self.orguser = None self.user = None + token = token or self._get_cookie_token() try: # Validate and decode JWT using SimpleJWT's AccessToken From 99bec8093dd1183bfe94c5a4e209a09c09b3bd33 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 09:27:31 +0530 Subject: [PATCH 11/49] fix(ai-chat): align runtime safety and gating with approved plan --- ddpui/api/dashboard_native_api.py | 18 +++ ddpui/api/org_preferences_api.py | 18 +++ ddpui/core/dashboard_chat/config.py | 47 ++++++ ddpui/core/dashboard_chat/ingestion.py | 10 +- ddpui/core/dashboard_chat/runtime.py | 33 ++-- ddpui/core/dashboard_chat/session_service.py | 1 + ddpui/core/dashboard_chat/sql_guard.py | 112 +++++++++++++ .../test_dashboard_chat_settings_api.py | 59 +++++++ .../core/dashboard_chat/test_ingestion.py 
| 47 ++++++ .../tests/core/dashboard_chat/test_runtime.py | 152 +++++++++++++++++- .../dashboard_chat/test_session_service.py | 128 +++++++++++++++ 11 files changed, 614 insertions(+), 11 deletions(-) create mode 100644 ddpui/tests/core/dashboard_chat/test_session_service.py diff --git a/ddpui/api/dashboard_native_api.py b/ddpui/api/dashboard_native_api.py index 8d01610e8..c2702b14d 100644 --- a/ddpui/api/dashboard_native_api.py +++ b/ddpui/api/dashboard_native_api.py @@ -17,9 +17,11 @@ DashboardFilterType, ) from ddpui.models.dashboard_chat import DashboardAIContext +from ddpui.models.org_preferences import OrgPreferences from ddpui.models.org_user import OrgUser from ddpui.auth import has_permission from ddpui.utils.custom_logger import CustomLogger +from ddpui.utils.feature_flags import get_all_feature_flags_for_org from ddpui.services.dashboard_service import ( DashboardService, DashboardData, @@ -74,6 +76,19 @@ def _serialize_dashboard_ai_context(dashboard: Dashboard, context: DashboardAICo ) +def _ensure_dashboard_chat_feature_enabled(org) -> None: + """Hide dashboard chat settings endpoints unless the feature flag is enabled.""" + if not get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False): + raise HttpError(404, "Chat with dashboards is not enabled for this organization") + + +def _ensure_dashboard_chat_consent_enabled(org) -> None: + """Require consent before writing dashboard-specific AI context.""" + org_preferences = OrgPreferences.objects.filter(org=org).first() + if org_preferences is None or not org_preferences.ai_data_sharing_enabled: + raise HttpError(409, "Enable AI data sharing before updating dashboard AI context") + + # Endpoints @dashboard_native_router.get("/", response=List[DashboardResponse]) @has_permission(["can_view_dashboards"]) @@ -130,6 +145,7 @@ def export_dashboard(request, dashboard_id: int): def get_dashboard_ai_context(request, dashboard_id: int): """Load dashboard-level AI context settings for settings 
management.""" orguser: OrgUser = request.orguser + _ensure_dashboard_chat_feature_enabled(orguser.org) try: dashboard = DashboardService.get_dashboard(dashboard_id, orguser.org) @@ -154,6 +170,8 @@ def update_dashboard_ai_context( ): """Update dashboard-level AI context markdown for settings management.""" orguser: OrgUser = request.orguser + _ensure_dashboard_chat_feature_enabled(orguser.org) + _ensure_dashboard_chat_consent_enabled(orguser.org) try: dashboard = DashboardService.get_dashboard(dashboard_id, orguser.org) diff --git a/ddpui/api/org_preferences_api.py b/ddpui/api/org_preferences_api.py index dee146b20..30d278c73 100644 --- a/ddpui/api/org_preferences_api.py +++ b/ddpui/api/org_preferences_api.py @@ -50,6 +50,12 @@ def _is_dashboard_chat_feature_enabled(org) -> bool: return get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) +def _ensure_dashboard_chat_feature_enabled(org) -> None: + """Hide dashboard chat management APIs unless the feature flag is enabled.""" + if not _is_dashboard_chat_feature_enabled(org): + raise HttpError(404, "Chat with dashboards is not enabled for this organization") + + def _is_dbt_configured(org) -> bool: return org.dbt is not None @@ -213,6 +219,7 @@ def get_ai_dashboard_chat_settings(request): orguser: OrgUser = request.orguser org = orguser.org + _ensure_dashboard_chat_feature_enabled(org) org_preferences = _get_or_create_org_preferences(org) org_context = _get_or_create_org_ai_context(org) @@ -230,8 +237,14 @@ def update_ai_dashboard_chat_settings(request, payload: UpdateOrgAIDashboardChat orguser: OrgUser = request.orguser org = orguser.org + _ensure_dashboard_chat_feature_enabled(org) org_preferences = _get_or_create_org_preferences(org) org_context = _get_or_create_org_ai_context(org) + target_ai_data_sharing_enabled = ( + payload.ai_data_sharing_enabled + if payload.ai_data_sharing_enabled is not None + else org_preferences.ai_data_sharing_enabled + ) if ( payload.ai_data_sharing_enabled is True @@ 
-244,6 +257,11 @@ def update_ai_dashboard_chat_settings(request, payload: UpdateOrgAIDashboardChat org_preferences.ai_data_sharing_enabled = payload.ai_data_sharing_enabled if payload.org_context_markdown is not None: + if not target_ai_data_sharing_enabled: + raise HttpError( + 409, + "Enable AI data sharing before updating organization AI context", + ) org_context.markdown = payload.org_context_markdown org_context.updated_by = orguser org_context.updated_at = timezone.now() diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index 37c69a373..d175e6657 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -1,8 +1,11 @@ """Configuration helpers for dashboard chat infrastructure.""" from dataclasses import dataclass +from enum import Enum import os +from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType + def _parse_bool(value: str | None, default: bool) -> bool: """Parse a boolean env var using Dalgo's common truthy values.""" @@ -11,6 +14,50 @@ def _parse_bool(value: str | None, default: bool) -> bool: return value.strip().lower() in {"1", "true", "yes", "on"} +def _parse_csv_env(value: str | None) -> tuple[str, ...] | None: + """Parse a comma-separated env var into a normalized tuple.""" + if value is None: + return None + parsed_values = tuple( + item.strip().lower() for item in value.split(",") if item and item.strip() + ) + return parsed_values or None + + +@dataclass(frozen=True) +class DashboardChatSourceConfig: + """Environment-backed enablement for retrieval source types.""" + + enabled_source_types: tuple[str, ...] 
= tuple( + source_type.value for source_type in DashboardChatSourceType + ) + + @classmethod + def from_env(cls) -> "DashboardChatSourceConfig": + """Build source-type config from environment variables.""" + env_value = _parse_csv_env(os.getenv("AI_DASHBOARD_CHAT_ENABLED_SOURCE_TYPES")) + return cls( + enabled_source_types=env_value + or tuple(source_type.value for source_type in DashboardChatSourceType) + ) + + def is_enabled(self, source_type: DashboardChatSourceType | str) -> bool: + """Return whether the given source type should participate in runtime work.""" + source_type_value = source_type.value if isinstance(source_type, Enum) else source_type + return source_type_value in self.enabled_source_types + + def filter_enabled( + self, + source_types: list[DashboardChatSourceType | str] | tuple[DashboardChatSourceType | str, ...], + ) -> list[str]: + """Keep only the configured source types from a requested set.""" + return [ + source_type.value if isinstance(source_type, Enum) else source_type + for source_type in source_types + if self.is_enabled(source_type) + ] + + @dataclass(frozen=True) class DashboardChatVectorStoreConfig: """Environment-backed configuration for the Chroma sidecar and embeddings.""" diff --git a/ddpui/core/dashboard_chat/ingestion.py b/ddpui/core/dashboard_chat/ingestion.py index 2a427c722..01df96317 100644 --- a/ddpui/core/dashboard_chat/ingestion.py +++ b/ddpui/core/dashboard_chat/ingestion.py @@ -11,6 +11,7 @@ DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) +from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig from ddpui.core.dashboard_chat.vector_documents import ( DashboardChatSourceType, DashboardChatVectorDocument, @@ -102,9 +103,11 @@ def __init__( self, vector_store: ChromaDashboardChatVectorStore | None = None, dbt_docs_generator: Callable[[Org, object], DashboardChatDbtDocsArtifacts] | None = None, + source_config: DashboardChatSourceConfig | None = None, ): self.vector_store = 
vector_store or ChromaDashboardChatVectorStore() self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts + self.source_config = source_config or DashboardChatSourceConfig.from_env() def ingest_org(self, org: Org) -> DashboardChatIngestionResult: """Run dbt docs generation and rebuild the desired vector documents for an org.""" @@ -116,6 +119,7 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: desired_documents = [ document for source_type in INGEST_SOURCE_ORDER + if self.source_config.is_enabled(source_type) for document in documents_by_source[source_type.value] ] @@ -145,7 +149,11 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: docs_generated_at=dbt_docs.generated_at, vector_ingested_at=vector_ingested_at, source_document_counts={ - source_type.value: len(documents_by_source[source_type.value]) + source_type.value: ( + len(documents_by_source[source_type.value]) + if self.source_config.is_enabled(source_type) + else 0 + ) for source_type in INGEST_SOURCE_ORDER }, upserted_document_ids=upserted_document_ids, diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py index 30a20f8d2..0ec8e1c8b 100644 --- a/ddpui/core/dashboard_chat/runtime.py +++ b/ddpui/core/dashboard_chat/runtime.py @@ -11,6 +11,7 @@ DashboardChatAllowlistBuilder, ) from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig +from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig from ddpui.core.dashboard_chat.llm_client import ( DashboardChatLlmClient, OpenAIDashboardChatLlmClient, @@ -105,8 +106,10 @@ def __init__( llm_client: DashboardChatLlmClient | None = None, warehouse_tools_factory: Callable[[Org], DashboardChatWarehouseTools] | None = None, runtime_config: DashboardChatRuntimeConfig | None = None, + source_config: DashboardChatSourceConfig | None = None, ): self.runtime_config = runtime_config or DashboardChatRuntimeConfig.from_env() + self.source_config = 
source_config or DashboardChatSourceConfig.from_env() self.vector_store = vector_store or ChromaDashboardChatVectorStore() self.llm_client = llm_client or OpenAIDashboardChatLlmClient( model=self.runtime_config.llm_model, @@ -230,25 +233,31 @@ def _node_retrieve_docs(self, state: DashboardChatRuntimeState) -> DashboardChat dashboard_results = self._query_vector_store( org=org, query_text=state["user_query"], - source_types=[ + source_types=self.source_config.filter_enabled( + [ DashboardChatSourceType.DASHBOARD_EXPORT.value, DashboardChatSourceType.DASHBOARD_CONTEXT.value, - ], + ] + ), dashboard_id=state["dashboard_id"], ) org_results = self._query_vector_store( org=org, query_text=state["user_query"], - source_types=[DashboardChatSourceType.ORG_CONTEXT.value], + source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.ORG_CONTEXT.value] + ), ) dbt_results = self._filter_allowlisted_dbt_results( self._query_vector_store( org=org, query_text=state["user_query"], - source_types=[ + source_types=self.source_config.filter_enabled( + [ DashboardChatSourceType.DBT_MANIFEST.value, DashboardChatSourceType.DBT_CATALOG.value, - ], + ] + ), ), state["allowlist"], ) @@ -721,14 +730,20 @@ def _build_related_dashboards( query_text: str, ) -> list[DashboardChatRelatedDashboard]: """Suggest other dashboards with matching retrieved context.""" + related_dashboard_source_types = self.source_config.filter_enabled( + [ + DashboardChatSourceType.DASHBOARD_CONTEXT.value, + DashboardChatSourceType.DASHBOARD_EXPORT.value, + ] + ) + if not related_dashboard_source_types: + return [] + related_results = self.vector_store.query( org.id, query_text=query_text, n_results=self.runtime_config.related_dashboard_limit * 4, - source_types=[ - DashboardChatSourceType.DASHBOARD_CONTEXT.value, - DashboardChatSourceType.DASHBOARD_EXPORT.value, - ], + source_types=related_dashboard_source_types, ) candidate_dashboard_ids = [ result.metadata.get("dashboard_id") diff --git 
a/ddpui/core/dashboard_chat/session_service.py b/ddpui/core/dashboard_chat/session_service.py index 3cda6fb11..57f918117 100644 --- a/ddpui/core/dashboard_chat/session_service.py +++ b/ddpui/core/dashboard_chat/session_service.py @@ -42,6 +42,7 @@ def get_or_create_dashboard_chat_session( session = DashboardChatSession.objects.filter( session_id=session_uuid, org=orguser.org, + orguser=orguser, dashboard=dashboard, ).first() if session is None: diff --git a/ddpui/core/dashboard_chat/sql_guard.py b/ddpui/core/dashboard_chat/sql_guard.py index 579da2e8d..256437a6a 100644 --- a/ddpui/core/dashboard_chat/sql_guard.py +++ b/ddpui/core/dashboard_chat/sql_guard.py @@ -23,6 +23,14 @@ "VACUUM", } +AGGREGATE_FUNCTION_PATTERNS = ( + r"\bCOUNT\s*\(", + r"\bSUM\s*\(", + r"\bAVG\s*\(", + r"\bMIN\s*\(", + r"\bMAX\s*\(", +) + PII_PATTERNS = [ r"\b(name|phone|email|address|national_id|id_number)\b", r"\b(contact|mobile|telephone|personal|identification)\b", @@ -79,6 +87,11 @@ def validate(self, sql: str) -> DashboardChatSqlValidationResult: if re.search(r"\bSELECT\s+\*", sql_upper): warnings.append("SELECT * detected. Prefer explicit column lists.") + if self._selects_row_level_pii(sanitized_sql): + errors.append( + "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." 
+ ) + for pii_pattern in PII_PATTERNS: if re.search(pii_pattern, sanitized_sql, re.IGNORECASE): warnings.append(f"Query may touch PII-like columns matching {pii_pattern}.") @@ -128,3 +141,102 @@ def _extract_table_names(cls, sql: str) -> list[str]: tables.append(table_name.lower()) return list(dict.fromkeys(tables)) + + @classmethod + def _selects_row_level_pii(cls, sql: str) -> bool: + """Detect row-level sensitive fields in the outer SELECT list.""" + select_clause = cls._extract_outer_select_clause(sql) + if not select_clause: + return False + + for expression in cls._split_select_expressions(select_clause): + normalized_expression = expression.strip() + if not normalized_expression: + continue + if cls._contains_aggregate(normalized_expression): + continue + if any( + re.search(pii_pattern, normalized_expression, re.IGNORECASE) + for pii_pattern in PII_PATTERNS + ): + return True + return False + + @staticmethod + def _extract_outer_select_clause(sql: str) -> str | None: + """Return the outer-most SELECT projection segment.""" + sql_upper = sql.upper() + depth = 0 + select_start: int | None = None + + for index, character in enumerate(sql_upper): + if character == "(": + depth += 1 + continue + if character == ")": + depth = max(depth - 1, 0) + continue + + if depth == 0 and DashboardChatSqlGuard._matches_keyword(sql_upper, index, "SELECT"): + select_start = index + len("SELECT") + break + + if select_start is None: + return None + + depth = 0 + for index in range(select_start, len(sql_upper)): + character = sql_upper[index] + if character == "(": + depth += 1 + continue + if character == ")": + depth = max(depth - 1, 0) + continue + if depth == 0 and DashboardChatSqlGuard._matches_keyword(sql_upper, index, "FROM"): + return sql[select_start:index].strip() + + return None + + @staticmethod + def _matches_keyword(sql_upper: str, index: int, keyword: str) -> bool: + """Check whether a keyword occurs at a top-level position.""" + keyword_end = index + len(keyword) 
+ if sql_upper[index:keyword_end] != keyword: + return False + + previous_character = sql_upper[index - 1] if index > 0 else " " + next_character = sql_upper[keyword_end] if keyword_end < len(sql_upper) else " " + return not (previous_character.isalnum() or previous_character == "_") and not ( + next_character.isalnum() or next_character == "_" + ) + + @staticmethod + def _split_select_expressions(select_clause: str) -> list[str]: + """Split a SELECT clause by top-level commas only.""" + expressions: list[str] = [] + current_expression: list[str] = [] + depth = 0 + + for character in select_clause: + if character == "(": + depth += 1 + elif character == ")": + depth = max(depth - 1, 0) + elif character == "," and depth == 0: + expressions.append("".join(current_expression).strip()) + current_expression = [] + continue + current_expression.append(character) + + if current_expression: + expressions.append("".join(current_expression).strip()) + return expressions + + @staticmethod + def _contains_aggregate(expression: str) -> bool: + """Treat aggregate projections as safe even if they mention sensitive columns.""" + return any( + re.search(pattern, expression, re.IGNORECASE) + for pattern in AGGREGATE_FUNCTION_PATTERNS + ) diff --git a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py index 25f9846eb..74c026bd2 100644 --- a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py +++ b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py @@ -146,6 +146,7 @@ def test_get_ai_dashboard_chat_settings_returns_enveloped_response(orguser, seed def test_update_ai_dashboard_chat_settings_stamps_consent_and_context(orguser, seed_db): request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) payload = UpdateOrgAIDashboardChatSchema( ai_data_sharing_enabled=True, org_context_markdown="## Org context", @@ -216,6 +217,7 @@ def 
test_get_ai_dashboard_chat_settings_requires_permission(guest_orguser, seed_ def test_get_dashboard_ai_context_returns_direct_payload(orguser, dashboard, seed_db): request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) response = get_dashboard_ai_context(request, dashboard.id) @@ -228,6 +230,13 @@ def test_get_dashboard_ai_context_returns_direct_payload(orguser, dashboard, see def test_update_dashboard_ai_context_persists_context(orguser, dashboard, seed_db): request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) + OrgPreferences.objects.create( + org=orguser.org, + ai_data_sharing_enabled=True, + ai_data_sharing_consented_by=orguser, + ai_data_sharing_consented_at=timezone.now(), + ) payload = UpdateDashboardAIContextSchema(dashboard_context_markdown="## Dashboard context") response = update_dashboard_ai_context(request, dashboard.id, payload) @@ -254,9 +263,59 @@ def test_get_dashboard_ai_context_requires_permission(guest_orguser, dashboard, def test_get_dashboard_ai_context_is_org_scoped(orguser, other_org_dashboard, seed_db): request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) with pytest.raises(HttpError) as excinfo: get_dashboard_ai_context(request, other_org_dashboard.id) assert excinfo.value.status_code == 404 assert str(excinfo.value) == "Dashboard not found" + + +def test_dashboard_chat_settings_are_hidden_when_feature_flag_is_off(orguser, seed_db): + request = mock_request(orguser) + + with pytest.raises(HttpError) as excinfo: + get_ai_dashboard_chat_settings(request) + + assert excinfo.value.status_code == 404 + assert str(excinfo.value) == "Chat with dashboards is not enabled for this organization" + + +def test_update_ai_dashboard_chat_settings_rejects_context_without_consent(orguser, seed_db): + request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) + + with pytest.raises(HttpError) as excinfo: + 
update_ai_dashboard_chat_settings( + request, + UpdateOrgAIDashboardChatSchema(org_context_markdown="## Org context"), + ) + + assert excinfo.value.status_code == 409 + assert str(excinfo.value) == "Enable AI data sharing before updating organization AI context" + + +def test_dashboard_ai_context_is_hidden_when_feature_flag_is_off(orguser, dashboard, seed_db): + request = mock_request(orguser) + + with pytest.raises(HttpError) as excinfo: + get_dashboard_ai_context(request, dashboard.id) + + assert excinfo.value.status_code == 404 + assert str(excinfo.value) == "Chat with dashboards is not enabled for this organization" + + +def test_update_dashboard_ai_context_requires_ai_consent(orguser, dashboard, seed_db): + request = mock_request(orguser) + enable_feature_flag("AI_DASHBOARD_CHAT", org=orguser.org) + + with pytest.raises(HttpError) as excinfo: + update_dashboard_ai_context( + request, + dashboard.id, + UpdateDashboardAIContextSchema(dashboard_context_markdown="## Dashboard context"), + ) + + assert excinfo.value.status_code == 409 + assert str(excinfo.value) == "Enable AI data sharing before updating dashboard AI context" diff --git a/ddpui/tests/core/dashboard_chat/test_ingestion.py b/ddpui/tests/core/dashboard_chat/test_ingestion.py index e0aa5a50b..380f5eb4d 100644 --- a/ddpui/tests/core/dashboard_chat/test_ingestion.py +++ b/ddpui/tests/core/dashboard_chat/test_ingestion.py @@ -20,6 +20,7 @@ DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) +from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService from ddpui.core.dashboard_chat.vector_store import DashboardChatStoredDocument from ddpui.ddpdbt.schema import DbtProjectParams @@ -436,3 +437,49 @@ def _raise_on_upsert(org_id, documents): } assert remaining_ids == original_ids assert vector_store.delete_calls == [] + + +def test_ingest_org_deletes_disabled_source_documents(org, orgdbt, orguser, 
dashboard): + """Disabled source types should be omitted from the target document set.""" + OrgAIContext.objects.create( + org=org, + markdown="# Org context\n\nImportant org notes.", + updated_by=orguser, + updated_at=timezone.now(), + ) + DashboardAIContext.objects.create( + dashboard=dashboard, + markdown="## Dashboard context\n\nThis dashboard tracks monthly reach.", + updated_by=orguser, + updated_at=timezone.now(), + ) + vector_store = FakeDashboardChatVectorStore() + artifacts = StoredArtifacts( + manifest_json={"metadata": {"project_name": "dashchat"}, "sources": {}, "nodes": {}}, + catalog_json={"sources": {}, "nodes": {}}, + generated_at=timezone.now(), + ) + initial_service = DashboardChatIngestionService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), + ) + initial_service.ingest_org(org) + + disabled_source_service = DashboardChatIngestionService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), + source_config=DashboardChatSourceConfig( + enabled_source_types=("dashboard_context", "dashboard_export", "dbt_manifest") + ), + ) + + result = disabled_source_service.ingest_org(org) + stored_source_types = { + document.metadata["source_type"] + for document in vector_store.get_documents(org.id, include_documents=False) + } + + assert result.source_document_counts["org_context"] == 0 + assert result.source_document_counts["dbt_catalog"] == 0 + assert "org_context" not in stored_source_types + assert "dbt_catalog" not in stored_source_types diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index f9a24e677..ca9be08e0 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -13,7 +13,7 @@ from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist, 
DashboardChatAllowlistBuilder -from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig +from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime from ddpui.core.dashboard_chat.runtime_types import ( DashboardChatConversationMessage, @@ -191,6 +191,24 @@ def compose_answer( return "Education has 120 beneficiaries on the current dashboard." +class PiiSqlPathLlm(SqlPathLlm): + """LLM stub that generates an unsafe row-level PII query.""" + + def generate_sql( + self, + user_query, + dashboard_summary, + query_plan, + schema_prompt, + distinct_values, + allowlisted_tables, + ): + return DashboardChatSqlDraft( + sql="SELECT email FROM analytics.program_reach LIMIT 25", + reason="Returns raw email addresses.", + ) + + @pytest.fixture def org(): organization = Org.objects.create( @@ -353,6 +371,25 @@ def test_sql_guard_enforces_single_statement_allowlist_and_limit(): assert any("No LIMIT clause found" in warning for warning in allowed_query.warnings) +def test_sql_guard_rejects_row_level_pii_queries(): + """SQL guard should reject row-level projections of sensitive fields.""" + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + + pii_query = guard.validate( + "SELECT email, COUNT(*) AS beneficiary_count " + "FROM analytics.program_reach " + "GROUP BY email " + "LIMIT 50" + ) + + assert pii_query.is_valid is False + assert pii_query.sanitized_sql is None + assert pii_query.errors == [ + "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." 
+ ] + + def test_runtime_context_query_returns_citations_and_related_dashboards( org, primary_dashboard, @@ -472,3 +509,116 @@ def test_runtime_data_query_uses_distinct_values_before_sql_execution( assert response.sql is not None assert response.metadata["query_plan_mode"] == "sql" assert any(citation.source_type == "warehouse_table" for citation in response.citations) + + +def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_dashboard): + """Unsafe PII SQL should never reach warehouse execution.""" + transaction.commit() + vector_store = FakeVectorStore( + [ + DashboardChatVectorQueryResult( + document_id="doc-dashboard-export", + content="Chart id: 1. Data source: analytics.program_reach.", + metadata={ + "source_type": "dashboard_export", + "source_identifier": f"dashboard:{primary_dashboard.id}:chart:1", + "dashboard_id": primary_dashboard.id, + }, + distance=0.01, + ) + ] + ) + fake_warehouse = FakeWarehouseTools() + + runtime = DashboardChatRuntime( + vector_store=vector_store, + llm_client=PiiSqlPathLlm(), + warehouse_tools_factory=lambda org: fake_warehouse, + runtime_config=DashboardChatRuntimeConfig( + retrieval_limit=6, + related_dashboard_limit=2, + max_query_rows=200, + max_distinct_values=20, + max_schema_tables=4, + ), + ) + + response = runtime.run( + org=org, + dashboard_id=primary_dashboard.id, + user_query="List email addresses for this dashboard", + ) + + assert fake_warehouse.executed_sql == [] + assert response.sql is None + assert response.sql_results is None + assert "aggregate the results or rephrase" in response.answer_text + assert response.metadata["sql_guard_errors"] == [ + "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." 
+ ] + + +def test_runtime_skips_disabled_source_types_during_retrieval(org, primary_dashboard, related_dashboard): + """Disabled source types should not be queried from the vector store.""" + transaction.commit() + vector_store = FakeVectorStore( + [ + DashboardChatVectorQueryResult( + document_id="doc-dashboard-context", + content="This dashboard tracks monthly reach across programs.", + metadata={ + "source_type": "dashboard_context", + "source_identifier": f"dashboard:{primary_dashboard.id}:context", + "dashboard_id": primary_dashboard.id, + }, + distance=0.02, + ), + DashboardChatVectorQueryResult( + document_id="doc-org-context", + content="Dalgo supports NGO dashboards and program reporting.", + metadata={ + "source_type": "org_context", + "source_identifier": f"org:{org.id}:context", + }, + distance=0.04, + ), + DashboardChatVectorQueryResult( + document_id="doc-related-dashboard", + content="This dashboard shows donor-wise funding and cashflow trends.", + metadata={ + "source_type": "dashboard_export", + "source_identifier": f"dashboard:{related_dashboard.id}:summary", + "dashboard_id": related_dashboard.id, + }, + distance=0.05, + ), + ] + ) + + runtime = DashboardChatRuntime( + vector_store=vector_store, + llm_client=ContextOnlyLlm(), + runtime_config=DashboardChatRuntimeConfig( + retrieval_limit=6, + related_dashboard_limit=2, + max_query_rows=200, + max_distinct_values=20, + max_schema_tables=4, + ), + source_config=DashboardChatSourceConfig( + enabled_source_types=( + "dashboard_context", + "dashboard_export", + ) + ), + ) + + runtime.run( + org=org, + dashboard_id=primary_dashboard.id, + user_query="Explain the reach metric", + ) + + queried_source_groups = [tuple(call["source_types"]) for call in vector_store.calls] + assert ("org_context",) not in queried_source_groups + assert ("dbt_manifest", "dbt_catalog") not in queried_source_groups diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py 
b/ddpui/tests/core/dashboard_chat/test_session_service.py new file mode 100644 index 000000000..30da80b1f --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -0,0 +1,128 @@ +"""Tests for dashboard chat session creation and reuse rules.""" + +import os + +import django +import pytest + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +from django.contrib.auth.models import User + +from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.core.dashboard_chat.session_service import ( + DashboardChatSessionError, + get_or_create_dashboard_chat_session, +) +from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import DashboardChatSession +from ddpui.models.org import Org +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.tests.api_tests.test_user_org_api import seed_db + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def org(): + organization = Org.objects.create( + name="Dashboard Chat Org", + slug="dashchat-service", + airbyte_workspace_id="workspace-1", + ) + yield organization + organization.delete() + + +@pytest.fixture +def dashboard(org, seed_db): + owner = OrgUser.objects.create( + user=User.objects.create( + username="dashchat-owner", + email="dashchat-owner@test.com", + password="testpassword", + ), + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + dashboard_instance = Dashboard.objects.create( + title="Impact Overview", + dashboard_type="native", + created_by=owner, + last_modified_by=owner, + org=org, + ) + yield dashboard_instance + dashboard_instance.delete() + owner.delete() + owner.user.delete() + + +@pytest.fixture +def session_owner(org, seed_db): + user = User.objects.create( + username="dashchat-session-owner", + email="dashchat-session-owner@test.com", + password="testpassword", + ) + orguser = OrgUser.objects.create( + 
user=user, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield orguser + orguser.delete() + user.delete() + + +@pytest.fixture +def other_orguser(org, seed_db): + user = User.objects.create( + username="dashchat-other-user", + email="dashchat-other-user@test.com", + password="testpassword", + ) + orguser = OrgUser.objects.create( + user=user, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield orguser + orguser.delete() + user.delete() + + +def test_get_or_create_dashboard_chat_session_creates_new_session(session_owner, dashboard): + """A missing session_id should create a new session for the current user.""" + session = get_or_create_dashboard_chat_session( + orguser=session_owner, + dashboard=dashboard, + session_id=None, + ) + + assert isinstance(session, DashboardChatSession) + assert session.orguser == session_owner + assert session.dashboard == dashboard + + +def test_get_or_create_dashboard_chat_session_rejects_other_user_session( + session_owner, + other_orguser, + dashboard, +): + """Session reuse is limited to the user who created the conversation.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + + with pytest.raises(DashboardChatSessionError, match="Chat session not found for this dashboard"): + get_or_create_dashboard_chat_session( + orguser=other_orguser, + dashboard=dashboard, + session_id=str(session.session_id), + ) From ce64b3511ab1b478bf3abf940db71349be0483c8 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 11:40:59 +0530 Subject: [PATCH 12/49] fix(ai-chat): address dashboard chat review findings --- ddpui/celeryworkers/tasks.py | 42 ++++++---- ddpui/core/dashboard_chat/dbt_docs.py | 48 ++++++------ ddpui/core/dashboard_chat/ingestion.py | 55 +++++++++---- ddpui/core/dashboard_chat/runtime.py | 78 +++++++++++++------ ddpui/core/dashboard_chat/session_service.py | 35 
+++++++-- ddpui/core/dashboard_chat/sql_guard.py | 25 ++++-- ddpui/core/dashboard_chat/warehouse_tools.py | 29 ++++++- ...ashboardchatmessage_created_at_and_more.py | 31 ++++++++ ddpui/models/dashboard_chat.py | 9 ++- ddpui/services/dashboard_service.py | 37 ++++++--- .../test_dashboard_chat_settings_api.py | 6 +- .../core/dashboard_chat/test_ingestion.py | 31 +++++++- .../tests/core/dashboard_chat/test_runtime.py | 44 ++++++++--- .../dashboard_chat/test_session_service.py | 34 +++++++- .../core/dashboard_chat/test_vector_store.py | 13 +++- .../dashboard_chat/test_warehouse_tools.py | 65 ++++++++++++++++ .../test_dashboard_chat_consumer.py | 46 +++++++++++ ddpui/websockets/dashboard_chat_consumer.py | 19 ++++- 18 files changed, 525 insertions(+), 122 deletions(-) create mode 100644 ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py create mode 100644 ddpui/tests/core/dashboard_chat/test_warehouse_tools.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index 14a002ba3..ae97a5887 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -117,7 +117,10 @@ logger = CustomLogger("ddpui") UTC = timezone.UTC -DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS = 3 * 60 * 60 +DASHBOARD_CHAT_CONTEXT_BUILD_INTERVAL_SECONDS = 3 * 60 * 60 +DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS = ( + DASHBOARD_CHAT_CONTEXT_BUILD_INTERVAL_SECONDS + 5 * 60 +) @app.task(bind=True) @@ -1312,13 +1315,17 @@ def build_dashboard_chat_context_for_org(self, org_id: int): .first() ) if org is None: - logger.warning("dashboard chat context build skipped: org %s not found or missing dbt", org_id) + logger.warning( + "dashboard chat context build skipped: org %s not found or missing dbt", org_id + ) return {"status": "skipped_missing_org", "org_id": org_id} preferences = OrgPreferences.objects.filter(org=org).first() feature_enabled = get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) if not feature_enabled or 
preferences is None or not preferences.ai_data_sharing_enabled: - logger.info("dashboard chat context build skipped for org=%s because it is not eligible", org_id) + logger.info( + "dashboard chat context build skipped for org=%s because it is not eligible", org_id + ) return {"status": "skipped_ineligible", "org_id": org_id} redis_client = RedisClient.get_instance() @@ -1327,7 +1334,10 @@ def build_dashboard_chat_context_for_org(self, org_id: int): timeout=DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS, ) if not lock.acquire(blocking=False): - logger.info("dashboard chat context build skipped for org=%s because a rebuild is already running", org_id) + logger.info( + "dashboard chat context build skipped for org=%s because a rebuild is already running", + org_id, + ) return {"status": "skipped_locked", "org_id": org_id} try: @@ -1335,7 +1345,9 @@ def build_dashboard_chat_context_for_org(self, org_id: int): return { "status": "completed", "org_id": org_id, - "docs_generated_at": result.docs_generated_at.isoformat(), + "docs_generated_at": ( + result.docs_generated_at.isoformat() if result.docs_generated_at else None + ), "vector_last_ingested_at": result.vector_ingested_at.isoformat(), "source_document_counts": result.source_document_counts, } @@ -1344,7 +1356,9 @@ def build_dashboard_chat_context_for_org(self, org_id: int): if lock.owned(): lock.release() except Exception: - logger.exception("failed to release dashboard chat context build lock for org=%s", org_id) + logger.exception( + "failed to release dashboard chat context build lock for org=%s", org_id + ) @app.task @@ -1364,14 +1378,11 @@ def run_dashboard_chat_turn(session_id: str, user_message_id: int): ) return {"status": "skipped_missing_session", "session_id": session_id} - user_message = ( - DashboardChatMessage.objects.filter( - id=user_message_id, - session=session, - role="user", - ) - .first() - ) + user_message = DashboardChatMessage.objects.filter( + id=user_message_id, + session=session, + 
role="user", + ).first() if user_message is None: logger.warning( "dashboard chat turn skipped because message %s was not found in session %s", @@ -1394,8 +1405,7 @@ def run_dashboard_chat_turn(session_id: str, user_message_id: int): "intent": response.intent.value, "citations": [citation.to_dict() for citation in response.citations], "related_dashboards": [ - related_dashboard.to_dict() - for related_dashboard in response.related_dashboards + related_dashboard.to_dict() for related_dashboard in response.related_dashboards ], "warnings": response.warnings, "sql": response.sql, diff --git a/ddpui/core/dashboard_chat/dbt_docs.py b/ddpui/core/dashboard_chat/dbt_docs.py index dcc89c296..2c3e179a1 100644 --- a/ddpui/core/dashboard_chat/dbt_docs.py +++ b/ddpui/core/dashboard_chat/dbt_docs.py @@ -3,6 +3,7 @@ from dataclasses import dataclass import json from pathlib import Path +import tempfile import yaml from django.utils import timezone @@ -29,7 +30,7 @@ class DashboardChatDbtDocsArtifacts: target_dir: Path -def _write_profiles_file(org: Org, orgdbt: OrgDbt) -> Path: +def _write_profiles_file(org: Org, orgdbt: OrgDbt, profiles_dir: Path) -> Path: """Write the dbt profiles.yml required for dbt CLI execution.""" if orgdbt.cli_profile_block is None: raise DashboardChatDbtDocsError("dbt CLI profile block not found") @@ -44,7 +45,6 @@ def _write_profiles_file(org: Org, orgdbt: OrgDbt) -> Path: f"Failed to load dbt CLI profile for dashboard chat: {error}" ) from error - profiles_dir = Path(dbt_project_params.project_dir) / "profiles" profiles_dir.mkdir(parents=True, exist_ok=True) profile_path = profiles_dir / "profiles.yml" with open(profile_path, "w", encoding="utf-8") as profile_file: @@ -60,27 +60,29 @@ def generate_dashboard_chat_dbt_docs_artifacts( if orgdbt is None: raise DashboardChatDbtDocsError("dbt workspace not configured") - _write_profiles_file(org, orgdbt) - - try: - logger.info("running dbt deps for dashboard chat org=%s", org.id) - 
DbtProjectManager.run_dbt_command( - org, - orgdbt, - command=["deps"], - keyword_args={"profiles-dir": "profiles"}, - ) - logger.info("running dbt docs generate for dashboard chat org=%s", org.id) - DbtProjectManager.run_dbt_command( - org, - orgdbt, - command=["docs", "generate"], - keyword_args={"profiles-dir": "profiles"}, - ) - except Exception as error: - raise DashboardChatDbtDocsError( - f"dbt docs generate failed for dashboard chat: {error}" - ) from error + with tempfile.TemporaryDirectory(prefix=f"dashboard-chat-dbt-{org.id}-") as profiles_dir: + profile_path = _write_profiles_file(org, orgdbt, Path(profiles_dir)) + profiles_dir_arg = str(profile_path.parent) + + try: + logger.info("running dbt deps for dashboard chat org=%s", org.id) + DbtProjectManager.run_dbt_command( + org, + orgdbt, + command=["deps"], + keyword_args={"profiles-dir": profiles_dir_arg}, + ) + logger.info("running dbt docs generate for dashboard chat org=%s", org.id) + DbtProjectManager.run_dbt_command( + org, + orgdbt, + command=["docs", "generate"], + keyword_args={"profiles-dir": profiles_dir_arg}, + ) + except Exception as error: + raise DashboardChatDbtDocsError( + f"dbt docs generate failed for dashboard chat: {error}" + ) from error target_dir = Path(DbtProjectManager.get_dbt_project_dir(orgdbt)) / "target" manifest_path = target_dir / "manifest.json" diff --git a/ddpui/core/dashboard_chat/ingestion.py b/ddpui/core/dashboard_chat/ingestion.py index 01df96317..8fe4dd075 100644 --- a/ddpui/core/dashboard_chat/ingestion.py +++ b/ddpui/core/dashboard_chat/ingestion.py @@ -20,6 +20,7 @@ from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardAIContext, OrgAIContext from ddpui.models.org import Org +from ddpui.models.visualization import Chart from ddpui.services.dashboard_service import DashboardService MARKDOWN_CHUNK_MAX_CHARS = 1200 @@ -42,7 +43,7 @@ class DashboardChatIngestionResult: """Summary of one completed org context build.""" org_id: 
int - docs_generated_at: timezone.datetime + docs_generated_at: timezone.datetime | None vector_ingested_at: timezone.datetime source_document_counts: dict[str, int] upserted_document_ids: list[str] @@ -114,7 +115,11 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: if org.dbt is None: raise DashboardChatIngestionError("dbt workspace not configured") - dbt_docs = self.dbt_docs_generator(org, org.dbt) + dbt_docs = None + if self.source_config.is_enabled( + DashboardChatSourceType.DBT_MANIFEST + ) or self.source_config.is_enabled(DashboardChatSourceType.DBT_CATALOG): + dbt_docs = self.dbt_docs_generator(org, org.dbt) documents_by_source = self._build_documents(org, dbt_docs) desired_documents = [ document @@ -134,7 +139,9 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: ] upserted_document_ids: list[str] = [] if new_documents: - upserted_document_ids = sorted(self.vector_store.upsert_documents(org.id, new_documents)) + upserted_document_ids = sorted( + self.vector_store.upsert_documents(org.id, new_documents) + ) stale_document_ids = sorted(existing_document_ids - desired_document_ids) if stale_document_ids: @@ -146,7 +153,7 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: return DashboardChatIngestionResult( org_id=org.id, - docs_generated_at=dbt_docs.generated_at, + docs_generated_at=dbt_docs.generated_at if dbt_docs else org.dbt.docs_generated_at, vector_ingested_at=vector_ingested_at, source_document_counts={ source_type.value: ( @@ -163,7 +170,7 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: def _build_documents( self, org: Org, - dbt_docs: DashboardChatDbtDocsArtifacts, + dbt_docs: DashboardChatDbtDocsArtifacts | None, ) -> dict[str, list[DashboardChatVectorDocument]]: """Build the full desired vector document set for an org.""" documents_by_source: dict[str, list[DashboardChatVectorDocument]] = defaultdict(list) @@ -187,7 +194,19 @@ def _build_documents( dashboard__org=org, 
).select_related("dashboard") } - dashboards = list(Dashboard.objects.filter(org=org).order_by("id")) + dashboards = list( + Dashboard.objects.filter(org=org).prefetch_related("filters").order_by("id") + ) + chart_ids = { + chart_id + for dashboard in dashboards + for chart_id in DashboardService.extract_chart_ids_from_components( + dashboard.components, + ) + } + charts_by_id = { + chart.id: chart for chart in Chart.objects.filter(org=org, id__in=chart_ids) + } for dashboard in dashboards: dashboard_context = dashboard_contexts.get(dashboard.id) @@ -204,17 +223,27 @@ def _build_documents( ) ) - export_payload = DashboardService.export_dashboard_context(dashboard.id, org) + export_payload = DashboardService.export_dashboard_context_for_dashboard( + dashboard, + org, + charts_by_id=charts_by_id, + ) documents_by_source[DashboardChatSourceType.DASHBOARD_EXPORT.value].extend( self._build_dashboard_export_documents(org.id, dashboard.id, export_payload) ) - documents_by_source[DashboardChatSourceType.DBT_MANIFEST.value].extend( - self._build_manifest_documents(org.id, dbt_docs) - ) - documents_by_source[DashboardChatSourceType.DBT_CATALOG.value].extend( - self._build_catalog_documents(org.id, dbt_docs) - ) + if dbt_docs is not None and self.source_config.is_enabled( + DashboardChatSourceType.DBT_MANIFEST + ): + documents_by_source[DashboardChatSourceType.DBT_MANIFEST.value].extend( + self._build_manifest_documents(org.id, dbt_docs) + ) + if dbt_docs is not None and self.source_config.is_enabled( + DashboardChatSourceType.DBT_CATALOG + ): + documents_by_source[DashboardChatSourceType.DBT_CATALOG.value].extend( + self._build_catalog_documents(org.id, dbt_docs) + ) return { source_type.value: documents_by_source.get(source_type.value, []) diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py index 0ec8e1c8b..448604c6f 100644 --- a/ddpui/core/dashboard_chat/runtime.py +++ b/ddpui/core/dashboard_chat/runtime.py @@ -2,6 +2,7 @@ from 
collections.abc import Callable, Sequence import json +import re from typing import Any, TypedDict from langgraph.graph import END, START, StateGraph @@ -128,7 +129,8 @@ def run( org: Org, dashboard_id: int, user_query: str, - conversation_history: Sequence[DashboardChatConversationMessage | dict[str, str]] | None = None, + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, str]] + | None = None, ) -> DashboardChatResponse: """Run a single dashboard chat turn and return the structured response.""" initial_state: DashboardChatRuntimeState = { @@ -235,8 +237,8 @@ def _node_retrieve_docs(self, state: DashboardChatRuntimeState) -> DashboardChat query_text=state["user_query"], source_types=self.source_config.filter_enabled( [ - DashboardChatSourceType.DASHBOARD_EXPORT.value, - DashboardChatSourceType.DASHBOARD_CONTEXT.value, + DashboardChatSourceType.DASHBOARD_EXPORT.value, + DashboardChatSourceType.DASHBOARD_CONTEXT.value, ] ), dashboard_id=state["dashboard_id"], @@ -254,8 +256,8 @@ def _node_retrieve_docs(self, state: DashboardChatRuntimeState) -> DashboardChat query_text=state["user_query"], source_types=self.source_config.filter_enabled( [ - DashboardChatSourceType.DBT_MANIFEST.value, - DashboardChatSourceType.DBT_CATALOG.value, + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, ] ), ), @@ -286,7 +288,10 @@ def _node_load_schema_snippets( ) -> DashboardChatRuntimeState: """Load schema snippets for the relevant dashboard tables.""" intent_decision = state["intent_decision"] - if not intent_decision.force_sql_path and intent_decision.intent != DashboardChatIntent.DATA_QUERY: + if ( + not intent_decision.force_sql_path + and intent_decision.intent != DashboardChatIntent.DATA_QUERY + ): state["schema_snippets"] = {} state["schema_prompt"] = "" return state @@ -310,6 +315,11 @@ def _node_load_schema_snippets( state["schema_prompt"] = "" state["warnings"] = state.get("warnings", []) + [str(error)] return 
state + except Exception as error: + state["schema_snippets"] = {} + state["schema_prompt"] = "" + state["warnings"] = state.get("warnings", []) + [str(error)] + return state state["schema_snippets"] = schema_snippets state["schema_prompt"] = "\n\n".join( @@ -321,7 +331,10 @@ def _node_plan_query(self, state: DashboardChatRuntimeState) -> DashboardChatRun """Produce the structured execution plan.""" intent_decision = state["intent_decision"] - if intent_decision.intent == DashboardChatIntent.CONTEXT_QUERY and not intent_decision.force_sql_path: + if ( + intent_decision.intent == DashboardChatIntent.CONTEXT_QUERY + and not intent_decision.force_sql_path + ): state["query_plan"] = DashboardChatQueryPlan( mode=DashboardChatPlanMode.CONTEXT, reason=intent_decision.reason, @@ -391,11 +404,18 @@ def _node_lookup_distinct_values( if not state["allowlist"].is_allowed(table_name) or table_name not in available_tables: continue distinct_key = f"{table_name}.{text_filter.column_name}" - distinct_values[distinct_key] = warehouse_tools.get_distinct_values( - table_name=table_name, - column_name=text_filter.column_name, - limit=self.runtime_config.max_distinct_values, - ) + try: + distinct_values[distinct_key] = warehouse_tools.get_distinct_values( + table_name=table_name, + column_name=text_filter.column_name, + limit=self.runtime_config.max_distinct_values, + ) + except DashboardChatWarehouseToolsError as error: + state["warnings"] = state.get("warnings", []) + [str(error)] + distinct_values[distinct_key] = [] + except Exception as error: + state["warnings"] = state.get("warnings", []) + [str(error)] + distinct_values[distinct_key] = [] state["distinct_values"] = distinct_values return state @@ -461,16 +481,14 @@ def _node_compose_answer(self, state: DashboardChatRuntimeState) -> DashboardCha if sql_draft is not None: warnings.extend(warning for warning in sql_draft.warnings if warning not in warnings) if sql_validation is not None: - warnings.extend(warning for warning in 
sql_validation.warnings if warning not in warnings) + warnings.extend( + warning for warning in sql_validation.warnings if warning not in warnings + ) if intent_decision.intent == DashboardChatIntent.SMALL_TALK: - answer_text = ( - "I can help explain this dashboard or answer questions about the data behind its charts." - ) + answer_text = "I can help explain this dashboard or answer questions about the data behind its charts." elif intent_decision.intent == DashboardChatIntent.IRRELEVANT: - answer_text = ( - "I can help with questions about this dashboard, its charts, and the data behind them." - ) + answer_text = "I can help with questions about this dashboard, its charts, and the data behind them." elif intent_decision.intent == DashboardChatIntent.NEEDS_CLARIFICATION: answer_text = ( intent_decision.clarification_question @@ -520,7 +538,9 @@ def _node_compose_answer(self, state: DashboardChatRuntimeState) -> DashboardCha ) return state - def _node_finalize_response(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + def _node_finalize_response( + self, state: DashboardChatRuntimeState + ) -> DashboardChatRuntimeState: """Attach metadata and table citations to the final response.""" response = state["response"] citations = list(response.citations) @@ -631,14 +651,17 @@ def _heuristic_intent_decision( reason="Greeting or pleasantry", ) - if any(keyword in normalized_query for keyword in DATA_QUERY_KEYWORDS): + if DashboardChatRuntime._contains_keyword_phrase(normalized_query, DATA_QUERY_KEYWORDS): return DashboardChatIntentDecision( intent=DashboardChatIntent.DATA_QUERY, reason="Contains data-analysis keywords", force_sql_path=True, ) - if any(keyword in normalized_query for keyword in CONTEXT_QUERY_KEYWORDS): + if DashboardChatRuntime._contains_keyword_phrase( + normalized_query, + CONTEXT_QUERY_KEYWORDS, + ): return DashboardChatIntentDecision( intent=DashboardChatIntent.CONTEXT_QUERY, reason="Contains definition or explanation keywords", @@ 
-652,6 +675,13 @@ def _heuristic_intent_decision( ) return None + @staticmethod + def _contains_keyword_phrase(normalized_query: str, keywords: set[str]) -> bool: + """Match keywords on word boundaries to avoid substring false positives.""" + return any( + re.search(rf"\b{re.escape(keyword)}\b", normalized_query) for keyword in keywords + ) + def _query_vector_store( self, org: Org, @@ -835,7 +865,9 @@ def _citation_title( if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: return f"Dashboard context: {dashboard_title}" if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: - chart_id = DashboardChatRuntime._chart_id_from_source_identifier(document.source_identifier) + chart_id = DashboardChatRuntime._chart_id_from_source_identifier( + document.source_identifier + ) if chart_id is not None and chart_id in chart_lookup: return f"Chart: {chart_lookup[chart_id]}" return f"Dashboard export: {dashboard_title}" diff --git a/ddpui/core/dashboard_chat/session_service.py b/ddpui/core/dashboard_chat/session_service.py index 57f918117..92eba7760 100644 --- a/ddpui/core/dashboard_chat/session_service.py +++ b/ddpui/core/dashboard_chat/session_service.py @@ -2,6 +2,7 @@ from uuid import UUID +from django.db import IntegrityError from django.db import transaction from django.db.models import Max from django.utils import timezone @@ -119,19 +120,37 @@ def _create_dashboard_chat_message( """Create a session-scoped chat message with a stable next sequence number.""" with transaction.atomic(): locked_session = DashboardChatSession.objects.select_for_update().get(id=session.id) + if client_message_id: + existing_message = DashboardChatMessage.objects.filter( + session=locked_session, + client_message_id=client_message_id, + ).first() + if existing_message is not None: + return existing_message + next_sequence_number = ( locked_session.messages.aggregate(max_sequence_number=Max("sequence_number"))[ "max_sequence_number" ] or 0 ) + 1 - 
message = DashboardChatMessage.objects.create( - session=locked_session, - sequence_number=next_sequence_number, - role=role, - content=content, - client_message_id=client_message_id, - payload=payload, - ) + try: + message = DashboardChatMessage.objects.create( + session=locked_session, + sequence_number=next_sequence_number, + role=role, + content=content, + client_message_id=client_message_id, + payload=payload, + ) + except IntegrityError: + if not client_message_id: + raise + message = DashboardChatMessage.objects.filter( + session=locked_session, + client_message_id=client_message_id, + ).first() + if message is None: + raise DashboardChatSession.objects.filter(id=locked_session.id).update(updated_at=timezone.now()) return message diff --git a/ddpui/core/dashboard_chat/sql_guard.py b/ddpui/core/dashboard_chat/sql_guard.py index 256437a6a..40fbffc9d 100644 --- a/ddpui/core/dashboard_chat/sql_guard.py +++ b/ddpui/core/dashboard_chat/sql_guard.py @@ -8,6 +8,7 @@ from ddpui.core.dashboard_chat.runtime_types import DashboardChatSqlValidationResult FORBIDDEN_SQL_KEYWORDS = { + "INTO", "INSERT", "UPDATE", "DELETE", @@ -56,7 +57,9 @@ def validate(self, sql: str) -> DashboardChatSqlValidationResult: sql_without_comments = self._strip_sql_comments(sql) statements = [ - statement.strip() for statement in sqlparse.split(sql_without_comments) if statement.strip() + statement.strip() + for statement in sqlparse.split(sql_without_comments) + if statement.strip() ] if len(statements) != 1: return DashboardChatSqlValidationResult( @@ -71,7 +74,13 @@ def validate(self, sql: str) -> DashboardChatSqlValidationResult: if not (sql_upper.startswith("SELECT") or sql_upper.startswith("WITH")): errors.append("Query must start with SELECT or WITH") + select_into_detected = self._contains_select_into_clause(sanitized_sql) + if select_into_detected: + errors.append("SELECT INTO is not allowed") + for keyword in FORBIDDEN_SQL_KEYWORDS: + if keyword == "INTO" and select_into_detected: + 
continue if re.search(rf"\b{keyword}\b", sql_upper): errors.append(f"Forbidden keyword detected: {keyword}") @@ -114,8 +123,7 @@ def validate(self, sql: str) -> DashboardChatSqlValidationResult: @staticmethod def _strip_sql_comments(sql: str) -> str: """Remove line and block comments before validation.""" - sql_without_block_comments = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL) - return re.sub(r"--.*", "", sql_without_block_comments) + return sqlparse.format(sql, strip_comments=True) @classmethod def _extract_table_names(cls, sql: str) -> list[str]: @@ -162,6 +170,14 @@ def _selects_row_level_pii(cls, sql: str) -> bool: return True return False + @classmethod + def _contains_select_into_clause(cls, sql: str) -> bool: + """Detect SELECT ... INTO before the outer FROM clause.""" + select_clause = cls._extract_outer_select_clause(sql) + if not select_clause: + return False + return bool(re.search(r"\bINTO\b", select_clause, re.IGNORECASE)) + @staticmethod def _extract_outer_select_clause(sql: str) -> str | None: """Return the outer-most SELECT projection segment.""" @@ -237,6 +253,5 @@ def _split_select_expressions(select_clause: str) -> list[str]: def _contains_aggregate(expression: str) -> bool: """Treat aggregate projections as safe even if they mention sensitive columns.""" return any( - re.search(pattern, expression, re.IGNORECASE) - for pattern in AGGREGATE_FUNCTION_PATTERNS + re.search(pattern, expression, re.IGNORECASE) for pattern in AGGREGATE_FUNCTION_PATTERNS ) diff --git a/ddpui/core/dashboard_chat/warehouse_tools.py b/ddpui/core/dashboard_chat/warehouse_tools.py index 6afa3ebdf..9748768bd 100644 --- a/ddpui/core/dashboard_chat/warehouse_tools.py +++ b/ddpui/core/dashboard_chat/warehouse_tools.py @@ -1,9 +1,11 @@ """Warehouse access helpers used by dashboard chat runtime.""" +import json from typing import Any from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse +from ddpui.utils 
import secretsmanager from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory @@ -128,11 +130,30 @@ def _quote_table_ref(self, schema_name: str, table_name: str) -> str: def _quote_bigquery_table_ref(self, schema_name: str, table_name: str) -> str: """Quote a BigQuery fully-qualified table reference.""" - project_name = self.org_warehouse.bq_location + project_name = self._get_bigquery_project_id() if not project_name: - raise DashboardChatWarehouseToolsError("BigQuery location/project not configured") + raise DashboardChatWarehouseToolsError("BigQuery project id not configured") return f"`{project_name}.{schema_name}.{table_name}`" + def _get_bigquery_project_id(self) -> str | None: + """Read the BigQuery project id from stored warehouse credentials.""" + credentials = secretsmanager.retrieve_warehouse_credentials(self.org_warehouse) or {} + project_id = credentials.get("project_id") + if project_id: + return str(project_id) + + credentials_json = credentials.get("credentials_json") + if isinstance(credentials_json, str): + try: + parsed_credentials = json.loads(credentials_json) + except json.JSONDecodeError: + return None + project_id = parsed_credentials.get("project_id") + if project_id: + return str(project_id) + + return None + @staticmethod def _quote_postgres_identifier(identifier: str) -> str: """Quote a Postgres identifier while preserving its literal value.""" @@ -151,4 +172,6 @@ def _parse_table_name(table_name: str | None) -> tuple[str, str] | None: if not table_name or "." 
not in table_name: return None schema_name, bare_table_name = table_name.split(".", 1) - return schema_name.strip().strip('"').strip("`"), bare_table_name.strip().strip('"').strip("`") + return schema_name.strip().strip('"').strip("`"), bare_table_name.strip().strip('"').strip( + "`" + ) diff --git a/ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py b/ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py new file mode 100644 index 000000000..fa536a031 --- /dev/null +++ b/ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py @@ -0,0 +1,31 @@ +# Generated by Django 4.2 on 2026-03-20 05:49 + +from django.db import migrations, models +import django.utils.timezone + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0152_orgdbt_docs_generated_at_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="dashboardchatmessage", + name="created_at", + field=models.DateTimeField(default=django.utils.timezone.now), + ), + migrations.AlterField( + model_name="dashboardchatsession", + name="created_at", + field=models.DateTimeField(default=django.utils.timezone.now), + ), + migrations.AddConstraint( + model_name="dashboardchatmessage", + constraint=models.UniqueConstraint( + condition=models.Q(client_message_id__isnull=False), + fields=("session", "client_message_id"), + name="dchat_message_session_client_msg_unique", + ), + ), + ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index f6f1561d1..2aee1c5bd 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -61,7 +61,7 @@ class DashboardChatSession(models.Model): org = models.ForeignKey(Org, on_delete=models.CASCADE) orguser = models.ForeignKey(OrgUser, null=True, on_delete=models.SET_NULL) dashboard = models.ForeignKey(Dashboard, on_delete=models.SET_NULL, null=True) - created_at = models.DateTimeField(auto_created=True, default=timezone.now) + created_at = 
models.DateTimeField(default=timezone.now) updated_at = models.DateTimeField(auto_now=True) class Meta: @@ -88,7 +88,7 @@ class DashboardChatMessage(models.Model): content = models.TextField(blank=True, default="") client_message_id = models.CharField(max_length=100, null=True, blank=True) payload = models.JSONField(null=True, blank=True) - created_at = models.DateTimeField(auto_created=True, default=timezone.now) + created_at = models.DateTimeField(default=timezone.now) class Meta: ordering = ["sequence_number"] @@ -97,4 +97,9 @@ class Meta: fields=["session", "sequence_number"], name="dchat_message_session_seq_unique", ), + models.UniqueConstraint( + fields=["session", "client_message_id"], + condition=models.Q(client_message_id__isnull=False), + name="dchat_message_session_client_msg_unique", + ), ] diff --git a/ddpui/services/dashboard_service.py b/ddpui/services/dashboard_service.py index 82193121e..a64cf9ec3 100644 --- a/ddpui/services/dashboard_service.py +++ b/ddpui/services/dashboard_service.py @@ -990,24 +990,43 @@ def validate_dashboard_config(dashboard: Dashboard) -> Dict[str, Any]: def export_dashboard_context(dashboard_id: int, org: Org) -> Dict[str, Any]: """Return dashboard data along with the full config for referenced charts.""" dashboard = DashboardService.get_dashboard(dashboard_id, org) - dashboard_response = DashboardService.get_dashboard_response(dashboard) + return DashboardService.export_dashboard_context_for_dashboard(dashboard, org) - charts = [] - for component_data in (dashboard.components or {}).values(): + @staticmethod + def extract_chart_ids_from_components(components: dict | None) -> list[int]: + """Extract referenced chart IDs from dashboard components while preserving order.""" + chart_ids: list[int] = [] + for component_data in (components or {}).values(): if component_data.get("type") != DashboardComponentType.CHART.value: continue chart_id = component_data.get("config", {}).get("chartId") - if not chart_id: - continue + if 
chart_id: + chart_ids.append(chart_id) + return list(dict.fromkeys(chart_ids)) - try: - chart = Chart.objects.get(id=chart_id, org=org) - except Chart.DoesNotExist: + @staticmethod + def export_dashboard_context_for_dashboard( + dashboard: Dashboard, + org: Org, + charts_by_id: Dict[int, Chart] | None = None, + ) -> Dict[str, Any]: + """Return dashboard data plus chart configs for an already loaded dashboard object.""" + dashboard_response = DashboardService.get_dashboard_response(dashboard) + chart_ids = DashboardService.extract_chart_ids_from_components(dashboard.components) + if charts_by_id is None: + charts_by_id = { + chart.id: chart for chart in Chart.objects.filter(id__in=chart_ids, org=org) + } + + charts = [] + for chart_id in chart_ids: + chart = charts_by_id.get(chart_id) + if chart is None: logger.warning( "Chart %s referenced by dashboard %s was not found", chart_id, - dashboard_id, + dashboard.id, ) continue diff --git a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py index 74c026bd2..1cefa61f8 100644 --- a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py +++ b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py @@ -57,7 +57,7 @@ def org(): @pytest.fixture -def orguser(authuser, org): +def orguser(authuser, org, seed_db): orguser = OrgUser.objects.create( user=authuser, org=org, @@ -68,7 +68,7 @@ def orguser(authuser, org): @pytest.fixture -def guest_orguser(org): +def guest_orguser(org, seed_db): guest_user = User.objects.create( username="chatsettingsguest", email="chatsettingsguest@test.com", @@ -85,7 +85,7 @@ def guest_orguser(org): @pytest.fixture -def other_org_dashboard(): +def other_org_dashboard(seed_db): other_org = Org.objects.create( name="Other Dashboard Chat Org", slug="other-chat-org", diff --git a/ddpui/tests/core/dashboard_chat/test_ingestion.py b/ddpui/tests/core/dashboard_chat/test_ingestion.py index 380f5eb4d..a1ae5e44c 100644 --- 
a/ddpui/tests/core/dashboard_chat/test_ingestion.py +++ b/ddpui/tests/core/dashboard_chat/test_ingestion.py @@ -235,10 +235,16 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdb artifacts = generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) orgdbt.refresh_from_db() - assert (project_dir / "profiles" / "profiles.yml").exists() assert mock_run_dbt.call_count == 2 assert mock_run_dbt.call_args_list[0].kwargs["command"] == ["deps"] assert mock_run_dbt.call_args_list[1].kwargs["command"] == ["docs", "generate"] + first_profiles_dir = Path(mock_run_dbt.call_args_list[0].kwargs["keyword_args"]["profiles-dir"]) + second_profiles_dir = Path( + mock_run_dbt.call_args_list[1].kwargs["keyword_args"]["profiles-dir"] + ) + assert first_profiles_dir == second_profiles_dir + assert not (project_dir / "profiles" / "profiles.yml").exists() + assert not first_profiles_dir.exists() assert artifacts.manifest_json == manifest_json assert artifacts.catalog_json == catalog_json assert orgdbt.docs_generated_at is not None @@ -433,7 +439,8 @@ def _raise_on_upsert(org_id, documents): service.ingest_org(org) remaining_ids = { - document.document_id for document in vector_store.get_documents(org.id, include_documents=False) + document.document_id + for document in vector_store.get_documents(org.id, include_documents=False) } assert remaining_ids == original_ids assert vector_store.delete_calls == [] @@ -483,3 +490,23 @@ def test_ingest_org_deletes_disabled_source_documents(org, orgdbt, orguser, dash assert result.source_document_counts["dbt_catalog"] == 0 assert "org_context" not in stored_source_types assert "dbt_catalog" not in stored_source_types + + +def test_ingest_org_skips_dbt_docs_when_dbt_sources_are_disabled(org, orgdbt, dashboard): + """Disabling both dbt sources should skip dbt docs generation entirely.""" + vector_store = FakeDashboardChatVectorStore() + dbt_docs_generator = Mock(side_effect=AssertionError("dbt docs should not run")) + service 
= DashboardChatIngestionService( + vector_store=vector_store, + dbt_docs_generator=dbt_docs_generator, + source_config=DashboardChatSourceConfig( + enabled_source_types=("org_context", "dashboard_context", "dashboard_export") + ), + ) + + result = service.ingest_org(org) + + dbt_docs_generator.assert_not_called() + assert result.docs_generated_at is None + assert result.source_document_counts["dbt_manifest"] == 0 + assert result.source_document_counts["dbt_catalog"] == 0 diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index ca9be08e0..085d9887e 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -1,18 +1,14 @@ """Tests for dashboard chat LangGraph runtime, allowlist, and SQL guard.""" -import os - -import django import pytest from django.contrib.auth.models import User from django.db import transaction -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") -os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" -django.setup() - from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist, DashboardChatAllowlistBuilder +from ddpui.core.dashboard_chat.allowlist import ( + DashboardChatAllowlist, + DashboardChatAllowlistBuilder, +) from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime from ddpui.core.dashboard_chat.runtime_types import ( @@ -209,6 +205,16 @@ def generate_sql( ) +def test_heuristic_intent_does_not_treat_summary_as_sum_keyword(): + """Substring matches like sum->summary should not force the SQL path.""" + decision = DashboardChatRuntime._heuristic_intent_decision( + user_query="Give me a summary of this dashboard", + conversation_history=[], + ) + + assert decision is None or decision.intent != DashboardChatIntent.DATA_QUERY + + @pytest.fixture def org(): 
organization = Org.objects.create( @@ -365,7 +371,9 @@ def test_sql_guard_enforces_single_statement_allowlist_and_limit(): assert disallowed_table.is_valid is False assert any("not accessible" in error for error in disallowed_table.errors) - allowed_query = guard.validate("SELECT COUNT(*) AS beneficiary_count FROM analytics.program_reach") + allowed_query = guard.validate( + "SELECT COUNT(*) AS beneficiary_count FROM analytics.program_reach" + ) assert allowed_query.is_valid is True assert allowed_query.sanitized_sql.endswith("LIMIT 200") assert any("No LIMIT clause found" in warning for warning in allowed_query.warnings) @@ -390,6 +398,20 @@ def test_sql_guard_rejects_row_level_pii_queries(): ] +def test_sql_guard_rejects_select_into_queries(): + """SQL guard should reject SELECT ... INTO statements.""" + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + + select_into_query = guard.validate( + "SELECT program_name INTO temp_programs FROM analytics.program_reach LIMIT 50" + ) + + assert select_into_query.is_valid is False + assert select_into_query.sanitized_sql is None + assert "SELECT INTO is not allowed" in select_into_query.errors + + def test_runtime_context_query_returns_citations_and_related_dashboards( org, primary_dashboard, @@ -558,7 +580,9 @@ def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_das ] -def test_runtime_skips_disabled_source_types_during_retrieval(org, primary_dashboard, related_dashboard): +def test_runtime_skips_disabled_source_types_during_retrieval( + org, primary_dashboard, related_dashboard +): """Disabled source types should not be queried from the vector store.""" transaction.commit() vector_store = FakeVectorStore( diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index 30da80b1f..ca057461d 100644 --- 
a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -14,10 +14,11 @@ from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.core.dashboard_chat.session_service import ( DashboardChatSessionError, + create_dashboard_chat_user_message, get_or_create_dashboard_chat_session, ) from ddpui.models.dashboard import Dashboard -from ddpui.models.dashboard_chat import DashboardChatSession +from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org import Org from ddpui.models.org_user import OrgUser from ddpui.models.role_based_access import Role @@ -120,9 +121,38 @@ def test_get_or_create_dashboard_chat_session_rejects_other_user_session( dashboard=dashboard, ) - with pytest.raises(DashboardChatSessionError, match="Chat session not found for this dashboard"): + with pytest.raises( + DashboardChatSessionError, match="Chat session not found for this dashboard" + ): get_or_create_dashboard_chat_session( orguser=other_orguser, dashboard=dashboard, session_id=str(session.session_id), ) + + +def test_create_dashboard_chat_user_message_is_idempotent_for_client_message_id( + session_owner, + dashboard, +): + """Retries with the same client message id should reuse the stored message.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + + first_message = create_dashboard_chat_user_message( + session=session, + content="Why did funding drop?", + client_message_id="client-1", + ) + second_message = create_dashboard_chat_user_message( + session=session, + content="Why did funding drop?", + client_message_id="client-1", + ) + + assert first_message.id == second_message.id + assert first_message.sequence_number == 1 + assert DashboardChatMessage.objects.filter(session=session).count() == 1 diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py 
b/ddpui/tests/core/dashboard_chat/test_vector_store.py index 287bef6bd..51b44e7f8 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -158,8 +158,19 @@ def test_vector_document_has_stable_id_and_required_metadata(): chunk_index=3, updated_at=updated_at, ) + other_document = DashboardChatVectorDocument( + org_id=7, + source_type=DashboardChatSourceType.DBT_MANIFEST, + source_identifier="model.public.fact_enrollments", + content="manifest chunk", + dashboard_id=9, + chart_id=12, + title="Fact Enrollments", + chunk_index=3, + updated_at=updated_at, + ) - assert document.document_id == document.document_id + assert document.document_id == other_document.document_id assert document.metadata() == { "org_id": 7, "source_type": "dbt_manifest", diff --git a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py new file mode 100644 index 000000000..3cca9d879 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py @@ -0,0 +1,65 @@ +"""Unit tests for dashboard chat warehouse helpers.""" + +import json +import os +from types import SimpleNamespace +from unittest.mock import patch + +import django +import pytest + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" +django.setup() + +from ddpui.core.dashboard_chat.warehouse_tools import ( + DashboardChatWarehouseTools, + DashboardChatWarehouseToolsError, +) + + +def _build_bigquery_tools(): + return DashboardChatWarehouseTools( + org=SimpleNamespace(id=1), + org_warehouse=SimpleNamespace( + wtype="bigquery", credentials="warehouse-secret", bq_location="asia-south1" + ), + warehouse_client=object(), + ) + + +def test_quote_bigquery_table_ref_uses_project_id_from_credentials(): + """BigQuery table refs should use project_id from stored credentials, not dataset location.""" + with patch( + 
"ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + return_value={"project_id": "analytics-project"}, + ): + tools = _build_bigquery_tools() + assert tools._quote_bigquery_table_ref("analytics", "program_reach") == ( + "`analytics-project.analytics.program_reach`" + ) + + +def test_quote_bigquery_table_ref_reads_nested_project_id_from_credentials_json(): + """credentials_json payloads should still provide the BigQuery project id.""" + with patch( + "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + return_value={"credentials_json": json.dumps({"project_id": "analytics-project"})}, + ): + tools = _build_bigquery_tools() + assert tools._quote_bigquery_table_ref("analytics", "program_reach") == ( + "`analytics-project.analytics.program_reach`" + ) + + +def test_quote_bigquery_table_ref_requires_project_id(): + """A missing project id should fail explicitly.""" + with patch( + "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + return_value={"dataset_location": "asia-south1"}, + ): + tools = _build_bigquery_tools() + with pytest.raises( + DashboardChatWarehouseToolsError, match="BigQuery project id not configured" + ): + tools._quote_bigquery_table_ref("analytics", "program_reach") diff --git a/ddpui/tests/websockets/test_dashboard_chat_consumer.py b/ddpui/tests/websockets/test_dashboard_chat_consumer.py index 43b596012..4af5ca03f 100644 --- a/ddpui/tests/websockets/test_dashboard_chat_consumer.py +++ b/ddpui/tests/websockets/test_dashboard_chat_consumer.py @@ -76,3 +76,49 @@ def test_dashboard_chat_consumer_send_message_creates_session_and_dispatches_tas consumer._subscribe_to_session.assert_called_once_with("session-123") mock_publish_event.assert_called_once() mock_delay.assert_called_once_with("session-123", 17) + + +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_event") +@patch( + 
"ddpui.websockets.dashboard_chat_consumer.run_dashboard_chat_turn.delay", + side_effect=RuntimeError("enqueue failed"), +) +@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message") +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_send_message_returns_error_when_enqueue_fails( + mock_get_or_create_session, + mock_create_user_message, + mock_delay, + mock_publish_event, +): + session = Mock(session_id="session-123") + user_message = Mock(id=17) + mock_get_or_create_session.return_value = session + mock_create_user_message.return_value = user_message + + consumer = DashboardChatConsumer() + consumer.dashboard = Mock(id=42) + consumer.orguser = Mock() + consumer.send = Mock() + consumer._chat_available = Mock(return_value=(True, "")) + consumer._subscribe_to_session = Mock() + + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + "client_message_id": "ui-1", + } + ) + } + ) + + mock_delay.assert_called_once_with("session-123", 17) + consumer._subscribe_to_session.assert_not_called() + mock_publish_event.assert_not_called() + + payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + assert payload["event_type"] == "error" + assert payload["data"]["message"] == "Unable to start chat right now" diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 4671354c1..90b5b87fa 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -17,9 +17,13 @@ from ddpui.models.dashboard import Dashboard from ddpui.models.org_preferences import OrgPreferences from ddpui.models.role_based_access import RolePermission +from ddpui.utils.custom_logger import CustomLogger from ddpui.utils.feature_flags import get_all_feature_flags_for_org from ddpui.websockets import BaseConsumer +logger = 
CustomLogger("ddpui") + + class DashboardChatConsumer(BaseConsumer): """Authenticated websocket for dashboard-level chat.""" @@ -78,6 +82,16 @@ def websocket_receive(self, message): content=raw_message, client_message_id=payload.get("client_message_id"), ) + try: + run_dashboard_chat_turn.delay(str(session.session_id), user_message.id) + except Exception: + logger.exception( + "dashboard chat turn could not be enqueued for session=%s", + session.session_id, + ) + self._respond_error("Unable to start chat right now") + return + self._subscribe_to_session(str(session.session_id)) publish_dashboard_chat_event( str(session.session_id), @@ -89,7 +103,6 @@ def websocket_receive(self, message): data={"label": "thinking"}, ), ) - run_dashboard_chat_turn.delay(str(session.session_id), user_message.id) def websocket_disconnect(self, message): """Remove the socket from any joined session groups on disconnect.""" @@ -124,7 +137,9 @@ def _respond_error(self, message: str) -> None: def _chat_available(self) -> tuple[bool, str]: """Return whether the current org is ready for dashboard chat.""" - feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get("AI_DASHBOARD_CHAT", False) + feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get( + "AI_DASHBOARD_CHAT", False + ) if not feature_enabled: return False, "Chat with dashboards is not enabled for this organization" From 5f35ee8d068af1e04b1f60849d59718ea19ca3cd Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 20 Mar 2026 11:53:17 +0530 Subject: [PATCH 13/49] fix(ai-chat): close remaining backend review gaps --- .env.template | 6 +++--- ddpui/api/org_preferences_api.py | 19 ++++++++++--------- ddpui/core/dashboard_chat/events.py | 25 ++++++++++++++++++------- ddpui/core/dashboard_chat/runtime.py | 12 ++++++++++-- 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/.env.template b/.env.template index 8dfe46ce7..444f5e37d 100644 --- a/.env.template +++ b/.env.template @@ -147,11 +147,11 
@@ OPENAI_API_KEY="" # AI DASHBOARD CHAT VECTOR STORE #################################################################################################### -AI_DASHBOARD_CHAT_CHROMA_HOST="localhost" +AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=org_ +AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=text-embedding-3-small +AI_DASHBOARD_CHAT_CHROMA_HOST=localhost AI_DASHBOARD_CHAT_CHROMA_PORT=8003 AI_DASHBOARD_CHAT_CHROMA_SSL=False -AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX="org_" -AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL="text-embedding-3-small" #################################################################################################### # MONITORING & LOGGING diff --git a/ddpui/api/org_preferences_api.py b/ddpui/api/org_preferences_api.py index 30d278c73..9f4f65215 100644 --- a/ddpui/api/org_preferences_api.py +++ b/ddpui/api/org_preferences_api.py @@ -35,9 +35,7 @@ def _get_or_create_org_preferences(org): - org_preferences = OrgPreferences.objects.filter(org=org).first() - if org_preferences is None: - org_preferences = OrgPreferences.objects.create(org=org) + org_preferences, _ = OrgPreferences.objects.get_or_create(org=org) return org_preferences @@ -72,7 +70,9 @@ def _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context): ), ai_data_sharing_consented_at=org_preferences.ai_data_sharing_consented_at, org_context_markdown=org_context.markdown, - org_context_updated_by=org_context.updated_by.user.email if org_context.updated_by else None, + org_context_updated_by=org_context.updated_by.user.email + if org_context.updated_by + else None, org_context_updated_at=org_context.updated_at, dbt_configured=_is_dbt_configured(org), docs_generated_at=org_dbt.docs_generated_at if org_dbt else None, @@ -246,10 +246,7 @@ def update_ai_dashboard_chat_settings(request, payload: UpdateOrgAIDashboardChat else org_preferences.ai_data_sharing_enabled ) - if ( - payload.ai_data_sharing_enabled is True - and org_preferences.ai_data_sharing_enabled is False - ): + if 
payload.ai_data_sharing_enabled is True and org_preferences.ai_data_sharing_enabled is False: org_preferences.ai_data_sharing_consented_by = orguser org_preferences.ai_data_sharing_consented_at = timezone.now() @@ -275,6 +272,7 @@ def update_ai_dashboard_chat_settings(request, payload: UpdateOrgAIDashboardChat "res": _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context).dict(), } + @orgpreference_router.get("/ai-dashboard-chat/status") def get_ai_dashboard_chat_status(request): """Return feature readiness for dashboard chat for the current org.""" @@ -283,7 +281,10 @@ def get_ai_dashboard_chat_status(request): org_preferences = _get_or_create_org_preferences(org) - return {"success": True, "res": _serialize_ai_dashboard_chat_status(org, org_preferences).dict()} + return { + "success": True, + "res": _serialize_ai_dashboard_chat_status(org, org_preferences).dict(), + } @orgpreference_router.get("/toolinfo") diff --git a/ddpui/core/dashboard_chat/events.py b/ddpui/core/dashboard_chat/events.py index ec3ddec5f..636f4a98c 100644 --- a/ddpui/core/dashboard_chat/events.py +++ b/ddpui/core/dashboard_chat/events.py @@ -6,6 +6,10 @@ from channels.layers import get_channel_layer from django.utils import timezone +from ddpui.utils.custom_logger import CustomLogger + +logger = CustomLogger("ddpui") + def dashboard_chat_group_name(session_id: str) -> str: """Return the channel-layer group name for a dashboard chat session.""" @@ -39,10 +43,17 @@ def publish_dashboard_chat_event(session_id: str, event: dict) -> None: channel_layer = get_channel_layer() if channel_layer is None: return - async_to_sync(channel_layer.group_send)( - dashboard_chat_group_name(session_id), - { - "type": "dashboard_chat_event", - "event": json.dumps(event), - }, - ) + try: + async_to_sync(channel_layer.group_send)( + dashboard_chat_group_name(session_id), + { + "type": "dashboard_chat_event", + "event": json.dumps(event), + }, + ) + except Exception: + logger.exception( + "failed to 
publish dashboard chat event for session=%s event_type=%s", + session_id, + event.get("event_type"), + ) diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py index 448604c6f..dc342b2ba 100644 --- a/ddpui/core/dashboard_chat/runtime.py +++ b/ddpui/core/dashboard_chat/runtime.py @@ -520,6 +520,8 @@ def _node_compose_answer(self, state: DashboardChatRuntimeState) -> DashboardCha for related_dashboard in state.get("related_dashboards", []) ], ) + except AssertionError: + raise except Exception: answer_text = self._fallback_answer_text( retrieved_documents=state.get("retrieved_documents", []), @@ -545,7 +547,11 @@ def _node_finalize_response( response = state["response"] citations = list(response.citations) sql_validation = state.get("sql_validation") - if sql_validation is not None: + if ( + sql_validation is not None + and sql_validation.is_valid + and sql_validation.sanitized_sql is not None + ): citations.extend( DashboardChatCitation( source_type="warehouse_table", @@ -577,7 +583,9 @@ def _node_finalize_response( "retrieved_document_ids": [ document.document_id for document in state.get("retrieved_documents", []) ], - "allowlisted_tables": sorted(state["allowlist"].allowed_tables), + "allowlisted_tables": sorted( + state.get("allowlist", DashboardChatAllowlist()).allowed_tables + ), "sql_guard_errors": state.get("sql_validation").errors if state.get("sql_validation") else [], From 3a4dfb28b55ce170e332bf66ea380c34498ae6d0 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Tue, 24 Mar 2026 10:58:37 +0530 Subject: [PATCH 14/49] fix(ai-chat): align dashboard chat runtime and prompts --- ddpui/celeryworkers/tasks.py | 194 +- ddpui/core/dashboard_chat/allowlist.py | 73 + ddpui/core/dashboard_chat/config.py | 10 +- ddpui/core/dashboard_chat/dbt_docs.py | 27 +- ddpui/core/dashboard_chat/ingestion.py | 79 +- ddpui/core/dashboard_chat/llm_client.py | 401 +-- ddpui/core/dashboard_chat/prompt_cache.py | 8 + 
ddpui/core/dashboard_chat/prompt_store.py | 233 ++ ddpui/core/dashboard_chat/runtime.py | 3039 +++++++++++++---- ddpui/core/dashboard_chat/runtime_types.py | 122 +- ddpui/core/dashboard_chat/session_cache.py | 101 + ddpui/core/dashboard_chat/session_service.py | 66 +- ddpui/core/dashboard_chat/vector_documents.py | 34 +- ddpui/core/dashboard_chat/vector_store.py | 161 +- ddpui/core/dashboard_chat/warehouse_tools.py | 14 +- .../0154_dashboardchatprompttemplate.py | 240 ++ ...boardchatsession_vector_collection_name.py | 17 + ddpui/models/dashboard_chat.py | 51 + .../core/dashboard_chat/test_ingestion.py | 150 +- .../core/dashboard_chat/test_llm_client.py | 189 + .../core/dashboard_chat/test_prompt_store.py | 69 + .../tests/core/dashboard_chat/test_runtime.py | 1821 ++++++++-- .../dashboard_chat/test_session_service.py | 65 +- ddpui/tests/core/dashboard_chat/test_tasks.py | 57 +- .../core/dashboard_chat/test_vector_store.py | 7 + .../dashboard_chat/test_warehouse_tools.py | 6 - .../test_dashboard_chat_consumer.py | 90 +- ddpui/websockets/dashboard_chat_consumer.py | 91 +- 28 files changed, 6017 insertions(+), 1398 deletions(-) create mode 100644 ddpui/core/dashboard_chat/prompt_cache.py create mode 100644 ddpui/core/dashboard_chat/prompt_store.py create mode 100644 ddpui/core/dashboard_chat/session_cache.py create mode 100644 ddpui/migrations/0154_dashboardchatprompttemplate.py create mode 100644 ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py create mode 100644 ddpui/tests/core/dashboard_chat/test_llm_client.py create mode 100644 ddpui/tests/core/dashboard_chat/test_prompt_store.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index ae97a5887..a16ca8c9e 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -97,14 +97,13 @@ ) from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory from ddpui.core import llm_service -from ddpui.core.dashboard_chat.ingestion import 
DashboardChatIngestionService from ddpui.core.dashboard_chat.events import ( build_dashboard_chat_event, publish_dashboard_chat_event, ) -from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime from ddpui.core.dashboard_chat.session_service import ( create_dashboard_chat_assistant_message, + find_dashboard_chat_assistant_reply, list_dashboard_chat_history, serialize_dashboard_chat_message, ) @@ -1309,6 +1308,8 @@ def schedule_dashboard_chat_context_builds(): @app.task(bind=True) def build_dashboard_chat_context_for_org(self, org_id: int): """Build dashboard chat retrieval context for one org if the org is eligible.""" + from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService + org = ( Org.objects.select_related("dbt", "preferences") .filter(id=org_id, dbt__isnull=False) @@ -1364,6 +1365,90 @@ def build_dashboard_chat_context_for_org(self, org_id: int): @app.task def run_dashboard_chat_turn(session_id: str, user_message_id: int): """Run one dashboard chat turn asynchronously and emit websocket events.""" + try: + result = execute_dashboard_chat_turn(session_id, user_message_id) + session = result["session"] + user_message = result["user_message"] + assistant_message = result.get("assistant_message") + + if result["status"] == "completed" and assistant_message is not None: + publish_dashboard_chat_event( + str(session.session_id), + build_dashboard_chat_event( + event_type="assistant_message", + session_id=str(session.session_id), + dashboard_id=session.dashboard.id, + message_id=str(assistant_message.id), + data=serialize_dashboard_chat_message(assistant_message), + ), + ) + return { + "status": "completed", + "session_id": str(session.session_id), + "assistant_message_id": assistant_message.id, + } + + if result["status"] == "skipped_existing_reply" and assistant_message is not None: + logger.info( + "dashboard chat turn reused existing assistant message for session=%s message_id=%s", + session_id, + user_message_id, + ) + return { 
+ "status": "skipped_existing_reply", + "session_id": str(session.session_id), + "assistant_message_id": assistant_message.id, + } + + if result["status"] == "skipped_missing_session": + logger.warning( + "dashboard chat turn skipped because session %s was not found or has no dashboard", + session_id, + ) + return {"status": "skipped_missing_session", "session_id": session_id} + + if result["status"] == "skipped_missing_message": + logger.warning( + "dashboard chat turn skipped because message %s was not found in session %s", + user_message_id, + session_id, + ) + return {"status": "skipped_missing_message", "session_id": session_id} + + raise RuntimeError(f"Unexpected dashboard chat turn status: {result['status']}") + except Exception: + logger.exception( + "dashboard chat turn failed for session=%s message_id=%s", + session_id, + user_message_id, + ) + from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession + + session = ( + DashboardChatSession.objects.select_related("dashboard") + .filter(session_id=session_id) + .first() + ) + user_message = ( + DashboardChatMessage.objects.filter(id=user_message_id, role="user").first() + ) + if session is not None and session.dashboard is not None and user_message is not None: + publish_dashboard_chat_event( + str(session.session_id), + build_dashboard_chat_event( + event_type="error", + session_id=str(session.session_id), + dashboard_id=session.dashboard.id, + message_id=str(user_message.id), + data={"message": "Something went wrong while generating the response"}, + ), + ) + raise + + +def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: + """Run one dashboard chat turn synchronously and persist the assistant reply.""" + from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession session = ( @@ -1372,11 +1457,7 @@ def run_dashboard_chat_turn(session_id: str, user_message_id: int): 
.first() ) if session is None or session.dashboard is None: - logger.warning( - "dashboard chat turn skipped because session %s was not found or has no dashboard", - session_id, - ) - return {"status": "skipped_missing_session", "session_id": session_id} + return {"status": "skipped_missing_session", "session": None, "user_message": None} user_message = DashboardChatMessage.objects.filter( id=user_message_id, @@ -1384,71 +1465,46 @@ def run_dashboard_chat_turn(session_id: str, user_message_id: int): role="user", ).first() if user_message is None: - logger.warning( - "dashboard chat turn skipped because message %s was not found in session %s", - user_message_id, - session_id, - ) - return {"status": "skipped_missing_message", "session_id": session_id} + return {"status": "skipped_missing_message", "session": session, "user_message": None} - try: - response = DashboardChatRuntime().run( - org=session.org, - dashboard_id=session.dashboard.id, - user_query=user_message.content, - conversation_history=list_dashboard_chat_history( - session, - exclude_message_id=user_message.id, - ), - ) - assistant_payload = { - "intent": response.intent.value, - "citations": [citation.to_dict() for citation in response.citations], - "related_dashboards": [ - related_dashboard.to_dict() for related_dashboard in response.related_dashboards - ], - "warnings": response.warnings, - "sql": response.sql, - "sql_results": response.sql_results, - "metadata": response.metadata, - } - assistant_message = create_dashboard_chat_assistant_message( - session=session, - content=response.answer_text, - payload=assistant_payload, - ) - publish_dashboard_chat_event( - str(session.session_id), - build_dashboard_chat_event( - event_type="assistant_message", - session_id=str(session.session_id), - dashboard_id=session.dashboard.id, - message_id=str(assistant_message.id), - data=serialize_dashboard_chat_message(assistant_message), - ), - ) + existing_assistant_message = find_dashboard_chat_assistant_reply( + 
session=session, + user_message=user_message, + ) + if existing_assistant_message is not None: return { - "status": "completed", - "session_id": str(session.session_id), - "assistant_message_id": assistant_message.id, + "status": "skipped_existing_reply", + "session": session, + "user_message": user_message, + "assistant_message": existing_assistant_message, } - except Exception: - logger.exception( - "dashboard chat turn failed for session=%s message_id=%s", - session_id, - user_message_id, - ) - publish_dashboard_chat_event( - str(session.session_id), - build_dashboard_chat_event( - event_type="error", - session_id=str(session.session_id), - dashboard_id=session.dashboard.id, - message_id=str(user_message.id), - data={"message": "Something went wrong while generating the response"}, - ), - ) - raise + + response = DashboardChatRuntime().run( + org=session.org, + dashboard_id=session.dashboard.id, + user_query=user_message.content, + session_id=str(session.session_id), + vector_collection_name=session.vector_collection_name, + conversation_history=list_dashboard_chat_history( + session, + exclude_message_id=user_message.id, + ), + ) + response_payload = response.to_dict() + assistant_payload = { + key: value for key, value in response_payload.items() if key != "answer_text" + } + assistant_message = create_dashboard_chat_assistant_message( + session=session, + content=response.answer_text, + payload=assistant_payload, + ) + return { + "status": "completed", + "session": session, + "user_message": user_message, + "assistant_message": assistant_message, + } @app.on_after_finalize.connect diff --git a/ddpui/core/dashboard_chat/allowlist.py b/ddpui/core/dashboard_chat/allowlist.py index b70ca2f55..29256f111 100644 --- a/ddpui/core/dashboard_chat/allowlist.py +++ b/ddpui/core/dashboard_chat/allowlist.py @@ -3,6 +3,7 @@ import json from dataclasses import dataclass, field from pathlib import Path +from typing import Any from ddpui.core.orgdbt_manager import 
DbtProjectManager from ddpui.models.org import OrgDbt @@ -185,3 +186,75 @@ def _add_unique_id_and_upstreams( nodes_by_unique_id=nodes_by_unique_id, visited=visited, ) + + @classmethod + def build_dbt_index( + cls, + manifest_json: dict | None, + allowlist: DashboardChatAllowlist, + ) -> dict[str, Any]: + """Build a compact allowlisted dbt index for deterministic model-search tools.""" + if not manifest_json: + return {"resources_by_unique_id": {}} + + nodes_by_unique_id = cls._manifest_nodes_by_unique_id(manifest_json) + parent_map = manifest_json.get("parent_map") or {} + child_map = manifest_json.get("child_map") or {} + resources_by_unique_id: dict[str, dict[str, Any]] = {} + + for unique_id in sorted(allowlist.allowed_unique_ids): + node = nodes_by_unique_id.get(unique_id) + if node is None: + continue + + table_name = cls._table_name_for_node(node) + resources_by_unique_id[unique_id] = { + "unique_id": unique_id, + "resource_type": str(node.get("resource_type") or ""), + "name": str(node.get("name") or ""), + "schema": str(node.get("schema") or ""), + "database": str(node.get("database") or ""), + "description": str(node.get("description") or ""), + "table": table_name, + "columns": [ + { + "name": str(column.get("name") or column_name), + "type": str(column.get("data_type") or column.get("type") or ""), + "description": str(column.get("description") or ""), + } + for column_name, column in (node.get("columns") or {}).items() + ], + "upstream": cls._lineage_entries( + unique_ids=parent_map.get(unique_id) or [], + nodes_by_unique_id=nodes_by_unique_id, + allowlist=allowlist, + ), + "downstream": cls._lineage_entries( + unique_ids=child_map.get(unique_id) or [], + nodes_by_unique_id=nodes_by_unique_id, + allowlist=allowlist, + ), + } + + return {"resources_by_unique_id": resources_by_unique_id} + + @classmethod + def _lineage_entries( + cls, + *, + unique_ids: list[str], + nodes_by_unique_id: dict[str, dict], + allowlist: DashboardChatAllowlist, + ) -> 
list[str]: + """Return compact allowlisted lineage labels for one dbt resource.""" + lineage_entries: list[str] = [] + for unique_id in unique_ids: + if not allowlist.is_unique_id_allowed(unique_id): + continue + node = nodes_by_unique_id.get(unique_id) + if node is None: + lineage_entries.append(unique_id) + continue + table_name = cls._table_name_for_node(node) + lineage_entries.append(table_name or str(node.get("name") or unique_id)) + return lineage_entries diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index d175e6657..6fe63fdb4 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -88,9 +88,9 @@ class DashboardChatRuntimeConfig: """Environment-backed configuration for dashboard chat orchestration.""" llm_model: str = "gpt-4o-mini" - llm_timeout_ms: int = 45000 + llm_timeout_ms: int = 12000 + llm_max_attempts: int = 1 retrieval_limit: int = 6 - related_dashboard_limit: int = 3 max_query_rows: int = 200 max_distinct_values: int = 50 max_schema_tables: int = 4 @@ -100,11 +100,9 @@ def from_env(cls) -> "DashboardChatRuntimeConfig": """Build runtime config from environment variables.""" return cls( llm_model=os.getenv("AI_DASHBOARD_CHAT_LLM_MODEL", "gpt-4o-mini"), - llm_timeout_ms=int(os.getenv("AI_DASHBOARD_CHAT_LLM_TIMEOUT_MS", "45000")), + llm_timeout_ms=int(os.getenv("AI_DASHBOARD_CHAT_LLM_TIMEOUT_MS", "12000")), + llm_max_attempts=int(os.getenv("AI_DASHBOARD_CHAT_LLM_MAX_ATTEMPTS", "1")), retrieval_limit=int(os.getenv("AI_DASHBOARD_CHAT_RETRIEVAL_LIMIT", "6")), - related_dashboard_limit=int( - os.getenv("AI_DASHBOARD_CHAT_RELATED_DASHBOARD_LIMIT", "3") - ), max_query_rows=int(os.getenv("AI_DASHBOARD_CHAT_MAX_QUERY_ROWS", "200")), max_distinct_values=int( os.getenv("AI_DASHBOARD_CHAT_MAX_DISTINCT_VALUES", "50") diff --git a/ddpui/core/dashboard_chat/dbt_docs.py b/ddpui/core/dashboard_chat/dbt_docs.py index 2c3e179a1..f487a6913 100644 --- a/ddpui/core/dashboard_chat/dbt_docs.py +++ 
b/ddpui/core/dashboard_chat/dbt_docs.py @@ -8,10 +8,12 @@ import yaml from django.utils import timezone +from ddpui.core.git_manager import GitManager, GitManagerError from ddpui.core.orgdbt_manager import DbtProjectManager from ddpui.ddpprefect import prefect_service -from ddpui.models.org import Org, OrgDbt +from ddpui.models.org import Org, OrgDbt, TransformType from ddpui.utils.custom_logger import CustomLogger +from ddpui.utils import secretsmanager logger = CustomLogger("ddpui.dashboard_chat.dbt_docs") @@ -30,13 +32,32 @@ class DashboardChatDbtDocsArtifacts: target_dir: Path +def _refresh_git_repo_if_needed(orgdbt: OrgDbt) -> None: + """Pull the latest changes for git-backed dbt projects before docs generation.""" + if orgdbt.transform_type != TransformType.GIT: + return + + repo_dir = Path(DbtProjectManager.get_dbt_project_dir(orgdbt)) + pat = None + if orgdbt.gitrepo_access_token_secret: + pat = secretsmanager.retrieve_github_pat(orgdbt.gitrepo_access_token_secret) + + try: + git_manager = GitManager(repo_local_path=str(repo_dir), pat=pat, validate_git=True) + git_manager.pull_changes() + except GitManagerError as error: + raise DashboardChatDbtDocsError( + f"Failed to refresh the local dbt repository for dashboard chat: {error}" + ) from error + + def _write_profiles_file(org: Org, orgdbt: OrgDbt, profiles_dir: Path) -> Path: """Write the dbt profiles.yml required for dbt CLI execution.""" if orgdbt.cli_profile_block is None: raise DashboardChatDbtDocsError("dbt CLI profile block not found") try: - dbt_project_params = DbtProjectManager.gather_dbt_project_params(org, orgdbt) + DbtProjectManager.gather_dbt_project_params(org, orgdbt) profile = prefect_service.get_dbt_cli_profile_block(orgdbt.cli_profile_block.block_name)[ "profile" ] @@ -60,6 +81,8 @@ def generate_dashboard_chat_dbt_docs_artifacts( if orgdbt is None: raise DashboardChatDbtDocsError("dbt workspace not configured") + _refresh_git_repo_if_needed(orgdbt) + with 
tempfile.TemporaryDirectory(prefix=f"dashboard-chat-dbt-{org.id}-") as profiles_dir: profile_path = _write_profiles_file(org, orgdbt, Path(profiles_dir)) profiles_dir_arg = str(profile_path.parent) diff --git a/ddpui/core/dashboard_chat/ingestion.py b/ddpui/core/dashboard_chat/ingestion.py index 8fe4dd075..562f94647 100644 --- a/ddpui/core/dashboard_chat/ingestion.py +++ b/ddpui/core/dashboard_chat/ingestion.py @@ -2,6 +2,7 @@ from collections import defaultdict from dataclasses import dataclass +from datetime import timedelta import json from typing import Callable @@ -18,7 +19,7 @@ ) from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore from ddpui.models.dashboard import Dashboard -from ddpui.models.dashboard_chat import DashboardAIContext, OrgAIContext +from ddpui.models.dashboard_chat import DashboardAIContext, DashboardChatSession, OrgAIContext from ddpui.models.org import Org from ddpui.models.visualization import Chart from ddpui.services.dashboard_service import DashboardService @@ -115,6 +116,11 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: if org.dbt is None: raise DashboardChatIngestionError("dbt workspace not configured") + collection_versioned_at = timezone.now() + target_collection_name = self.vector_store.collection_name( + org.id, + version=collection_versioned_at, + ) dbt_docs = None if self.source_config.is_enabled( DashboardChatSourceType.DBT_MANIFEST @@ -127,29 +133,31 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: if self.source_config.is_enabled(source_type) for document in documents_by_source[source_type.value] ] - - existing_documents = self.vector_store.get_documents(org.id) - existing_document_ids = {document.document_id for document in existing_documents} - desired_document_ids = {document.document_id for document in desired_documents} - - new_documents = [ - document - for document in desired_documents - if document.document_id not in existing_document_ids - ] - 
upserted_document_ids: list[str] = [] - if new_documents: - upserted_document_ids = sorted( - self.vector_store.upsert_documents(org.id, new_documents) + if self.vector_store.load_collection( + org.id, + collection_name=target_collection_name, + allow_legacy_fallback=False, + ) is not None: + self.vector_store.delete_collection( + org.id, + collection_name=target_collection_name, ) - stale_document_ids = sorted(existing_document_ids - desired_document_ids) - if stale_document_ids: - self.vector_store.delete_documents(org.id, ids=stale_document_ids) + upserted_document_ids = sorted( + self.vector_store.upsert_documents( + org.id, + desired_documents, + collection_name=target_collection_name, + ) + ) - vector_ingested_at = timezone.now() - org.dbt.vector_last_ingested_at = vector_ingested_at + vector_ingested_at = collection_versioned_at + org.dbt.vector_last_ingested_at = collection_versioned_at org.dbt.save(update_fields=["vector_last_ingested_at", "updated_at"]) + self._garbage_collect_inactive_collections( + org=org, + active_collection_name=target_collection_name, + ) return DashboardChatIngestionResult( org_id=org.id, @@ -164,9 +172,38 @@ def ingest_org(self, org: Org) -> DashboardChatIngestionResult: for source_type in INGEST_SOURCE_ORDER }, upserted_document_ids=upserted_document_ids, - deleted_document_ids=stale_document_ids, + deleted_document_ids=[], ) + def _garbage_collect_inactive_collections( + self, + *, + org: Org, + active_collection_name: str, + ) -> None: + """Delete old versioned collections that are not pinned by recent chat sessions.""" + retention_cutoff = timezone.now() - timedelta(hours=24) + recent_sessions = DashboardChatSession.objects.filter( + org=org, + updated_at__gte=retention_cutoff, + ) + pinned_collection_names = { + collection_name + for collection_name in recent_sessions.values_list("vector_collection_name", flat=True) + if collection_name + } + if recent_sessions.filter(vector_collection_name__isnull=True).exists(): + 
pinned_collection_names.add(self.vector_store.collection_name(org.id)) + pinned_collection_names.add(active_collection_name) + + for collection_name in self.vector_store.list_org_collection_names(org.id): + if collection_name in pinned_collection_names: + continue + self.vector_store.delete_collection( + org.id, + collection_name=collection_name, + ) + def _build_documents( self, org: Org, diff --git a/ddpui/core/dashboard_chat/llm_client.py b/ddpui/core/dashboard_chat/llm_client.py index 9c48e1be9..fd936c0ad 100644 --- a/ddpui/core/dashboard_chat/llm_client.py +++ b/ddpui/core/dashboard_chat/llm_client.py @@ -1,20 +1,21 @@ """Direct OpenAI client wrapper for dashboard chat runtime.""" -from collections.abc import Sequence import json +import logging import os +from time import sleep from typing import Any, Protocol +from ddpui.core.dashboard_chat.prompt_store import DashboardChatPromptStore from ddpui.core.dashboard_chat.runtime_types import ( - DashboardChatConversationMessage, + DashboardChatConversationContext, + DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatPlanMode, - DashboardChatQueryPlan, - DashboardChatRetrievedDocument, - DashboardChatSqlDraft, - DashboardChatTextFilterPlan, ) +from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey + +logger = logging.getLogger("ddpui") class DashboardChatLlmClient(Protocol): @@ -23,273 +24,245 @@ class DashboardChatLlmClient(Protocol): def classify_intent( self, user_query: str, - conversation_history: Sequence[DashboardChatConversationMessage], - dashboard_summary: str, + conversation_context: DashboardChatConversationContext, ) -> DashboardChatIntentDecision: """Classify the incoming query.""" - def plan_query( - self, - user_query: str, - conversation_history: Sequence[DashboardChatConversationMessage], - dashboard_summary: str, - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - schema_prompt: str, - allowlisted_tables: Sequence[str], - 
) -> DashboardChatQueryPlan: - """Build a structured plan for the query.""" + def compose_small_talk(self, user_query: str) -> str: + """Compose a brief small-talk response describing dashboard chat capabilities.""" - def generate_sql( - self, - user_query: str, - dashboard_summary: str, - query_plan: DashboardChatQueryPlan, - schema_prompt: str, - distinct_values: dict[str, list[str]], - allowlisted_tables: Sequence[str], - ) -> DashboardChatSqlDraft: - """Generate SQL from the structured plan.""" + def get_prompt(self, prompt_key: DashboardChatPromptTemplateKey | str) -> str: + """Return one stored dashboard chat prompt.""" + + def reset_usage(self) -> None: + """Reset per-turn usage tracking before a new runtime invocation.""" - def compose_answer( + def run_tool_loop_turn( self, - user_query: str, - dashboard_summary: str, - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - sql: str | None, - sql_results: list[dict[str, Any]] | None, - warnings: Sequence[str], - related_dashboard_titles: Sequence[str], - ) -> str: - """Compose the final answer text.""" + *, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]], + tool_choice: str, + operation: str, + ) -> dict[str, Any]: + """Run one prototype-style tool-loop completion.""" class OpenAIDashboardChatLlmClient: """Direct OpenAI SDK adapter with JSON-mode helpers.""" + TECHNICAL_DIFFICULTIES_MESSAGE = ( + "I'm experiencing technical difficulties. Please try again." 
+ ) + def __init__( self, api_key: str | None = None, model: str = "gpt-4o-mini", - timeout_ms: int = 45000, + timeout_ms: int = 12000, + max_attempts: int = 1, client: Any = None, + prompt_store: DashboardChatPromptStore | None = None, ): self.api_key = api_key or os.getenv("OPENAI_API_KEY") self.model = model self.timeout_ms = timeout_ms + self.max_attempts = max(1, max_attempts) + self.prompt_store = prompt_store or DashboardChatPromptStore() + self.usage_events: list[dict[str, Any]] = [] if client is None: if not self.api_key: raise ValueError("OPENAI_API_KEY must be set for dashboard chat runtime") from openai import OpenAI - client = OpenAI(api_key=self.api_key, timeout=timeout_ms / 1000) + client = OpenAI( + api_key=self.api_key, + timeout=timeout_ms / 1000, + max_retries=0, + ) self.client = client + def reset_usage(self) -> None: + """Reset aggregated OpenAI usage before one new chat turn.""" + self.usage_events = [] + def classify_intent( self, user_query: str, - conversation_history: Sequence[DashboardChatConversationMessage], - dashboard_summary: str, + conversation_context: DashboardChatConversationContext, ) -> DashboardChatIntentDecision: - """Classify intent with lightweight conversation awareness.""" - prompt = { - "dashboard_summary": dashboard_summary, - "conversation_history": [message.__dict__ for message in conversation_history[-6:]], - "user_query": user_query, - } - result = self._complete_json( - system_prompt=( - "Classify the user query for an NGO dashboard assistant. " - "Return JSON with keys intent, reason, force_sql_path, clarification_question. " - "Allowed intents: data_query, context_query, needs_clarification, small_talk, irrelevant. " - "Set force_sql_path=true for any query that asks for counts, trends, breakdowns, comparisons, " - "filters, or tabular data." 
- ), - user_prompt=json.dumps(prompt, ensure_ascii=True), + """Classify intent with prototype-style conversation awareness.""" + system_prompt = self.prompt_store.get( + DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION ) - intent_value = result.get("intent", DashboardChatIntent.CONTEXT_QUERY.value) + if conversation_context.last_sql_query or conversation_context.last_chart_ids: + system_prompt += ( + "\n\nCONVERSATION CONTEXT:\n" + f"- Previous SQL: {conversation_context.last_sql_query or 'None'}\n" + f"- Previous tables: {', '.join(conversation_context.last_tables_used) or 'None'}\n" + f"- Previous charts: {', '.join(conversation_context.last_chart_ids) or 'None'}\n" + f"- Last response type: {conversation_context.last_response_type or 'None'}\n\n" + "Use this context to detect follow-up queries that want to modify or expand on previous results." + ) + try: + result = self._complete_json( + operation="intent_classification", + system_prompt=system_prompt, + user_prompt=f"Classify this query: {user_query}", + ) + except Exception: + logger.exception("Dashboard chat intent classification failed") + return DashboardChatIntentDecision( + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + confidence=0.0, + reason="Intent classification failed", + clarification_question=self.TECHNICAL_DIFFICULTIES_MESSAGE, + ) + intent_value = result.get("intent", DashboardChatIntent.QUERY_WITHOUT_SQL.value) try: intent = DashboardChatIntent(intent_value) except ValueError: - intent = DashboardChatIntent.CONTEXT_QUERY + intent = DashboardChatIntent.QUERY_WITHOUT_SQL + follow_up_result = result.get("follow_up_context") or {} + follow_up_context = DashboardChatFollowUpContext( + is_follow_up=bool(follow_up_result.get("is_follow_up")), + follow_up_type=follow_up_result.get("follow_up_type"), + reusable_elements=follow_up_result.get("reusable_elements") or {}, + modification_instruction=follow_up_result.get("modification_instruction"), + ) return DashboardChatIntentDecision( 
intent=intent, + confidence=float(result.get("confidence") or 0.0), reason=str(result.get("reason") or "LLM classification"), - force_sql_path=bool(result.get("force_sql_path", intent == DashboardChatIntent.DATA_QUERY)), - clarification_question=result.get("clarification_question"), - ) - - def plan_query( - self, - user_query: str, - conversation_history: Sequence[DashboardChatConversationMessage], - dashboard_summary: str, - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - schema_prompt: str, - allowlisted_tables: Sequence[str], - ) -> DashboardChatQueryPlan: - """Generate a structured execution plan.""" - prompt = { - "dashboard_summary": dashboard_summary, - "conversation_history": [message.__dict__ for message in conversation_history[-6:]], - "retrieved_documents": [ - { - "source_type": document.source_type, - "source_identifier": document.source_identifier, - "content": document.content[:500], - } - for document in retrieved_documents[:8] - ], - "schema_prompt": schema_prompt, - "allowlisted_tables": list(allowlisted_tables), - "user_query": user_query, - } - result = self._complete_json( - system_prompt=( - "Plan how to answer the dashboard question. " - "Return JSON with keys mode, reason, relevant_tables, schema_lookup_tables, text_filters, " - "answer_strategy, clarification_question. " - "Allowed modes: sql, context, clarify. " - "text_filters must be an array of objects with table_name, column_name, requested_value. " - "If the question can be answered from context or retrieved docs without SQL, choose context. " - "If the question needs row-level or aggregate data, choose sql." 
- ), - user_prompt=json.dumps(prompt, ensure_ascii=True), - ) - mode_value = result.get("mode", DashboardChatPlanMode.CONTEXT.value) - try: - mode = DashboardChatPlanMode(mode_value) - except ValueError: - mode = DashboardChatPlanMode.CONTEXT - return DashboardChatQueryPlan( - mode=mode, - reason=str(result.get("reason") or "LLM plan"), - relevant_tables=_normalize_table_list(result.get("relevant_tables")), - schema_lookup_tables=_normalize_table_list(result.get("schema_lookup_tables")), - text_filters=[ - DashboardChatTextFilterPlan( - table_name=str(item.get("table_name") or "").lower(), - column_name=str(item.get("column_name") or ""), - requested_value=str(item.get("requested_value") or ""), + missing_info=[str(item) for item in result.get("missing_info", []) if item], + force_tool_usage=bool( + result.get( + "force_tool_usage", + intent + in { + DashboardChatIntent.QUERY_WITH_SQL, + DashboardChatIntent.FOLLOW_UP_SQL, + }, ) - for item in result.get("text_filters", []) - if item.get("table_name") and item.get("column_name") and item.get("requested_value") - ], - answer_strategy=result.get("answer_strategy"), - clarification_question=result.get("clarification_question"), - ) - - def generate_sql( - self, - user_query: str, - dashboard_summary: str, - query_plan: DashboardChatQueryPlan, - schema_prompt: str, - distinct_values: dict[str, list[str]], - allowlisted_tables: Sequence[str], - ) -> DashboardChatSqlDraft: - """Generate a single read-only SQL statement.""" - prompt = { - "dashboard_summary": dashboard_summary, - "query_plan": { - "mode": query_plan.mode.value, - "reason": query_plan.reason, - "relevant_tables": query_plan.relevant_tables, - "schema_lookup_tables": query_plan.schema_lookup_tables, - "text_filters": [text_filter.__dict__ for text_filter in query_plan.text_filters], - "answer_strategy": query_plan.answer_strategy, - }, - "schema_prompt": schema_prompt, - "distinct_values": distinct_values, - "allowlisted_tables": list(allowlisted_tables), - 
"user_query": user_query, - } - result = self._complete_json( - system_prompt=( - "Generate one safe read-only SQL query. " - "Return JSON with keys sql, reason, warnings, clarification_question. " - "The SQL must be a single SELECT or WITH...SELECT statement that only references allowlisted tables. " - "Use exact values from the provided distinct_values map for text filters when available. " - "If the question cannot be answered safely, return sql as null and provide clarification_question." ), - user_prompt=json.dumps(prompt, ensure_ascii=True), - ) - sql = result.get("sql") - if sql is not None: - sql = str(sql).strip() - return DashboardChatSqlDraft( - sql=sql or None, - reason=str(result.get("reason") or "LLM SQL draft"), - warnings=[str(warning) for warning in result.get("warnings", [])], clarification_question=result.get("clarification_question"), + follow_up_context=follow_up_context, ) - def compose_answer( - self, - user_query: str, - dashboard_summary: str, - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - sql: str | None, - sql_results: list[dict[str, Any]] | None, - warnings: Sequence[str], - related_dashboard_titles: Sequence[str], - ) -> str: - """Compose the final user-facing answer.""" - response = self.client.chat.completions.create( - model=self.model, - temperature=0, + def compose_small_talk(self, user_query: str) -> str: + """Generate a brief friendly response using the prototype capabilities prompt.""" + response = self._create_chat_completion( messages=[ { "role": "system", - "content": ( - "You answer NGO dashboard questions. " - "Use plain language, cite data-backed claims carefully, and avoid exposing hidden reasoning. " - "If SQL results are empty, say that no matching rows were found." 
- ), - }, - { - "role": "user", - "content": json.dumps( - { - "dashboard_summary": dashboard_summary, - "user_query": user_query, - "retrieved_documents": [ - { - "source_type": document.source_type, - "source_identifier": document.source_identifier, - "content": document.content[:400], - } - for document in retrieved_documents[:8] - ], - "sql": sql, - "sql_results": sql_results, - "warnings": list(warnings), - "related_dashboards": list(related_dashboard_titles), - }, - ensure_ascii=True, + "content": self.prompt_store.get( + DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES ), }, + {"role": "user", "content": user_query}, ], + temperature=0.5, + max_tokens=80, ) + self._record_usage("small_talk", response) answer = response.choices[0].message.content or "" return answer.strip() - def _complete_json(self, system_prompt: str, user_prompt: str) -> dict[str, Any]: + def get_prompt(self, prompt_key: DashboardChatPromptTemplateKey | str) -> str: + """Return one stored dashboard chat prompt.""" + return self.prompt_store.get(prompt_key) + + def run_tool_loop_turn( + self, + *, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]], + tool_choice: str, + operation: str, + ) -> dict[str, Any]: + """Run one raw OpenAI tool-calling turn and normalize the response.""" + try: + response = self._create_chat_completion( + messages=messages, + tools=tools, + tool_choice=tool_choice, + temperature=0, + ) + except Exception: + return {"content": self.TECHNICAL_DIFFICULTIES_MESSAGE, "tool_calls": []} + self._record_usage(operation, response) + message = response.choices[0].message + tool_calls: list[dict[str, Any]] = [] + if message.tool_calls: + for tool_call in message.tool_calls: + tool_calls.append( + { + "id": tool_call.id, + "name": tool_call.function.name, + "args": tool_call.function.arguments, + } + ) + return {"content": message.content or "", "tool_calls": tool_calls} + + def _complete_json(self, operation: str, system_prompt: str, user_prompt: str) -> 
dict[str, Any]: """Run a JSON-mode chat completion and parse the result.""" - response = self.client.chat.completions.create( - model=self.model, - temperature=0, - response_format={"type": "json_object"}, + response = self._create_chat_completion( messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ], + temperature=0, + response_format={"type": "json_object"}, ) + self._record_usage(operation, response) content = response.choices[0].message.content or "{}" return json.loads(content) + def usage_summary(self) -> dict[str, Any]: + """Return aggregated OpenAI chat-completion usage for the current turn.""" + totals = { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + } + for event in self.usage_events: + totals["prompt_tokens"] += event.get("prompt_tokens", 0) + totals["completion_tokens"] += event.get("completion_tokens", 0) + totals["total_tokens"] += event.get("total_tokens", 0) + return { + "model": self.model, + "calls": list(self.usage_events), + "totals": totals, + } + + def _record_usage(self, operation: str, response: Any) -> None: + """Capture usage data from one OpenAI response when available.""" + usage = getattr(response, "usage", None) + if usage is None: + return + self.usage_events.append( + { + "operation": operation, + "model": self.model, + "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "completion_tokens": getattr(usage, "completion_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + ) -def _normalize_table_list(value: Any) -> list[str]: - """Normalize a JSON value into a lowercased table list.""" - if not isinstance(value, list): - return [] - return [str(table_name).lower() for table_name in value if table_name] + def _create_chat_completion(self, **kwargs: Any) -> Any: + """Run one OpenAI chat completion with a small interactive retry envelope.""" + last_error: Exception | None = None + for attempt in range(self.max_attempts): + try: + 
return self.client.chat.completions.create( + model=self.model, + **kwargs, + ) + except Exception as error: + last_error = error + if attempt == self.max_attempts - 1: + break + sleep(min(2**attempt, 2)) + assert last_error is not None + raise last_error diff --git a/ddpui/core/dashboard_chat/prompt_cache.py b/ddpui/core/dashboard_chat/prompt_cache.py new file mode 100644 index 000000000..b27bdd544 --- /dev/null +++ b/ddpui/core/dashboard_chat/prompt_cache.py @@ -0,0 +1,8 @@ +"""Cache helpers for dashboard chat prompt templates.""" + +DASHBOARD_CHAT_PROMPT_CACHE_TTL_SECONDS = 24 * 60 * 60 + + +def build_dashboard_chat_prompt_cache_key(prompt_key: str) -> str: + """Return the cache key used for one dashboard chat prompt template.""" + return f"dashboard_chat_prompt:{prompt_key}" diff --git a/ddpui/core/dashboard_chat/prompt_store.py b/ddpui/core/dashboard_chat/prompt_store.py new file mode 100644 index 000000000..84e79d914 --- /dev/null +++ b/ddpui/core/dashboard_chat/prompt_store.py @@ -0,0 +1,233 @@ +"""Database-backed prompt template lookup for dashboard chat.""" + +from django.core.cache import cache + +from ddpui.core.dashboard_chat.prompt_cache import ( + DASHBOARD_CHAT_PROMPT_CACHE_TTL_SECONDS, + build_dashboard_chat_prompt_cache_key, +) +from ddpui.models.dashboard_chat import ( + DashboardChatPromptTemplate, + DashboardChatPromptTemplateKey, +) + +PROTOTYPE_INTENT_CLASSIFICATION_PROMPT = """# Enhanced Intent Classification System Prompt + +You are an intent classification agent for a "Chat with Dashboards" system. Your job is to classify user queries about the CURRENT dashboard, its charts, its datasets, the dbt models that power it, and the organization/dashboard context attached to it. Questions about other dashboards, similar dashboards, or dashboards beyond the current one are **irrelevant**. + +## Intent Categories + +1. **query_with_sql** - Needs data analysis (numbers, trends, rankings, breakdowns, comparisons) +2. 
**query_without_sql** - Can be answered from metadata (definitions, calculation logic, chart explanations) +3. **follow_up_sql** - Follow-up query that modifies previous SQL query (add dimension, filter, timeframe) +4. **follow_up_context** - Follow-up requesting more explanation about previous results +5. **needs_clarification** - Question is too vague or ambiguous +6. **small_talk** - Greetings, jokes, non-business conversation +7. **irrelevant** - Questions outside the current dashboard's scope, including requests about other dashboards + +## Classification Guidelines + +**query_with_sql** examples: +- "How many students are in the EcoChamps program?" +- "Show me session completion trends over time" +- "Top 10 schools by assessment performance" +- "Compare reading comprehension by city" +- "What's the monthly breakdown of planned vs conducted sessions?" + +**query_without_sql** examples: +- "What does 'planned_session' mean?" +- "How is reading comprehension calculated?" +- "Which dataset powers the student count chart?" +- "What metrics are available in this dashboard?" +- "Explain what this chart shows" +- "What is the mission and vision of Bhumi?" +- "Summarize the Bhumi programs described in the context file" + +**follow_up_sql** examples (requires previous SQL context): +- "Now split by chapter" (add dimension) +- "Filter to CGI donors only" (add filter) +- "Same but for last quarter" (modify timeframe) +- "Show weekly instead" (change aggregation) + +**follow_up_context** examples (requires previous context): +- "Explain that metric" +- "How is that calculated?" +- "What does that mean?" +- "Tell me more about that" + +**needs_clarification** examples: +- "Is performance improving?" (missing: which metric, time period) +- "Show me the data" (missing: which data, program) +- "What's the biggest issue?" 
(missing: context, metric) + +## Follow-up Detection + +When conversation history is available, classify as follow-up **only if the new query depends on the previous turn**. Use all three tests: +1. Explicit reference to prior output ("that", "same", "those results", "the previous query"). +2. Modification language applied to prior query ("now split by", "filter that", "same but", "add chapter", "remove donor"). +3. Explanations about prior output ("explain that", "what does that mean"). + +If the question can stand alone and be answered without previous context, treat it as a new `query_with_sql` or `query_without_sql`, **not** follow_up_sql/follow_up_context. + +If the query does depend on the previous turn, classify it as follow_up_sql or follow_up_context based on whether SQL modification is needed. + +## Current-Dashboard Boundary + +- Treat requests about "other dashboards", "related dashboards", "similar dashboards", or "which dashboard should I look at" as **irrelevant**. +- Treat requests that compare this dashboard to some other dashboard as **irrelevant** unless the question can be answered entirely from the current dashboard's own data and context. +- The assistant is scoped to one dashboard only. 
+ +## Output Format + +Respond with valid JSON only: + +For new queries: +```json +{ + "intent": "query_with_sql", + "confidence": 0.9, + "reason": "User is asking for specific numbers requiring data analysis", + "force_tool_usage": true, + "follow_up_context": { + "is_follow_up": false, + "follow_up_type": null, + "reusable_elements": {}, + "modification_instruction": null + } +} +``` + +For follow-up queries: +```json +{ + "intent": "follow_up_sql", + "confidence": 0.95, + "reason": "User wants to modify previous query by adding dimension", + "force_tool_usage": true, + "follow_up_context": { + "is_follow_up": true, + "follow_up_type": "add_dimension", + "reusable_elements": { + "previous_sql": "from conversation context", + "previous_tables": ["staging.eco_student25_26_stg"], + "add_instruction": "group by chapter" + }, + "modification_instruction": "split by chapter" + } +} +``` + +## Tool Usage Rules + +Set `force_tool_usage: true` for: +- All query_with_sql intents +- All follow_up_sql intents +- query_without_sql when specific chart/dataset lookup needed + +Set `force_tool_usage: false` for: +- small_talk, needs_clarification, irrelevant +- query_without_sql for general explanation questions + +## Context Awareness + +Use conversation history to: +- Detect follow-up patterns +- Understand context references ("that metric", "same query") +- Determine if SQL modification or explanation is needed +- Extract reusable elements (tables, metrics, filters) from previous queries + +Classify the following user query:""" + +PROTOTYPE_NEW_QUERY_SYSTEM_PROMPT = """You are a data analysis assistant with access to tools. Your job is to help users understand program data and answer their questions accurately. + +IMPORTANT RULES: +1. For data questions: ALWAYS start by searching for relevant charts using retrieve_docs +2. Use chart metadata to identify which datasets/tables to query - charts are your roadmap to data +3. 
For definition questions: You may use tools to get context or answer from human context +4. Never guess table names, column names, or data values +5. Always call get_distinct_values before using WHERE clauses on text columns +6. Only write SELECT queries, never INSERT/UPDATE/DELETE +7. CRITICAL: When list_tables_by_keyword returns tables, you MUST use the EXACT table names returned - never modify schema or table names +8. NEVER assume tables exist in specific schemas - always discover them using list_tables_by_keyword first +9. When counting entities (students, people, sites, states, programs, cases, etc.), avoid COUNT(*). Prefer COUNT(DISTINCT <id_column>) using the most specific ID/name field available (e.g., student_id, roll_no, state_name). If unsure which field uniquely identifies the entity, inspect schema first, and fetch distinct values for candidate ID columns before writing SQL. +10. When you propose SQL, immediately call run_sql_query to execute it. Do not ask for confirmation. +11. Call get_distinct_values only for columns you plan to filter in the current query. +12. Limit get_schema_snippets to the tables you intend to query (avoid extra tables). +13. If a requested geographic/location field is missing, choose the most specific available location dimension (e.g., city → chapter → school) and answer using that, explicitly noting the substitution in the response. +14. When someone asks for "changes" in metrics, look for increases and decreases by comparing values across time periods (baseline vs midline vs endline) or comparing current vs previous periods. +15. Only use the EXACT schema-qualified table names returned by the tools. Do not rewrite schemas or table names. +16. IMPORTANT: Only tables relevant to the current dashboard are accessible. If a table is not found, it may not be relevant to this dashboard. Use charts from the current dashboard to guide your analysis. +17. Do not suggest other dashboards. 
If the question asks about dashboards beyond the current one, stay within the current dashboard context and answer only with data available here.
+
+Available tools (also list_tables_by_keyword: find tables whose name or columns match a keyword):
+- retrieve_docs: Find relevant charts, datasets, context, or dbt models
+- search_dbt_models: Search for dbt models by keyword
+- get_dbt_model_info: Get detailed info about a specific dbt model
+- get_schema_snippets: Get column names and types for tables
+- get_distinct_values: Get actual values in a column (required before WHERE clauses)
+- check_table_row_count: Check if a table has data before querying
+- run_sql_query: Execute a read-only SQL query
+
+Tool usage flow for data questions:
+1. FIRST: Call retrieve_docs to find relevant CHARTS that match the question
+2. If charts found: Use the dataset/table names from chart metadata to guide your queries
+3. If no relevant chart datasets found: ALWAYS call list_tables_by_keyword with the main entity (e.g. "students", "fellowship", "baseline")
+4. Call get_schema_snippets ONLY for the exact table names returned by list_tables_by_keyword
+5. Use the EXACT table names from step 3/4 in your SQL queries - do not change schema or table names
+6. If filtering: Call get_distinct_values for filter columns
+7. ALWAYS call run_sql_query with validated SQL - NEVER give up without trying"""
+
+PROTOTYPE_FOLLOW_UP_SYSTEM_PROMPT = """You are handling a follow-up query that modifies a previous question.
+
+FOLLOW-UP RULES:
+1. Reuse context from the previous query when possible (tables, metrics, base SQL)
+2. For SQL modifications: modify the previous SQL rather than starting from scratch
+3. For new filters: ALWAYS call get_distinct_values first
+4. For new dimensions: ensure the column exists in the schema
+5. When you generate SQL, execute it by calling run_sql_query immediately; do not ask for confirmation.
+6. Only fetch distinct values for columns you will filter, and limit schema lookups to tables you plan to query.
+7. 
Stay within the current dashboard only. Do not suggest or switch to other dashboards.""" + +PROTOTYPE_SMALL_TALK_CAPABILITIES_PROMPT = ( + "You are a helpful assistant for questions about the current dashboard. " + "Briefly explain what you can do: retrieve dashboard/chart/dbt context, " + "run safe read-only SQL for counts/trends/breakdowns, and clarify metrics from this dashboard. " + "Keep answers concise, friendly, and non-technical when possible." +) + +DEFAULT_DASHBOARD_CHAT_PROMPTS = { + DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION: PROTOTYPE_INTENT_CLASSIFICATION_PROMPT, + DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM: PROTOTYPE_NEW_QUERY_SYSTEM_PROMPT, + DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM: PROTOTYPE_FOLLOW_UP_SYSTEM_PROMPT, + DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES: ( + PROTOTYPE_SMALL_TALK_CAPABILITIES_PROMPT + ), +} + + +class DashboardChatPromptStore: + """Cached lookup for dashboard chat prompt templates.""" + + def get(self, prompt_key: DashboardChatPromptTemplateKey | str) -> str: + """Return one prompt template from cache, DB, or built-in defaults.""" + normalized_prompt_key = ( + prompt_key.value + if isinstance(prompt_key, DashboardChatPromptTemplateKey) + else str(prompt_key) + ) + cache_key = build_dashboard_chat_prompt_cache_key(normalized_prompt_key) + cached_prompt = cache.get(cache_key) + if cached_prompt is not None: + return cached_prompt + + stored_prompt = ( + DashboardChatPromptTemplate.objects.filter(key=normalized_prompt_key) + .values_list("prompt", flat=True) + .first() + ) + prompt = ( + stored_prompt + or DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey(normalized_prompt_key)] + ) + cache.set(cache_key, prompt, DASHBOARD_CHAT_PROMPT_CACHE_TTL_SECONDS) + return prompt diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py index dc342b2ba..689d5d087 100644 --- a/ddpui/core/dashboard_chat/runtime.py +++ b/ddpui/core/dashboard_chat/runtime.py @@ -1,15 
+1,21 @@ -"""LangGraph runtime for dashboard chat orchestration.""" +"""Prototype-faithful LangGraph runtime for dashboard chat orchestration.""" from collections.abc import Callable, Sequence import json +import logging import re from typing import Any, TypedDict +from django.core.cache import cache +from django.core.serializers.json import DjangoJSONEncoder +from django.db import close_old_connections, connections from langgraph.graph import END, START, StateGraph from ddpui.core.dashboard_chat.allowlist import ( DashboardChatAllowlist, DashboardChatAllowlistBuilder, + build_dashboard_chat_table_name, + normalize_dashboard_chat_table_name, ) from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig @@ -17,17 +23,25 @@ DashboardChatLlmClient, OpenAIDashboardChatLlmClient, ) +from ddpui.core.dashboard_chat.session_cache import ( + DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, + build_dashboard_chat_session_snapshot_cache_key, + deserialize_allowlist, + deserialize_distinct_cache, + deserialize_schema_snippets, + serialize_allowlist, + serialize_distinct_cache, + serialize_schema_snippets, +) from ddpui.core.dashboard_chat.runtime_types import ( DashboardChatCitation, + DashboardChatConversationContext, DashboardChatConversationMessage, DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatPlanMode, - DashboardChatQueryPlan, - DashboardChatRelatedDashboard, DashboardChatResponse, DashboardChatRetrievedDocument, - DashboardChatSqlDraft, + DashboardChatSchemaSnippet, DashboardChatSqlValidationResult, ) from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard @@ -37,69 +51,196 @@ DashboardChatWarehouseTools, DashboardChatWarehouseToolsError, ) -from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import ( + DashboardChatPromptTemplateKey, +) from ddpui.models.org import Org from ddpui.services.dashboard_service import DashboardService 
-SIMPLE_GREETINGS = {"hi", "hey", "hello", "thanks", "thank you", "gm", "good morning"} -DATA_QUERY_KEYWORDS = { - "count", - "counts", - "trend", - "compare", - "breakdown", - "how many", - "total", - "sum", - "average", - "avg", - "top", - "bottom", - "show me", - "list", - "split by", - "group by", -} -CONTEXT_QUERY_KEYWORDS = { - "what does", - "explain", - "definition", - "metric", - "why", - "how is", - "which chart", - "which dataset", - "context", -} +logger = logging.getLogger(__name__) +GREETING_PATTERN = re.compile( + r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you)\b[\s!.?]*$", + re.IGNORECASE, +) class DashboardChatRuntimeState(TypedDict, total=False): - """LangGraph state for dashboard chat.""" + """LangGraph state for one dashboard chat turn.""" org: Org dashboard_id: int + session_id: str | None + vector_collection_name: str | None user_query: str conversation_history: list[DashboardChatConversationMessage] + conversation_context: DashboardChatConversationContext + small_talk_response: str | None dashboard_export: dict[str, Any] - dashboard_summary: str + dbt_index: dict[str, Any] allowlist: DashboardChatAllowlist + session_schema_cache: dict[str, DashboardChatSchemaSnippet] + session_distinct_cache: set[tuple[str, str, str]] intent_decision: DashboardChatIntentDecision retrieved_documents: list[DashboardChatRetrievedDocument] citations: list[DashboardChatCitation] - related_dashboards: list[DashboardChatRelatedDashboard] - schema_prompt: str - schema_snippets: dict[str, Any] - query_plan: DashboardChatQueryPlan - distinct_values: dict[str, list[str]] - sql_draft: DashboardChatSqlDraft | None + tool_calls: list[dict[str, Any]] + sql: str | None sql_validation: DashboardChatSqlValidationResult | None sql_results: list[dict[str, Any]] | None warnings: list[str] + usage: dict[str, Any] response: DashboardChatResponse class DashboardChatRuntime: - """Run dashboard chat queries through a LangGraph workflow.""" + 
"""Run dashboard chat turns with the prototype's explicit intent routing and tool loop.""" + + TOOL_SPECIFICATIONS = [ + { + "type": "function", + "function": { + "name": "retrieve_docs", + "description": "Search for relevant charts, datasets, dbt models, or context sections.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"}, + "types": { + "type": "array", + "items": { + "type": "string", + "enum": ["chart", "dataset", "context", "dbt_model"], + }, + "description": "Document types to search", + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 20, "default": 8}, + }, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_schema_snippets", + "description": "Get column information for database tables.", + "parameters": { + "type": "object", + "properties": { + "tables": { + "type": "array", + "items": {"type": "string"}, + "description": "Fully-qualified table names (schema.table)", + } + }, + "required": ["tables"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "search_dbt_models", + "description": "Search dbt models by keyword to find relevant data models.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query for model names/descriptions", + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 20, "default": 8}, + }, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_dbt_model_info", + "description": "Get detailed information about a specific dbt model.", + "parameters": { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "description": "Model name or schema.table", + } + }, + "required": ["model_name"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_distinct_values", + "description": "Get distinct values for a column (required before filtering on text 
columns).", + "parameters": { + "type": "object", + "properties": { + "table": { + "type": "string", + "description": "Fully-qualified table name", + }, + "column": {"type": "string", "description": "Column name"}, + "limit": {"type": "integer", "minimum": 1, "maximum": 200, "default": 50}, + }, + "required": ["table", "column"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "run_sql_query", + "description": "Execute a read-only SQL query on the database.", + "parameters": { + "type": "object", + "properties": { + "sql": {"type": "string", "description": "SELECT query to execute"} + }, + "required": ["sql"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "list_tables_by_keyword", + "description": "Find tables whose name or columns match a keyword (no hard-coding).", + "parameters": { + "type": "object", + "properties": { + "keyword": { + "type": "string", + "description": "Keyword such as donor, funding, student", + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 50, "default": 15}, + }, + "required": ["keyword"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "check_table_row_count", + "description": "Get the total number of rows in a table to check if it has data.", + "parameters": { + "type": "object", + "properties": { + "table": { + "type": "string", + "description": "Fully-qualified table name (schema.table)", + } + }, + "required": ["table"], + }, + }, + }, + ] def __init__( self, @@ -115,6 +256,7 @@ def __init__( self.llm_client = llm_client or OpenAIDashboardChatLlmClient( model=self.runtime_config.llm_model, timeout_ms=self.runtime_config.llm_timeout_ms, + max_attempts=self.runtime_config.llm_max_attempts, ) self.warehouse_tools_factory = warehouse_tools_factory or ( lambda org: DashboardChatWarehouseTools( @@ -129,35 +271,51 @@ def run( org: Org, dashboard_id: int, user_query: str, - conversation_history: Sequence[DashboardChatConversationMessage | dict[str, str]] + session_id: str | 
None = None, + vector_collection_name: str | None = None, + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] | None = None, ) -> DashboardChatResponse: - """Run a single dashboard chat turn and return the structured response.""" + """Run one dashboard chat turn.""" + if hasattr(self.llm_client, "reset_usage"): + self.llm_client.reset_usage() + if hasattr(self.vector_store, "reset_usage"): + self.vector_store.reset_usage() initial_state: DashboardChatRuntimeState = { "org": org, "dashboard_id": dashboard_id, + "session_id": session_id, + "vector_collection_name": vector_collection_name, "user_query": user_query, "conversation_history": self._normalize_conversation_history(conversation_history), "warnings": [], + "usage": {}, } final_state = self.graph.invoke(initial_state) return final_state["response"] def _build_graph(self): - """Create the LangGraph state machine.""" + """Build the explicit prototype-aligned intent graph.""" graph = StateGraph(DashboardChatRuntimeState) - graph.add_node("load_context", self._node_load_context) - graph.add_node("route_intent", self._node_route_intent) - graph.add_node("build_allowlist", self._node_build_allowlist) - graph.add_node("retrieve_docs", self._node_retrieve_docs) - graph.add_node("load_schema_snippets", self._node_load_schema_snippets) - graph.add_node("plan_query", self._node_plan_query) - graph.add_node("lookup_distinct_values", self._node_lookup_distinct_values) - graph.add_node("generate_sql", self._node_generate_sql) - graph.add_node("validate_sql", self._node_validate_sql) - graph.add_node("execute_sql", self._node_execute_sql) - graph.add_node("compose_answer", self._node_compose_answer) - graph.add_node("finalize", self._node_finalize_response) + graph.add_node("load_context", self._wrap_node(self._node_load_context)) + graph.add_node("route_intent", self._wrap_node(self._node_route_intent)) + graph.add_node("handle_small_talk", self._wrap_node(self._node_handle_small_talk)) + 
graph.add_node("handle_irrelevant", self._wrap_node(self._node_handle_irrelevant)) + graph.add_node( + "handle_needs_clarification", + self._wrap_node(self._node_handle_needs_clarification), + ) + graph.add_node("handle_query_with_sql", self._wrap_node(self._node_handle_query_with_sql)) + graph.add_node( + "handle_query_without_sql", + self._wrap_node(self._node_handle_query_without_sql), + ) + graph.add_node("handle_follow_up_sql", self._wrap_node(self._node_handle_follow_up_sql)) + graph.add_node( + "handle_follow_up_context", + self._wrap_node(self._node_handle_follow_up_context), + ) + graph.add_node("finalize", self._wrap_node(self._node_finalize_response)) graph.add_edge(START, "load_context") graph.add_edge("load_context", "route_intent") @@ -165,389 +323,1027 @@ def _build_graph(self): "route_intent", self._route_after_intent, { - "compose_answer": "compose_answer", - "build_allowlist": "build_allowlist", - }, - ) - graph.add_edge("build_allowlist", "retrieve_docs") - graph.add_edge("retrieve_docs", "load_schema_snippets") - graph.add_edge("load_schema_snippets", "plan_query") - graph.add_conditional_edges( - "plan_query", - self._route_after_plan, - { - "compose_answer": "compose_answer", - "lookup_distinct_values": "lookup_distinct_values", - }, - ) - graph.add_edge("lookup_distinct_values", "generate_sql") - graph.add_edge("generate_sql", "validate_sql") - graph.add_conditional_edges( - "validate_sql", - self._route_after_sql_validation, - { - "compose_answer": "compose_answer", - "execute_sql": "execute_sql", + DashboardChatIntent.SMALL_TALK.value: "handle_small_talk", + DashboardChatIntent.IRRELEVANT.value: "handle_irrelevant", + DashboardChatIntent.NEEDS_CLARIFICATION.value: "handle_needs_clarification", + DashboardChatIntent.QUERY_WITH_SQL.value: "handle_query_with_sql", + DashboardChatIntent.QUERY_WITHOUT_SQL.value: "handle_query_without_sql", + DashboardChatIntent.FOLLOW_UP_SQL.value: "handle_follow_up_sql", + 
DashboardChatIntent.FOLLOW_UP_CONTEXT.value: "handle_follow_up_context", }, ) - graph.add_edge("execute_sql", "compose_answer") - graph.add_edge("compose_answer", "finalize") + graph.add_edge("handle_small_talk", "finalize") + graph.add_edge("handle_irrelevant", "finalize") + graph.add_edge("handle_needs_clarification", "finalize") + graph.add_edge("handle_query_with_sql", "finalize") + graph.add_edge("handle_query_without_sql", "finalize") + graph.add_edge("handle_follow_up_sql", "finalize") + graph.add_edge("handle_follow_up_context", "finalize") graph.add_edge("finalize", END) return graph.compile() + @staticmethod + def _wrap_node(handler: Callable[[DashboardChatRuntimeState], DashboardChatRuntimeState]): + """Run each LangGraph node with thread-local Django DB cleanup.""" + + def wrapped(state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + close_old_connections() + try: + return handler(state) + finally: + connections.close_all() + + return wrapped + def _node_load_context(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Load dashboard context and summary.""" - dashboard_export = DashboardService.export_dashboard_context( - state["dashboard_id"], - state["org"], - ) - state["dashboard_export"] = dashboard_export - state["dashboard_summary"] = self._build_dashboard_summary(dashboard_export) + """Load or reuse the session-stable dashboard context snapshot.""" + snapshot = self._load_session_snapshot(state) + state["dashboard_export"] = snapshot["dashboard_export"] + state["dbt_index"] = snapshot["dbt_index"] + state["allowlist"] = snapshot["allowlist"] + state["session_schema_cache"] = snapshot["schema_cache"] + state["session_distinct_cache"] = snapshot["distinct_cache"] return state def _node_route_intent(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Classify the user query.""" - intent_decision = self._heuristic_intent_decision( + """Use the prototype router prompt for all non-trivial 
routing.""" + conversation_context = self._extract_conversation_context(state["conversation_history"]) + fast_path_intent = self._build_fast_path_intent(state["user_query"]) + if fast_path_intent is not None: + state["conversation_context"] = conversation_context + state["intent_decision"] = fast_path_intent + state["small_talk_response"] = self._build_fast_path_small_talk_response( + state["user_query"] + ) + return state + intent_decision = self.llm_client.classify_intent( user_query=state["user_query"], - conversation_history=state["conversation_history"], + conversation_context=conversation_context, ) - if intent_decision is None: - intent_decision = self.llm_client.classify_intent( - user_query=state["user_query"], - conversation_history=state["conversation_history"], - dashboard_summary=state["dashboard_summary"], - ) + state["conversation_context"] = conversation_context state["intent_decision"] = intent_decision return state - def _node_build_allowlist(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Build the dashboard table allowlist from export data and dbt lineage.""" - manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(state["org"].dbt) - state["allowlist"] = DashboardChatAllowlistBuilder.build( - state["dashboard_export"], - manifest_json=manifest_json, + def _node_handle_small_talk( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Handle simple social turns without any tool use.""" + state["response"] = DashboardChatResponse( + answer_text=state.get("small_talk_response") + or self._compose_small_talk_response(state["user_query"]), + intent=DashboardChatIntent.SMALL_TALK, + usage=self._build_usage_summary(), ) return state - def _node_retrieve_docs(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Retrieve dashboard, org, and dbt context from Chroma.""" - org = state["org"] - dashboard_results = self._query_vector_store( - org=org, - 
query_text=state["user_query"], - source_types=self.source_config.filter_enabled( - [ - DashboardChatSourceType.DASHBOARD_EXPORT.value, - DashboardChatSourceType.DASHBOARD_CONTEXT.value, - ] + def _node_handle_irrelevant( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Handle questions outside dashboard chat scope.""" + state["response"] = DashboardChatResponse( + answer_text=( + "I can only answer questions about this dashboard, its charts, and the data behind them." ), - dashboard_id=state["dashboard_id"], + intent=DashboardChatIntent.IRRELEVANT, + usage=self._build_usage_summary(), ) - org_results = self._query_vector_store( - org=org, - query_text=state["user_query"], - source_types=self.source_config.filter_enabled( - [DashboardChatSourceType.ORG_CONTEXT.value] + return state + + def _node_handle_needs_clarification( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Ask for clarification when the router says the query is underspecified.""" + intent_decision = state["intent_decision"] + state["response"] = DashboardChatResponse( + answer_text=( + intent_decision.clarification_question + or self._clarification_fallback(intent_decision.missing_info) ), + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + usage=self._build_usage_summary(), ) - dbt_results = self._filter_allowlisted_dbt_results( - self._query_vector_store( - org=org, - query_text=state["user_query"], - source_types=self.source_config.filter_enabled( - [ - DashboardChatSourceType.DBT_MANIFEST.value, - DashboardChatSourceType.DBT_CATALOG.value, - ] - ), - ), - state["allowlist"], + return state + + def _node_handle_query_with_sql( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Run the prototype new-query tool loop for SQL-routed questions.""" + return self._run_prototype_intent(state, max_turns=15, follow_up=False) + + def _node_handle_query_without_sql( + self, + state: DashboardChatRuntimeState, + 
) -> DashboardChatRuntimeState: + """Run the prototype new-query tool loop for context-only questions.""" + return self._run_prototype_intent(state, max_turns=15, follow_up=False) + + def _node_handle_follow_up_sql( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Run the prototype follow-up loop for SQL-modifying turns.""" + return self._run_prototype_intent(state, max_turns=6, follow_up=True) + + def _node_handle_follow_up_context( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Run the prototype follow-up loop for explanatory follow-ups.""" + return self._run_prototype_intent(state, max_turns=6, follow_up=True) + + def _run_prototype_intent( + self, + state: DashboardChatRuntimeState, + *, + max_turns: int, + follow_up: bool, + ) -> DashboardChatRuntimeState: + """Execute one prototype-style tool loop and store the response on state.""" + allowlist = state["allowlist"] + + query_embedding = self._embed_query( + state["user_query"], + embedding_cache={}, ) - retrieved_documents = self._merge_retrieval_results( - dashboard_results=dashboard_results, - org_results=org_results, - dbt_results=dbt_results, + messages = ( + self._build_follow_up_messages(state) + if follow_up + else self._build_new_query_messages(state) + ) + execution_result = self._execute_tool_loop( + state=state, + messages=messages, + max_turns=max_turns, + initial_embedding_cache={state["user_query"]: query_embedding}, ) - state["retrieved_documents"] = retrieved_documents + + state["retrieved_documents"] = execution_result["retrieved_documents"] state["citations"] = self._build_citations( - retrieved_documents=retrieved_documents, + retrieved_documents=execution_result["retrieved_documents"], dashboard_export=state["dashboard_export"], - allowlist=state["allowlist"], + allowlist=allowlist, ) - state["related_dashboards"] = self._build_related_dashboards( - org=org, - current_dashboard_id=state["dashboard_id"], - 
query_text=state["user_query"], + state["tool_calls"] = execution_result["tool_calls"] + state["sql"] = execution_result["sql"] + state["sql_validation"] = execution_result["sql_validation"] + state["sql_results"] = execution_result["sql_results"] + state["warnings"] = execution_result["warnings"] + state["response"] = DashboardChatResponse( + answer_text=execution_result["answer_text"], + intent=state["intent_decision"].intent, + citations=state["citations"], + warnings=execution_result["warnings"], + sql=execution_result["sql"], + sql_results=execution_result["sql_results"], + usage=self._build_usage_summary(), + tool_calls=execution_result["tool_calls"], ) return state - def _node_load_schema_snippets( + def _build_new_query_messages( self, state: DashboardChatRuntimeState, - ) -> DashboardChatRuntimeState: - """Load schema snippets for the relevant dashboard tables.""" + ) -> list[dict[str, Any]]: + """Build the prototype new-query message stack.""" + system_prompt = self.llm_client.get_prompt( + DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM + ) + return [ + { + "role": "system", + "content": system_prompt, + }, + {"role": "user", "content": state["user_query"]}, + ] + + def _build_follow_up_messages( + self, + state: DashboardChatRuntimeState, + ) -> list[dict[str, Any]]: + """Build the prototype follow-up message stack.""" + modification_type = self._detect_sql_modification_type(state["user_query"]) + system_prompt = self.llm_client.get_prompt( + DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM + ) + return [ + { + "role": "system", + "content": system_prompt, + }, + { + "role": "system", + "content": self._build_follow_up_context_prompt( + state["conversation_context"], + state["user_query"], + ), + }, + {"role": "system", "content": f"MODIFICATION_TYPE: {modification_type}"}, + {"role": "user", "content": state["user_query"]}, + ] + + def _execute_tool_loop( + self, + *, + state: DashboardChatRuntimeState, + messages: list[dict[str, Any]], + max_turns: 
int, + initial_embedding_cache: dict[str, list[float]] | None = None, + ) -> dict[str, Any]: + """Execute the prototype's iterative tool loop.""" + execution_context: dict[str, Any] = { + "distinct_cache": set(state.get("session_distinct_cache") or set()), + "embedding_cache": dict(initial_embedding_cache or {}), + "schema_cache": dict(state.get("session_schema_cache") or {}), + "retrieved_documents": [], + "retrieved_document_ids": set(), + "tool_calls": [], + "warnings": list(state.get("warnings", [])), + "warehouse_tools": None, + "last_sql": None, + "last_sql_results": None, + "last_sql_validation": None, + } + self._seed_distinct_cache_from_previous_sql(state, execution_context) intent_decision = state["intent_decision"] - if ( - not intent_decision.force_sql_path - and intent_decision.intent != DashboardChatIntent.DATA_QUERY - ): - state["schema_snippets"] = {} - state["schema_prompt"] = "" - return state - candidate_tables = state["allowlist"].prioritized_tables( - limit=self.runtime_config.max_schema_tables, + for turn_index in range(max_turns): + tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" + ai_message = self.llm_client.run_tool_loop_turn( + messages=messages, + tools=self.TOOL_SPECIFICATIONS, + tool_choice=tool_choice, + operation=f"tool_loop_{intent_decision.intent.value}", + ) + tool_calls = ai_message.get("tool_calls") or [] + assistant_record: dict[str, Any] = { + "role": "assistant", + "content": ai_message.get("content", "") or "", + } + if tool_calls: + assistant_record["tool_calls"] = [ + { + "id": tool_call.get("id"), + "type": "function", + "function": { + "name": tool_call.get("name"), + "arguments": ( + tool_call.get("args") + if isinstance(tool_call.get("args"), str) + else json.dumps(tool_call.get("args") or {}) + ), + }, + } + for tool_call in tool_calls + ] + messages.append(assistant_record) + + if not tool_calls: + return self._build_execution_result( + answer_text=( + 
(ai_message.get("content") or "").strip() + or self._fallback_answer_text( + execution_context["retrieved_documents"], + execution_context["last_sql_results"], + ) + ), + execution_context=execution_context, + max_turns_reached=False, + ) + + for tool_call in tool_calls: + raw_args = tool_call.get("args") or {} + args = raw_args + if isinstance(raw_args, str): + try: + args = json.loads(raw_args) + except json.JSONDecodeError: + args = {} + result = self._execute_tool( + tool_name=str(tool_call.get("name") or ""), + args=args, + state=state, + execution_context=execution_context, + ) + execution_context["tool_calls"].append( + self._summarize_tool_call( + tool_name=str(tool_call.get("name") or ""), + args=args, + result=result, + ) + ) + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.get("id"), + "content": json.dumps( + self._serialize_tool_result(result), + cls=DjangoJSONEncoder, + ), + } + ) + if str(tool_call.get("name") or "") == "run_sql_query" and result.get("success"): + return self._build_execution_result( + answer_text=( + result.get("data_preview") + or self._fallback_answer_text( + execution_context["retrieved_documents"], + execution_context["last_sql_results"], + ) + ), + execution_context=execution_context, + max_turns_reached=False, + ) + + return self._build_execution_result( + answer_text=self._max_turns_message( + state["user_query"], + execution_context["retrieved_documents"], + ), + execution_context=execution_context, + max_turns_reached=True, ) - if not candidate_tables: - state["schema_snippets"] = {} - state["schema_prompt"] = "" - state["warnings"] = state.get("warnings", []) + [ - "No dashboard tables were available for schema inspection.", - ] - return state + def _execute_tool( + self, + *, + tool_name: str, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Execute one prototype tool against the Dalgo runtime primitives.""" try: - 
warehouse_tools = self.warehouse_tools_factory(state["org"]) - schema_snippets = warehouse_tools.get_schema_snippets(candidate_tables) + if tool_name == "retrieve_docs": + return self._tool_retrieve_docs(args, state, execution_context) + if tool_name == "get_schema_snippets": + return self._tool_get_schema_snippets(args, state, execution_context) + if tool_name == "search_dbt_models": + return self._tool_search_dbt_models(args, state, execution_context) + if tool_name == "get_dbt_model_info": + return self._tool_get_dbt_model_info(args, state, execution_context) + if tool_name == "get_distinct_values": + return self._tool_get_distinct_values(args, state, execution_context) + if tool_name == "run_sql_query": + return self._run_sql_with_distinct_guard(args, state, execution_context) + if tool_name == "list_tables_by_keyword": + return self._tool_list_tables_by_keyword(args, state, execution_context) + if tool_name == "check_table_row_count": + return self._tool_check_table_row_count(args, state, execution_context) + return {"error": f"Unknown tool: {tool_name}"} except DashboardChatWarehouseToolsError as error: - state["schema_snippets"] = {} - state["schema_prompt"] = "" - state["warnings"] = state.get("warnings", []) + [str(error)] - return state + logger.warning("Dashboard chat tool %s failed: %s", tool_name, error) + execution_context["warnings"].append(str(error)) + return {"error": str(error)} except Exception as error: - state["schema_snippets"] = {} - state["schema_prompt"] = "" - state["warnings"] = state.get("warnings", []) + [str(error)] - return state + logger.exception("Dashboard chat tool %s failed", tool_name) + execution_context["warnings"].append(str(error)) + return {"error": str(error)} - state["schema_snippets"] = schema_snippets - state["schema_prompt"] = "\n\n".join( - snippet.to_prompt_text() for snippet in schema_snippets.values() - ) - return state + def _tool_retrieve_docs( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, 
+ execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Retrieve current-dashboard, org, and dbt context using the prototype tool contract.""" + query = str(args.get("query") or state["user_query"]).strip() + limit = max(1, min(int(args.get("limit", 8)), 20)) + requested_types = [ + str(doc_type) + for doc_type in (args.get("types") or ["chart", "dataset", "context", "dbt_model"]) + ] + retrieved_documents: list[DashboardChatRetrievedDocument] = [] + + if "chart" in requested_types: + retrieved_documents.extend( + self._query_vector_store( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.DASHBOARD_EXPORT.value] + ), + dashboard_id=state["dashboard_id"], + query_embedding=self._embed_query(query, execution_context["embedding_cache"]), + ) + ) + if "context" in requested_types: + retrieved_documents.extend( + self._query_vector_store( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.DASHBOARD_CONTEXT.value] + ), + dashboard_id=state["dashboard_id"], + query_embedding=self._embed_query(query, execution_context["embedding_cache"]), + ) + ) + retrieved_documents.extend( + self._query_vector_store( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.ORG_CONTEXT.value] + ), + query_embedding=self._embed_query(query, execution_context["embedding_cache"]), + ) + ) + if "dataset" in requested_types or "dbt_model" in requested_types: + dbt_results = self._query_vector_store( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [ + DashboardChatSourceType.DBT_MANIFEST.value, + 
DashboardChatSourceType.DBT_CATALOG.value, + ] + ), + query_embedding=self._embed_query(query, execution_context["embedding_cache"]), + ) + retrieved_documents.extend( + self._filter_allowlisted_dbt_results(dbt_results, state["allowlist"]) + ) - def _node_plan_query(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Produce the structured execution plan.""" - intent_decision = state["intent_decision"] + merged_results = self._dedupe_retrieved_documents(retrieved_documents)[:limit] + for document in merged_results: + if document.document_id in execution_context["retrieved_document_ids"]: + continue + execution_context["retrieved_document_ids"].add(document.document_id) + execution_context["retrieved_documents"].append(document) + + docs = [ + self._tool_document_payload( + document, + state["allowlist"], + state["dashboard_export"], + ) + for document in merged_results + ] + return {"docs": docs, "count": len(docs)} - if ( - intent_decision.intent == DashboardChatIntent.CONTEXT_QUERY - and not intent_decision.force_sql_path - ): - state["query_plan"] = DashboardChatQueryPlan( - mode=DashboardChatPlanMode.CONTEXT, - reason=intent_decision.reason, + def _tool_get_schema_snippets( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Return schema snippets for allowlisted tables only.""" + requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []] + allowed_tables = [ + table_name + for table_name in requested_tables + if state["allowlist"].is_allowed(table_name) + ] + filtered_tables = sorted(set(requested_tables) - set(allowed_tables)) + schema_cache = self._schema_cache( + state, + execution_context, + tables=allowed_tables, + ) + tables_payload = [ + {"table": table_name, "columns": snippet.columns} + for table_name, snippet in schema_cache.items() + if table_name in allowed_tables + ] + response: dict[str, Any] = {"tables": 
tables_payload} + if filtered_tables: + response["filtered_tables"] = filtered_tables + response["filter_note"] = ( + f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard." ) - return state + return response - if intent_decision.intent in { - DashboardChatIntent.SMALL_TALK, - DashboardChatIntent.IRRELEVANT, - DashboardChatIntent.NEEDS_CLARIFICATION, - }: - state["query_plan"] = DashboardChatQueryPlan( - mode=DashboardChatPlanMode.CLARIFY, - reason=intent_decision.reason, - clarification_question=intent_decision.clarification_question, + def _tool_search_dbt_models( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Search allowlisted dbt nodes by name, description, and column metadata.""" + query = str(args.get("query") or "").strip().lower() + limit = max(1, min(int(args.get("limit", 8)), 20)) + if not query: + return {"models": [], "count": 0} + + results: list[dict[str, Any]] = [] + for node in self._dbt_resources_by_unique_id(state).values(): + table_name = node.get("table") + haystacks = [ + str(node.get("name") or ""), + str(node.get("description") or ""), + str(table_name or ""), + ] + for column in node.get("columns") or []: + haystacks.append(str(column.get("name") or "")) + haystacks.append(str(column.get("description") or "")) + if query not in " ".join(haystacks).lower(): + continue + results.append( + { + "name": str(node.get("name") or ""), + "schema": str(node.get("schema") or ""), + "database": str(node.get("database") or ""), + "description": str(node.get("description") or ""), + "columns": [ + str(column.get("name") or "") + for column in (node.get("columns") or []) + ][:20], + "table": table_name, + } ) - return state + if len(results) >= limit: + break + + return {"models": results, "count": len(results)} + + def _tool_get_dbt_model_info( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + 
execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Return one dbt model's description, columns, and lineage.""" + model_name = str(args.get("model_name") or "").strip().lower() + if not model_name: + return {"error": "model_name is required"} + + matched_unique_id: str | None = None + matched_node: dict[str, Any] | None = None + for unique_id, node in self._dbt_resources_by_unique_id(state).items(): + table_name = node.get("table") + candidates = { + str(node.get("name") or "").lower(), + str(table_name or "").lower(), + } + if model_name not in candidates: + continue + matched_unique_id = unique_id + matched_node = node + break + + if matched_unique_id is None or matched_node is None: + return {"error": f"Model not found: {model_name}"} + + return { + "model": str(matched_node.get("name") or ""), + "schema": str(matched_node.get("schema") or ""), + "database": str(matched_node.get("database") or ""), + "description": str(matched_node.get("description") or ""), + "columns": list(matched_node.get("columns") or [])[:50], + "upstream": list(matched_node.get("upstream") or []), + "downstream": list(matched_node.get("downstream") or []), + } - allowlisted_tables = state["allowlist"].prioritized_tables() - if not allowlisted_tables and intent_decision.force_sql_path: - state["query_plan"] = DashboardChatQueryPlan( - mode=DashboardChatPlanMode.CLARIFY, - reason="Current dashboard does not expose any allowlisted tables for SQL.", - clarification_question=( - "I can explain this dashboard, but it does not expose a data source I can query safely." 
+ def _tool_get_distinct_values( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Return distinct values and persist validated filter values for the session.""" + table_name = str(args.get("table") or "").lower() + column_name = str(args.get("column") or "") + limit = max(1, min(int(args.get("limit", 50)), 200)) + if not state["allowlist"].is_allowed(table_name): + return { + "error": "table_not_allowed", + "table": table_name, + "message": ( + f"Table {table_name} is not accessible in the current dashboard context." ), - ) - return state + } - query_plan = self.llm_client.plan_query( - user_query=state["user_query"], - conversation_history=state["conversation_history"], - dashboard_summary=state["dashboard_summary"], - retrieved_documents=state.get("retrieved_documents", []), - schema_prompt=state.get("schema_prompt", ""), - allowlisted_tables=allowlisted_tables, - ) - query_plan = self._normalize_query_plan( - query_plan=query_plan, - allowlist=state["allowlist"], - default_tables=allowlisted_tables, + schema_cache = self._schema_cache(state, execution_context) + snippet = schema_cache.get(table_name) + normalized_column_name = column_name.lower() + if snippet is not None and normalized_column_name not in { + str(column.get("name") or "").lower() for column in snippet.columns + }: + candidates = self._find_tables_with_column(normalized_column_name, schema_cache) + return { + "error": "column_not_in_table", + "table": table_name, + "column": column_name, + "candidates": candidates, + "message": ( + f"Column {column_name} is not available on {table_name}. " + "Use a table that contains it, inspect that schema, and retry the lookup." 
+ ), + } + + values = self._warehouse_tools(execution_context, state["org"]).get_distinct_values( + table_name=table_name, + column_name=column_name, + limit=limit, ) - state["query_plan"] = query_plan - return state + self._record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name=table_name, + column_name=column_name, + values=values, + ) + return { + "table": table_name, + "column": column_name, + "values": values, + "count": len(values), + } - def _node_lookup_distinct_values( + def _tool_list_tables_by_keyword( self, + args: dict[str, Any], state: DashboardChatRuntimeState, - ) -> DashboardChatRuntimeState: - """Fetch distinct values for requested text filters.""" - distinct_values: dict[str, list[str]] = {} - query_plan = state["query_plan"] - if not query_plan.text_filters: - state["distinct_values"] = distinct_values - return state - - try: - warehouse_tools = self.warehouse_tools_factory(state["org"]) - except DashboardChatWarehouseToolsError as error: - state["warnings"] = state.get("warnings", []) + [str(error)] - state["distinct_values"] = distinct_values - return state + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Search allowlisted tables by table name or column name.""" + keyword = str(args.get("keyword") or "").strip().lower() + limit = max(1, min(int(args.get("limit", 15)), 50)) + if not keyword: + return {"tables": []} + + allowlist_tables_source = state["allowlist"].prioritized_tables() or sorted( + state["allowlist"].allowed_tables + ) + allowlisted_tables = list( + dict.fromkeys(table_name.lower() for table_name in allowlist_tables_source) + ) + direct_match_tables = [ + table_name + for table_name in allowlisted_tables + if keyword in table_name or keyword in table_name.rsplit(".", 1)[-1] + ] - available_tables = set(state.get("schema_snippets", {}).keys()) - for text_filter in query_plan.text_filters: - table_name = text_filter.table_name.lower() - if not 
state["allowlist"].is_allowed(table_name) or table_name not in available_tables: - continue - distinct_key = f"{table_name}.{text_filter.column_name}" + schema_cache: dict[str, Any] = {} + lookup_tables = direct_match_tables or allowlisted_tables + if lookup_tables: try: - distinct_values[distinct_key] = warehouse_tools.get_distinct_values( - table_name=table_name, - column_name=text_filter.column_name, - limit=self.runtime_config.max_distinct_values, + schema_cache = self._schema_cache( + state, + execution_context, + tables=lookup_tables, ) - except DashboardChatWarehouseToolsError as error: - state["warnings"] = state.get("warnings", []) + [str(error)] - distinct_values[distinct_key] = [] except Exception as error: - state["warnings"] = state.get("warnings", []) + [str(error)] - distinct_values[distinct_key] = [] + logger.warning("Dashboard chat keyword table lookup fell back to names only: %s", error) + execution_context["warnings"].append(str(error)) - state["distinct_values"] = distinct_values - return state + matches: list[dict[str, Any]] = [] + seen_tables: set[str] = set() - def _node_generate_sql(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Generate SQL from the structured plan.""" - query_plan = state["query_plan"] - if query_plan.mode != DashboardChatPlanMode.SQL: - state["sql_draft"] = None - return state + for table_name in direct_match_tables: + column_names = [ + str(column.get("name") or "") + for column in getattr(schema_cache.get(table_name), "columns", []) + ] + matches.append({"table": table_name, "columns": column_names[:40]}) + seen_tables.add(table_name) + if len(matches) >= limit: + break - sql_draft = self.llm_client.generate_sql( - user_query=state["user_query"], - dashboard_summary=state["dashboard_summary"], - query_plan=query_plan, - schema_prompt=self._schema_prompt_for_plan( - state.get("schema_snippets", {}), - query_plan, + for table_name, snippet in schema_cache.items(): + if table_name in 
seen_tables: + continue + column_names = [str(column.get("name") or "") for column in snippet.columns] + if not any(keyword in column_name.lower() for column_name in column_names): + continue + matches.append({"table": table_name, "columns": column_names[:40]}) + if len(matches) >= limit: + break + + if matches: + return { + "tables": matches, + "hint": ( + f"Found {len(matches)} allowlisted tables. Check schema before assuming table structure." + ), + } + return { + "tables": [], + "hint": ( + f"No allowlisted tables matched '{keyword}'. Try a broader keyword or retrieve chart docs first." ), - distinct_values=state.get("distinct_values", {}), - allowlisted_tables=state["allowlist"].prioritized_tables(), + } + + def _tool_check_table_row_count( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Count rows in one allowlisted table.""" + table_name = str(args.get("table") or "").lower() + if not state["allowlist"].is_allowed(table_name): + return { + "error": "table_not_allowed", + "table": table_name, + "message": ( + f"Table {table_name} is not accessible in the current dashboard context." 
+ ), + } + + sql = f"SELECT COUNT(*) AS row_count FROM {table_name} LIMIT 1" + validation = DashboardChatSqlGuard( + allowlist=state["allowlist"], + max_rows=1, + ).validate(sql) + if not validation.is_valid or not validation.sanitized_sql: + return {"error": "sql_validation_failed", "issues": validation.errors} + + rows = self._warehouse_tools(execution_context, state["org"]).execute_sql( + validation.sanitized_sql ) - state["sql_draft"] = sql_draft - return state + row_count = 0 + if rows: + row_count = int(rows[0].get("row_count") or 0) + return {"table": table_name, "row_count": row_count, "has_data": row_count > 0} - def _node_validate_sql(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Run SQL guard validation.""" - sql_draft = state.get("sql_draft") - if sql_draft is None or not sql_draft.sql: - state["sql_validation"] = None - return state + def _run_sql_with_distinct_guard( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any]: + """Validate SQL like the prototype and let the tool loop self-correct on failures.""" + sql = str(args.get("sql") or "").strip() + if not sql: + return {"error": "sql_missing", "message": "SQL is required"} + + allowlist_validation = self._validate_sql_allowlist(sql, state["allowlist"]) + if not allowlist_validation["valid"]: + return { + "error": "table_not_allowed", + "invalid_tables": allowlist_validation["invalid_tables"], + "message": allowlist_validation["message"], + } + + follow_up_dimension_validation = self._validate_follow_up_dimension_usage( + sql=sql, + state=state, + execution_context=execution_context, + ) + if follow_up_dimension_validation is not None: + return follow_up_dimension_validation + missing_distinct = self._missing_distinct(sql, state, execution_context) + if missing_distinct: + return { + "error": "must_fetch_distinct_values", + "missing": missing_distinct, + "message": ( + "Call get_distinct_values for 
these columns, then regenerate the SQL using one of the returned values." + ), + } validation = DashboardChatSqlGuard( allowlist=state["allowlist"], max_rows=self.runtime_config.max_query_rows, - ).validate(sql_draft.sql) - state["sql_validation"] = validation - return state - - def _node_execute_sql(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Execute the validated SQL.""" - sql_validation = state["sql_validation"] - if sql_validation is None or not sql_validation.sanitized_sql: - state["sql_results"] = None - return state + ).validate(sql) + execution_context["last_sql_validation"] = validation + if not validation.is_valid or not validation.sanitized_sql: + return { + "error": "sql_validation_failed", + "issues": validation.errors, + "warnings": validation.warnings, + } + + missing_columns = self._missing_columns_in_primary_table( + sql=validation.sanitized_sql, + state=state, + execution_context=execution_context, + ) + if missing_columns is not None: + return missing_columns + execution_context["last_sql"] = validation.sanitized_sql try: - warehouse_tools = self.warehouse_tools_factory(state["org"]) - state["sql_results"] = warehouse_tools.execute_sql(sql_validation.sanitized_sql) - except DashboardChatWarehouseToolsError as error: - state["warnings"] = state.get("warnings", []) + [str(error)] - state["sql_results"] = None - return state + rows = self._warehouse_tools(execution_context, state["org"]).execute_sql( + validation.sanitized_sql + ) + except Exception as error: + structured_error = self._structured_sql_execution_error( + sql=validation.sanitized_sql, + error=error, + state=state, + execution_context=execution_context, + ) + if structured_error is not None: + return structured_error + return { + "success": False, + "error": str(error), + "sql_used": validation.sanitized_sql, + } + + serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder)) + execution_context["last_sql_results"] = serialized_rows + 
self._record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=validation.sanitized_sql, + ) + return { + "success": True, + "row_count": len(serialized_rows), + "data_preview": self._preview_sql_rows(serialized_rows), + "error": None, + "sql_used": validation.sanitized_sql, + "columns": list(serialized_rows[0].keys()) if serialized_rows else [], + "rows": serialized_rows, + } - def _node_compose_answer(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Assemble the answer text across simple, context, and SQL paths.""" - intent_decision = state["intent_decision"] - query_plan = state.get("query_plan") - sql_draft = state.get("sql_draft") - sql_validation = state.get("sql_validation") + def _missing_columns_in_primary_table( + self, + *, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any] | None: + """Return a corrective tool error when SQL references columns absent from the referenced query tables.""" + table_references = self._table_references(sql) + referenced_tables = [ + reference["table_name"] + for reference in table_references + if reference.get("table_name") + ] + if not referenced_tables: + return None - warnings = list(dict.fromkeys(state.get("warnings", []))) - if sql_draft is not None: - warnings.extend(warning for warning in sql_draft.warnings if warning not in warnings) - if sql_validation is not None: - warnings.extend( - warning for warning in sql_validation.warnings if warning not in warnings + schema_cache = self._schema_cache(state, execution_context, tables=referenced_tables) + all_schema_cache = self._schema_cache(state, execution_context) + missing_columns_by_table: dict[str, set[str]] = {} + candidate_tables_by_column: dict[str, list[str]] = {} + tables_in_query = list(dict.fromkeys(referenced_tables)) + + for qualifier, column_name in self._referenced_sql_identifier_refs(sql): + resolved_table = self._resolve_identifier_table( 
+ qualifier=qualifier, + column_name=column_name, + table_references=table_references, + schema_cache=schema_cache, ) + if resolved_table is not None: + continue - if intent_decision.intent == DashboardChatIntent.SMALL_TALK: - answer_text = "I can help explain this dashboard or answer questions about the data behind its charts." - elif intent_decision.intent == DashboardChatIntent.IRRELEVANT: - answer_text = "I can help with questions about this dashboard, its charts, and the data behind them." - elif intent_decision.intent == DashboardChatIntent.NEEDS_CLARIFICATION: - answer_text = ( - intent_decision.clarification_question - or "Could you be more specific about the metric, program, or time period you want?" - ) - elif query_plan and query_plan.mode == DashboardChatPlanMode.CLARIFY: - answer_text = query_plan.clarification_question or ( - sql_draft.clarification_question - if sql_draft is not None and sql_draft.clarification_question - else "I need a bit more detail before I can answer that safely." - ) - elif sql_draft is not None and not sql_draft.sql and sql_draft.clarification_question: - answer_text = sql_draft.clarification_question - elif sql_validation is not None and not sql_validation.is_valid: - answer_text = "I couldn't answer that safely from this dashboard context." 
- if sql_validation.errors: - answer_text += f" {sql_validation.errors[0]}" - else: - try: - answer_text = self.llm_client.compose_answer( - user_query=state["user_query"], - dashboard_summary=state["dashboard_summary"], - retrieved_documents=state.get("retrieved_documents", []), - sql=sql_validation.sanitized_sql if sql_validation else None, - sql_results=state.get("sql_results"), - warnings=warnings, - related_dashboard_titles=[ - related_dashboard.title - for related_dashboard in state.get("related_dashboards", []) - ], + if qualifier is not None: + target_table = ( + self._resolve_table_qualifier(qualifier, table_references) + or self._primary_table_name(sql) + or tables_in_query[0] ) - except AssertionError: - raise - except Exception: - answer_text = self._fallback_answer_text( - retrieved_documents=state.get("retrieved_documents", []), - sql_results=state.get("sql_results"), + else: + matching_tables = self._tables_with_column( + column_name, + tables_in_query, + schema_cache, ) + if len(matching_tables) > 1: + continue + target_table = self._primary_table_name(sql) or tables_in_query[0] + + missing_columns_by_table.setdefault(target_table, set()).add(column_name) + candidate_tables_by_column[column_name] = self._find_tables_with_column( + column_name, + all_schema_cache, + ) - state["response"] = DashboardChatResponse( - answer_text=answer_text.strip(), - intent=intent_decision.intent, - citations=state.get("citations", []), - related_dashboards=state.get("related_dashboards", []), - warnings=warnings, - sql=sql_validation.sanitized_sql if sql_validation else None, - sql_results=state.get("sql_results"), - metadata={}, + missing_columns = sorted( + { + column_name + for columns in missing_columns_by_table.values() + for column_name in columns + } ) - return state + if not missing_columns: + return None - def _node_finalize_response( - self, state: DashboardChatRuntimeState - ) -> DashboardChatRuntimeState: - """Attach metadata and table citations to the 
final response.""" - response = state["response"] - citations = list(response.citations) - sql_validation = state.get("sql_validation") - if ( + primary_table = self._primary_table_name(sql) or tables_in_query[0] + target_table = ( + next(iter(missing_columns_by_table)) + if len(missing_columns_by_table) == 1 + else primary_table + ) + best_table = self._best_table_for_missing_columns( + missing_columns, + all_schema_cache, + ) + message = ( + f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. " + "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." + ) + if best_table: + message += f" Best candidate table: {best_table}." + result = { + "error": "column_not_in_table", + "table": target_table, + "missing_columns": missing_columns, + "candidate_tables": candidate_tables_by_column, + "best_table": best_table, + "message": message, + } + if len(missing_columns) == 1: + column_name = missing_columns[0] + result["column"] = column_name + result["candidates"] = candidate_tables_by_column.get(column_name, []) + return result + + def _structured_sql_execution_error( + self, + *, + sql: str, + error: Exception, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any] | None: + """Convert warehouse execution errors into prototype-style corrective feedback when possible.""" + error_text = str(error) + missing_column_match = re.search( + r'column "(?:[\w]+\.)?([^"]+)" does not exist', + error_text, + flags=re.IGNORECASE, + ) + if missing_column_match: + missing_column = missing_column_match.group(1).lower() + schema_cache = self._schema_cache(state, execution_context) + candidate_tables = self._find_tables_with_column(missing_column, schema_cache) + return { + "error": "column_not_in_table", + "table": self._primary_table_name(sql), + "column": missing_column, + "missing_columns": [missing_column], + "candidates": candidate_tables, + "candidate_tables": 
{missing_column: candidate_tables}, + "best_table": candidate_tables[0] if candidate_tables else None, + "message": ( + f"Column {missing_column} is not available on the current table. " + "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns." + ), + "sql_used": sql, + } + return None + + def _validate_follow_up_dimension_usage( + self, + *, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> dict[str, Any] | None: + """Keep add-dimension follow-ups from succeeding without actually changing query granularity.""" + intent_decision = state["intent_decision"] + if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL: + return None + if intent_decision.follow_up_context.follow_up_type != "add_dimension": + return None + + requested_dimension = self._extract_requested_follow_up_dimension( + intent_decision.follow_up_context.modification_instruction or state["user_query"] + ) + if not requested_dimension: + return None + + previous_sql = state["conversation_context"].last_sql_query or "" + current_dimensions = self._structural_dimensions_from_sql(sql) + previous_dimensions = self._structural_dimensions_from_sql(previous_sql) + normalized_requested_dimension = self._normalize_dimension_name(requested_dimension) + if ( + normalized_requested_dimension in current_dimensions + and normalized_requested_dimension not in previous_dimensions + ): + return None + + candidate_tables = self._find_tables_with_column( + requested_dimension, + self._schema_cache(state, execution_context), + ) + return { + "error": "requested_dimension_missing", + "requested_dimension": requested_dimension, + "previous_dimensions": sorted(previous_dimensions), + "current_dimensions": sorted(current_dimensions), + "candidate_tables": candidate_tables, + "message": ( + f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. 
" + "Use the requested dimension exactly, or pick a table that contains it." + ), + } + + def _node_finalize_response( + self, + state: DashboardChatRuntimeState, + ) -> DashboardChatRuntimeState: + """Attach warehouse citations and metadata to the finished response.""" + response = state["response"] + citations = list(response.citations) + sql_validation = state.get("sql_validation") + if ( sql_validation is not None and sql_validation.is_valid and sql_validation.sanitized_sql is not None @@ -564,140 +1360,374 @@ def _node_finalize_response( if table_name ) + allowlist = state.get("allowlist") or DashboardChatAllowlist() state["response"] = DashboardChatResponse( answer_text=response.answer_text, intent=response.intent, citations=list(dict.fromkeys(citations)), - related_dashboards=response.related_dashboards, warnings=response.warnings, sql=response.sql, sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, metadata={ "dashboard_id": state["dashboard_id"], - "query_plan_mode": state.get("query_plan").mode.value - if state.get("query_plan") - else None, - "query_plan_tables": state.get("query_plan").relevant_tables - if state.get("query_plan") - else [], "retrieved_document_ids": [ - document.document_id for document in state.get("retrieved_documents", []) + document.document_id for document in state.get("retrieved_documents") or [] ], - "allowlisted_tables": sorted( - state.get("allowlist", DashboardChatAllowlist()).allowed_tables - ), - "sql_guard_errors": state.get("sql_validation").errors - if state.get("sql_validation") - else [], + "allowlisted_tables": sorted(allowlist.allowed_tables), + "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], + "intent_reason": state["intent_decision"].reason, + "missing_info": state["intent_decision"].missing_info, + "follow_up_type": state["intent_decision"].follow_up_context.follow_up_type, }, ) return state - @staticmethod - def _route_after_intent(state: 
DashboardChatRuntimeState) -> str: - """Route simple intents directly to answer composition.""" - intent = state["intent_decision"].intent - if intent in { - DashboardChatIntent.SMALL_TALK, - DashboardChatIntent.IRRELEVANT, - DashboardChatIntent.NEEDS_CLARIFICATION, - }: - return "compose_answer" - return "build_allowlist" + def _build_execution_result( + self, + *, + answer_text: str, + execution_context: dict[str, Any], + max_turns_reached: bool, + ) -> dict[str, Any]: + """Normalize tool-loop state into one runtime response payload.""" + if max_turns_reached: + execution_context["tool_calls"].append({"name": "max_turns_reached"}) + warnings = list(dict.fromkeys(execution_context["warnings"])) + return { + "answer_text": answer_text.strip(), + "retrieved_documents": execution_context["retrieved_documents"], + "tool_calls": execution_context["tool_calls"], + "sql": execution_context["last_sql"], + "sql_validation": execution_context["last_sql_validation"], + "sql_results": execution_context["last_sql_results"], + "warnings": warnings, + } + + def _warehouse_tools( + self, + execution_context: dict[str, Any], + org: Org, + ) -> DashboardChatWarehouseTools: + """Build the warehouse tool helper lazily for the turn.""" + warehouse_tools = execution_context.get("warehouse_tools") + if warehouse_tools is None: + warehouse_tools = self.warehouse_tools_factory(org) + execution_context["warehouse_tools"] = warehouse_tools + return warehouse_tools + + def _schema_cache( + self, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + tables: Sequence[str] | None = None, + ) -> dict[str, Any]: + """Load and cache schema snippets for allowlisted tables.""" + requested_tables = [ + table_name.lower() + for table_name in ( + tables if tables is not None else state["allowlist"].prioritized_tables() + ) + if state["allowlist"].is_allowed(table_name) + ] + cache = execution_context["schema_cache"] + missing_tables = [table_name for table_name in requested_tables 
if table_name not in cache] + if missing_tables: + snippets = self._warehouse_tools(execution_context, state["org"]).get_schema_snippets( + missing_tables + ) + for table_name, snippet in snippets.items(): + cache[table_name.lower()] = snippet + if snippets: + self._persist_session_schema_cache(state, cache) + if tables is None: + return cache + return { + table_name: cache[table_name] + for table_name in requested_tables + if table_name in cache + } + + def _seed_distinct_cache_from_previous_sql( + self, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> None: + """Treat text filters from the previous successful SQL as already validated for follow-ups.""" + previous_sql = state["conversation_context"].last_sql_query + if not previous_sql: + return + + self._record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=previous_sql, + ) @staticmethod - def _route_after_plan(state: DashboardChatRuntimeState) -> str: - """Route SQL plans to distinct lookup and all others to answer composition.""" - query_plan = state["query_plan"] - if query_plan.mode == DashboardChatPlanMode.SQL: - return "lookup_distinct_values" - return "compose_answer" + def _dbt_resources_by_unique_id( + state: DashboardChatRuntimeState, + ) -> dict[str, dict[str, Any]]: + """Return the allowlisted dbt index built at session start.""" + dbt_index = state.get("dbt_index") or {} + return dict(dbt_index.get("resources_by_unique_id") or {}) + + def _embed_query( + self, + query_text: str, + embedding_cache: dict[str, list[float]], + ) -> list[float]: + """Cache embeddings per query string during one turn.""" + if query_text not in embedding_cache: + embedding_cache[query_text] = self.vector_store.embed_query(query_text) + return embedding_cache[query_text] @staticmethod - def _route_after_sql_validation(state: DashboardChatRuntimeState) -> str: - """Only execute SQL after it passes validation.""" - sql_validation = 
state.get("sql_validation") - if sql_validation is not None and sql_validation.is_valid and sql_validation.sanitized_sql: - return "execute_sql" - return "compose_answer" + def _route_after_intent(state: DashboardChatRuntimeState) -> str: + """Route to one explicit handler per prototype intent.""" + return state["intent_decision"].intent.value @staticmethod def _normalize_conversation_history( - conversation_history: Sequence[DashboardChatConversationMessage | dict[str, str]] | None, + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] | None, ) -> list[DashboardChatConversationMessage]: - """Normalize conversation history into typed messages.""" + """Normalize stored history into the typed runtime message format.""" normalized_messages: list[DashboardChatConversationMessage] = [] for item in conversation_history or []: if isinstance(item, DashboardChatConversationMessage): normalized_messages.append(item) - else: - normalized_messages.append( - DashboardChatConversationMessage( - role=str(item.get("role") or "user"), - content=str(item.get("content") or ""), - ) + continue + normalized_messages.append( + DashboardChatConversationMessage( + role=str(item.get("role") or "user"), + content=str(item.get("content") or ""), + payload=item.get("payload") or {}, ) + ) return normalized_messages - @staticmethod - def _heuristic_intent_decision( - user_query: str, + @classmethod + def _extract_conversation_context( + cls, conversation_history: Sequence[DashboardChatConversationMessage], - ) -> DashboardChatIntentDecision | None: - """Fast path for obvious intents before consulting the LLM.""" - normalized_query = user_query.strip().lower() - if not normalized_query: - return DashboardChatIntentDecision( - intent=DashboardChatIntent.NEEDS_CLARIFICATION, - reason="Empty query", - clarification_question="What would you like to know about this dashboard?", - ) + ) -> DashboardChatConversationContext: + """Extract reusable conversation context like 
the prototype conversation manager.""" + context = DashboardChatConversationContext() + recent_history = list(conversation_history)[-10:] - if normalized_query in SIMPLE_GREETINGS: - return DashboardChatIntentDecision( - intent=DashboardChatIntent.SMALL_TALK, - reason="Greeting or pleasantry", - ) + for message in reversed(recent_history): + if message.role != "assistant": + continue - if DashboardChatRuntime._contains_keyword_phrase(normalized_query, DATA_QUERY_KEYWORDS): - return DashboardChatIntentDecision( - intent=DashboardChatIntent.DATA_QUERY, - reason="Contains data-analysis keywords", - force_sql_path=True, - ) + payload = message.payload or {} + sql = payload.get("sql") + metadata = payload.get("metadata") or {} + citations = payload.get("citations") or [] + chart_ids = cls._extract_chart_ids_from_payload(payload) + + if chart_ids and context.last_sql_query and not context.last_chart_ids: + context = DashboardChatConversationContext( + last_sql_query=context.last_sql_query, + last_tables_used=context.last_tables_used, + last_chart_ids=chart_ids, + last_metrics=context.last_metrics, + last_dimensions=context.last_dimensions, + last_filters=context.last_filters, + last_response_type=context.last_response_type, + last_answer_text=context.last_answer_text, + last_intent=context.last_intent, + ) + break + + if sql and not context.last_sql_query: + tables = [ + str(table_name).lower() + for table_name in metadata.get("query_plan_tables") or [] + if table_name + ] + if not tables: + tables = [ + str(citation.get("table_name")).lower() + for citation in citations + if citation.get("table_name") + ] + if not tables: + tables = DashboardChatSqlGuard._extract_table_names(str(sql)) + context = DashboardChatConversationContext( + last_sql_query=str(sql), + last_tables_used=list(dict.fromkeys(tables)), + last_chart_ids=chart_ids, + last_metrics=cls._extract_metrics_from_sql(str(sql)), + last_dimensions=cls._extract_dimensions_from_sql(str(sql)), + 
last_filters=cls._extract_filters_from_sql(str(sql)), + last_response_type="sql_result", + last_answer_text=message.content, + last_intent=str(payload.get("intent") or ""), + ) + if chart_ids: + break + continue - if DashboardChatRuntime._contains_keyword_phrase( - normalized_query, - CONTEXT_QUERY_KEYWORDS, + if payload and context.last_response_type is None: + context = DashboardChatConversationContext( + last_chart_ids=chart_ids, + last_response_type="metadata_answer", + last_answer_text=message.content, + last_intent=str(payload.get("intent") or ""), + ) + + return context + + @staticmethod + def _extract_chart_ids_from_payload(payload: dict[str, Any]) -> list[str]: + """Extract chart ids from persisted metadata/citations like the prototype chat history.""" + metadata = payload.get("metadata") or {} + chart_ids = [str(chart_id) for chart_id in metadata.get("chart_ids_used") or [] if chart_id] + if chart_ids: + return list(dict.fromkeys(chart_ids)) + + extracted_chart_ids: list[str] = [] + for citation in payload.get("citations") or []: + source_identifier = str(citation.get("source_identifier") or "") + chart_id = DashboardChatRuntime._chart_id_from_source_identifier(source_identifier) + if chart_id is not None: + extracted_chart_ids.append(str(chart_id)) + return list(dict.fromkeys(extracted_chart_ids)) + + @classmethod + def _build_follow_up_context_prompt( + cls, + conversation_context: DashboardChatConversationContext, + user_query: str, + ) -> str: + """Build the prototype follow-up context prompt.""" + return "\n".join( + [ + "PREVIOUS QUERY CONTEXT:", + f"Last SQL: {conversation_context.last_sql_query or 'None'}", + f"Tables used: {', '.join(conversation_context.last_tables_used) or 'None'}", + f"Metrics: {', '.join(conversation_context.last_metrics) or 'None'}", + f"Dimensions: {', '.join(conversation_context.last_dimensions) or 'None'}", + f"Filters: {', '.join(conversation_context.last_filters) or 'None'}", + "", + f"NEW INSTRUCTION: {user_query}", + 
"", + "TASK: Modify the previous query based on the new instruction. Reuse tables and context where possible.", + ] + ) + + @staticmethod + def _detect_sql_modification_type(user_query: str) -> str: + """Detect the same coarse follow-up modification categories as the prototype.""" + lowered_query = user_query.lower() + if any(keyword in lowered_query for keyword in ["by", "split by", "break down", "group by"]): + return "add_dimension" + if any(keyword in lowered_query for keyword in ["filter", "only", "exclude", "where"]): + return "add_filter" + if any( + keyword in lowered_query + for keyword in ["last", "this", "previous", "next", "monthly", "weekly", "quarterly"] ): - return DashboardChatIntentDecision( - intent=DashboardChatIntent.CONTEXT_QUERY, - reason="Contains definition or explanation keywords", - ) + return "modify_timeframe" + if any( + keyword in lowered_query + for keyword in ["total", "sum", "count", "average", "avg", "maximum", "minimum"] + ): + return "change_aggregation" + return "general_modification" - if len(normalized_query.split()) <= 2 and conversation_history: - return DashboardChatIntentDecision( - intent=DashboardChatIntent.DATA_QUERY, - reason="Short follow-up treated as a data refinement", - force_sql_path=True, - ) + @staticmethod + def _extract_requested_follow_up_dimension(text: str) -> str | None: + """Extract the requested follow-up dimension and normalize natural-language spaces.""" + normalized_text = text.strip().lower() + patterns = [ + r"split\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + r"break\s+down\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + r"group\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + r"\bby\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + ] + for pattern in patterns: + match = re.search(pattern, normalized_text) + if not match: + continue + candidate = re.split( + r"\b(with|for|in|across|between)\b", + match.group(1), + maxsplit=1, + )[0] + candidate = re.sub(r"[^a-zA-Z0-9_\s]", " ", candidate) + normalized_candidate = "_".join(part for part in 
candidate.split() if part) + if normalized_candidate: + return normalized_candidate return None @staticmethod - def _contains_keyword_phrase(normalized_query: str, keywords: set[str]) -> bool: - """Match keywords on word boundaries to avoid substring false positives.""" - return any( - re.search(rf"\b{re.escape(keyword)}\b", normalized_query) for keyword in keywords + def _extract_metrics_from_sql(sql: str) -> list[str]: + """Extract aggregate expressions from the previous SQL for follow-up prompts.""" + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if not select_clause: + return [] + metrics: list[str] = [] + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + normalized_expression = expression.strip() + if normalized_expression and DashboardChatSqlGuard._contains_aggregate( + normalized_expression + ): + metrics.append(normalized_expression) + return metrics[:5] + + @staticmethod + def _extract_dimensions_from_sql(sql: str) -> list[str]: + """Extract GROUP BY dimensions from the previous SQL.""" + match = re.search( + r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not match: + return [] + return [ + dimension.strip().strip('`"') + for dimension in match.group(1).split(",") + if dimension.strip() + ][:5] + + @staticmethod + def _extract_filters_from_sql(sql: str) -> list[str]: + """Extract WHERE-clause filters from the previous SQL.""" + match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, ) + if not match: + return [] + + where_clause = match.group(1).strip() + filters: list[str] = [] + for pattern in [ + r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'([^']+)'", + r"([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\([^)]+\)", + ]: + for filter_match in re.findall(pattern, where_clause, flags=re.IGNORECASE): + if isinstance(filter_match, tuple) and len(filter_match) == 2: + filters.append(f"{filter_match[0]} = 
{filter_match[1]}") + else: + filters.append(str(filter_match)) + return filters[:5] def _query_vector_store( self, + *, org: Org, + collection_name: str | None, query_text: str, source_types: Sequence[str], dashboard_id: int | None = None, + query_embedding: list[float] | None = None, ) -> list[DashboardChatRetrievedDocument]: - """Run a vector query and map it into runtime documents.""" + """Query Chroma and normalize the results.""" if not source_types: return [] @@ -707,6 +1737,8 @@ def _query_vector_store( n_results=self.runtime_config.retrieval_limit, source_types=list(source_types), dashboard_id=dashboard_id, + query_embedding=query_embedding, + collection_name=collection_name, ) return [ DashboardChatRetrievedDocument( @@ -736,21 +1768,13 @@ def _filter_allowlisted_dbt_results( return filtered_results @staticmethod - def _merge_retrieval_results( - dashboard_results: Sequence[DashboardChatRetrievedDocument], - org_results: Sequence[DashboardChatRetrievedDocument], - dbt_results: Sequence[DashboardChatRetrievedDocument], + def _dedupe_retrieved_documents( + results: Sequence[DashboardChatRetrievedDocument], ) -> list[DashboardChatRetrievedDocument]: - """Prioritize current-dashboard docs, then org docs, then dbt docs.""" - scored_results: list[tuple[tuple[int, float], DashboardChatRetrievedDocument]] = [] - for priority, result_group in enumerate([dashboard_results, org_results, dbt_results]): - for result in result_group: - scored_results.append( - ( - (priority, result.distance if result.distance is not None else 999.0), - result, - ) - ) + """Deduplicate retrieved documents while preserving better-ranked items.""" + scored_results: list[tuple[float, DashboardChatRetrievedDocument]] = [] + for result in results: + scored_results.append((result.distance if result.distance is not None else 999.0, result)) merged_results: list[DashboardChatRetrievedDocument] = [] seen_document_ids: set[str] = set() @@ -761,77 +1785,20 @@ def _merge_retrieval_results( 
seen_document_ids.add(result.document_id) return merged_results - def _build_related_dashboards( - self, - org: Org, - current_dashboard_id: int, - query_text: str, - ) -> list[DashboardChatRelatedDashboard]: - """Suggest other dashboards with matching retrieved context.""" - related_dashboard_source_types = self.source_config.filter_enabled( - [ - DashboardChatSourceType.DASHBOARD_CONTEXT.value, - DashboardChatSourceType.DASHBOARD_EXPORT.value, - ] - ) - if not related_dashboard_source_types: - return [] - - related_results = self.vector_store.query( - org.id, - query_text=query_text, - n_results=self.runtime_config.related_dashboard_limit * 4, - source_types=related_dashboard_source_types, - ) - candidate_dashboard_ids = [ - result.metadata.get("dashboard_id") - for result in related_results - if result.metadata.get("dashboard_id") - and result.metadata.get("dashboard_id") != current_dashboard_id - ] - if not candidate_dashboard_ids: - return [] - - dashboard_titles = { - dashboard.id: dashboard.title - for dashboard in Dashboard.objects.filter(org=org, id__in=set(candidate_dashboard_ids)) - } - suggestions: list[DashboardChatRelatedDashboard] = [] - seen_dashboard_ids: set[int] = set() - for result in related_results: - dashboard_id = result.metadata.get("dashboard_id") - if ( - dashboard_id in seen_dashboard_ids - or dashboard_id == current_dashboard_id - or dashboard_id not in dashboard_titles - ): - continue - suggestions.append( - DashboardChatRelatedDashboard( - dashboard_id=int(dashboard_id), - title=dashboard_titles[dashboard_id], - reason=self._compact_snippet(result.content), - ) - ) - seen_dashboard_ids.add(int(dashboard_id)) - if len(suggestions) >= self.runtime_config.related_dashboard_limit: - break - return suggestions - def _build_citations( self, + *, retrieved_documents: Sequence[DashboardChatRetrievedDocument], dashboard_export: dict[str, Any], allowlist: DashboardChatAllowlist, ) -> list[DashboardChatCitation]: - """Build structured citations 
from retrieved documents.""" + """Build citations from the retrieved tool-loop documents.""" dashboard_title = dashboard_export["dashboard"].get("title") or "Current dashboard" chart_lookup = { chart.get("id"): chart.get("title") or f"Chart {chart.get('id')}" for chart in dashboard_export.get("charts") or [] } citations: list[DashboardChatCitation] = [] - for document in retrieved_documents[:6]: chart_id = self._chart_id_from_source_identifier(document.source_identifier) table_name = None @@ -841,27 +1808,26 @@ def _build_citations( }: unique_id = self._unique_id_from_source_identifier(document.source_identifier) table_name = allowlist.unique_id_to_table.get(unique_id) if unique_id else None - title = self._citation_title( - document=document, - dashboard_title=dashboard_title, - chart_lookup=chart_lookup, - table_name=table_name, - ) citations.append( DashboardChatCitation( source_type=document.source_type, source_identifier=document.source_identifier, - title=title, + title=self._citation_title( + document=document, + dashboard_title=dashboard_title, + chart_lookup=chart_lookup, + table_name=table_name, + ), snippet=self._compact_snippet(document.content), dashboard_id=document.dashboard_id, table_name=table_name, ) ) - return citations @staticmethod def _citation_title( + *, document: DashboardChatRetrievedDocument, dashboard_title: str, chart_lookup: dict[int, str], @@ -885,99 +1851,966 @@ def _citation_title( return f"dbt catalog: {table_name or document.source_identifier}" return document.source_identifier + def _load_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: + """Return the current session's frozen dashboard context snapshot.""" + session_id = state.get("session_id") + if not session_id: + return self._build_session_snapshot(state) + + cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) + cached_snapshot = cache.get(cache_key) + if cached_snapshot is not None: + cached_dbt_index = 
cached_snapshot.get("dbt_index") + if cached_dbt_index is None and cached_snapshot.get("manifest_json") is not None: + cached_dbt_index = DashboardChatAllowlistBuilder.build_dbt_index( + cached_snapshot.get("manifest_json"), + deserialize_allowlist(cached_snapshot.get("allowlist")), + ) + return { + "dashboard_export": dict(cached_snapshot["dashboard_export"]), + "dbt_index": cached_dbt_index or {"resources_by_unique_id": {}}, + "allowlist": deserialize_allowlist(cached_snapshot.get("allowlist")), + "schema_cache": deserialize_schema_snippets(cached_snapshot.get("schema_cache")), + "distinct_cache": deserialize_distinct_cache( + cached_snapshot.get("distinct_cache") + ), + } + + snapshot = self._build_session_snapshot(state) + cache.set( + cache_key, + { + "dashboard_export": snapshot["dashboard_export"], + "dbt_index": snapshot["dbt_index"], + "allowlist": serialize_allowlist(snapshot["allowlist"]), + "schema_cache": serialize_schema_snippets(snapshot["schema_cache"]), + "distinct_cache": serialize_distinct_cache(snapshot["distinct_cache"]), + }, + DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, + ) + return snapshot + + def _build_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: + """Build one session-stable snapshot of dashboard-specific runtime context.""" + dashboard_export = DashboardService.export_dashboard_context( + state["dashboard_id"], + state["org"], + ) + manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(state["org"].dbt) + allowlist = DashboardChatAllowlistBuilder.build( + dashboard_export, + manifest_json=manifest_json, + ) + return { + "dashboard_export": dashboard_export, + "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index( + manifest_json, + allowlist, + ), + "allowlist": allowlist, + "schema_cache": {}, + "distinct_cache": set(), + } + + def _persist_session_schema_cache( + self, + state: DashboardChatRuntimeState, + schema_cache: dict[str, DashboardChatSchemaSnippet], + ) -> None: + """Persist lazily 
loaded schema snippets back into the session snapshot cache.""" + session_id = state.get("session_id") + if not session_id: + state["session_schema_cache"] = dict(schema_cache) + return + + cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) + cached_snapshot = cache.get(cache_key) + if cached_snapshot is None: + return + cached_snapshot["schema_cache"] = serialize_schema_snippets(schema_cache) + cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) + state["session_schema_cache"] = dict(schema_cache) + + def _persist_session_distinct_cache( + self, + state: DashboardChatRuntimeState, + distinct_cache: set[tuple[str, str, str]], + ) -> None: + """Persist validated distinct values back into the session snapshot cache.""" + session_id = state.get("session_id") + if not session_id: + state["session_distinct_cache"] = set(distinct_cache) + return + + cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) + cached_snapshot = cache.get(cache_key) + if cached_snapshot is None: + return + cached_snapshot["distinct_cache"] = serialize_distinct_cache(distinct_cache) + cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) + state["session_distinct_cache"] = set(distinct_cache) + @staticmethod - def _build_dashboard_summary(dashboard_export: dict[str, Any]) -> str: - """Format the dashboard summary fed into the LLM.""" - dashboard_payload = dashboard_export["dashboard"] - lines = [ - f"Dashboard: {dashboard_payload.get('title')}", - f"Description: {dashboard_payload.get('description') or 'None'}", + def _compact_snippet(content: str, max_length: int = 220) -> str: + """Collapse whitespace and trim long snippets for citations and suggestions.""" + normalized = " ".join(content.split()) + if len(normalized) <= max_length: + return normalized + return normalized[: max_length - 3].rstrip() + "..." 
+ + def _tool_document_payload( + self, + document: DashboardChatRetrievedDocument, + allowlist: DashboardChatAllowlist, + dashboard_export: dict[str, Any], + ) -> dict[str, Any]: + """Convert a runtime retrieval result into the prototype tool payload shape.""" + metadata: dict[str, Any] = { + "type": self._prototype_doc_type(document.source_type), + "source_type": document.source_type, + "source_identifier": document.source_identifier, + } + chart_id = self._chart_id_from_source_identifier(document.source_identifier) + if chart_id is not None: + metadata["chart_id"] = chart_id + metadata["dashboard_id"] = document.dashboard_id + chart_metadata = self._chart_tool_metadata(chart_id, dashboard_export) + if chart_metadata: + metadata.update(chart_metadata) + unique_id = self._unique_id_from_source_identifier(document.source_identifier) + if unique_id: + metadata["dbt_unique_id"] = unique_id + metadata["table_name"] = allowlist.unique_id_to_table.get(unique_id) + return { + "doc_id": document.document_id, + "content": document.content, + "metadata": metadata, + "similarity_score": document.distance, + } + + @classmethod + def _chart_tool_metadata( + cls, + chart_id: int, + dashboard_export: dict[str, Any], + ) -> dict[str, Any]: + """Return structured chart metadata that nudges the tool loop toward exact chart fields.""" + chart = next( + ( + candidate + for candidate in (dashboard_export.get("charts") or []) + if candidate.get("id") == chart_id + ), + None, + ) + if chart is None: + return {} + + preferred_table = build_dashboard_chat_table_name( + chart.get("schema_name"), + chart.get("table_name"), + ) + metric_columns = cls._chart_metric_columns(chart) + dimension_columns = cls._chart_dimension_columns(chart) + time_column = cls._chart_time_column(chart, dimension_columns) + payload: dict[str, Any] = { + "chart_title": str(chart.get("title") or ""), + "chart_type": str(chart.get("chart_type") or ""), + } + if preferred_table: + payload["preferred_table"] = 
preferred_table + if metric_columns: + payload["metric_columns"] = metric_columns + if dimension_columns: + payload["dimension_columns"] = dimension_columns + if time_column: + payload["time_column"] = time_column + return payload + + @staticmethod + def _prototype_doc_type(source_type: str) -> str: + """Map Dalgo source types into the prototype doc-type vocabulary.""" + if source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: + return "chart" + if source_type in { + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, + }: + return "dbt_model" + return "context" + + def _validate_sql_allowlist( + self, + sql: str, + allowlist: DashboardChatAllowlist, + ) -> dict[str, Any]: + """Validate that all referenced tables are in the dashboard allowlist.""" + referenced_tables = DashboardChatSqlGuard._extract_table_names(sql) + invalid_tables = [ + table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name) + ] + if invalid_tables: + return { + "valid": False, + "invalid_tables": invalid_tables, + "message": ( + "SQL references tables not available in the current dashboard: " + + ", ".join(invalid_tables) + + ". Use list_tables_by_keyword to find allowed tables." 
+ ), + } + return {"valid": True, "invalid_tables": [], "message": ""} + + @staticmethod + def _primary_table_name(sql: str) -> str | None: + """Return the primary FROM table for single-query correction logic.""" + table_match = re.search(r"\bFROM\s+([`\"]?)([\w\.]+)\1", sql, re.IGNORECASE) + if not table_match: + return None + return normalize_dashboard_chat_table_name(table_match.group(2)) + + @classmethod + def _table_references(cls, sql: str) -> list[dict[str, str | None]]: + """Return normalized FROM/JOIN table references and aliases from one SQL statement.""" + references: list[dict[str, str | None]] = [] + for match in re.finditer( + r"\b(?:FROM|JOIN)\s+([`\"]?)([\w\.]+)\1(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", + sql, + flags=re.IGNORECASE, + ): + table_name = normalize_dashboard_chat_table_name(match.group(2)) + if not table_name: + continue + alias = str(match.group(3) or "").lower() or None + references.append( + { + "table_name": table_name, + "alias": alias, + "short_name": table_name.split(".")[-1], + } + ) + return references + + @classmethod + def _resolve_table_qualifier( + cls, + qualifier: str, + table_references: Sequence[dict[str, str | None]], + ) -> str | None: + """Resolve a qualifier like `f` or `analytics_table` to one query table.""" + normalized_qualifier = qualifier.lower().strip().strip('`"') + matches = [ + str(reference["table_name"]) + for reference in table_references + if normalized_qualifier + in { + str(reference.get("alias") or ""), + str(reference.get("short_name") or ""), + str(reference.get("table_name") or ""), + } + ] + deduped_matches = list(dict.fromkeys(match for match in matches if match)) + if len(deduped_matches) == 1: + return deduped_matches[0] + return None + + @staticmethod + def _table_columns(snippet: DashboardChatSchemaSnippet | Any) -> set[str]: + """Return the normalized column names available on one schema snippet.""" + return { + str(column.get("name") or "").lower() + for column in getattr(snippet, 
"columns", []) or [] + } + + @classmethod + def _tables_with_column( + cls, + column_name: str, + table_names: Sequence[str], + schema_cache: dict[str, Any], + ) -> list[str]: + """Return the query tables that contain one column.""" + normalized_column_name = column_name.lower() + return [ + table_name + for table_name in table_names + if normalized_column_name in cls._table_columns(schema_cache.get(table_name)) ] - for chart in dashboard_export.get("charts") or []: - lines.append( - "{title} uses {schema}.{table} ({chart_type})".format( - title=chart.get("title"), - schema=chart.get("schema_name"), - table=chart.get("table_name"), - chart_type=chart.get("chart_type"), + + @classmethod + def _resolve_identifier_table( + cls, + *, + qualifier: str | None, + column_name: str, + table_references: Sequence[dict[str, str | None]], + schema_cache: dict[str, Any], + ) -> str | None: + """Resolve one referenced column to a concrete query table when it is unambiguous.""" + if qualifier is not None: + resolved_table = cls._resolve_table_qualifier(qualifier, table_references) + if not resolved_table: + return None + if column_name.lower() in cls._table_columns(schema_cache.get(resolved_table)): + return resolved_table + return None + + query_tables = [str(reference["table_name"]) for reference in table_references if reference.get("table_name")] + matching_tables = cls._tables_with_column(column_name, query_tables, schema_cache) + if len(matching_tables) == 1: + return matching_tables[0] + return None + + @classmethod + def _referenced_sql_identifier_refs(cls, sql: str) -> list[tuple[str | None, str]]: + """Extract likely physical identifier references from the outer SQL.""" + table_aliases = { + alias.lower() + for alias in re.findall( + r"\b(?:FROM|JOIN)\s+[`\"]?[\w\.]+[`\"]?(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", + sql, + flags=re.IGNORECASE, + ) + if alias + } + select_aliases = cls._select_aliases(sql) + referenced_identifiers: list[tuple[str | None, str]] = [] + + 
select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if select_clause: + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + referenced_identifiers.extend( + cls._extract_identifier_refs_from_sql_segment(expression, table_aliases) + ) + + for pattern in [ + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", + r"\bORDER\s+BY\s+(.+?)(?:\bLIMIT\b|$)", + ]: + match = re.search(pattern, sql, flags=re.IGNORECASE | re.DOTALL) + if match: + referenced_identifiers.extend( + cls._extract_identifier_refs_from_sql_segment( + match.group(1), + table_aliases, + ignored_identifiers=select_aliases, + ) ) + + return list(dict.fromkeys(referenced_identifiers)) + + @staticmethod + def _select_aliases(sql: str) -> set[str]: + """Return aliases introduced by the outer SELECT clause.""" + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if not select_clause: + return set() + + aliases: set[str] = set() + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + alias_match = re.search( + r"\bAS\s+([A-Za-z_][A-Za-z0-9_]*)\s*$", + expression, + flags=re.IGNORECASE, ) - return "\n".join(lines) + if alias_match: + aliases.add(alias_match.group(1).lower()) + return aliases @staticmethod - def _normalize_query_plan( - query_plan: DashboardChatQueryPlan, - allowlist: DashboardChatAllowlist, - default_tables: Sequence[str], - ) -> DashboardChatQueryPlan: - """Drop out-of-bounds tables and backfill safe defaults.""" - relevant_tables = [ - table_name.lower() - for table_name in query_plan.relevant_tables - if allowlist.is_allowed(table_name) + def _extract_identifier_refs_from_sql_segment( + segment: str, + table_aliases: set[str], + ignored_identifiers: set[str] | None = None, + ) -> list[tuple[str | None, str]]: + """Pull qualified and unqualified column-like identifiers out of one SQL segment.""" + normalized_segment = 
re.sub(r"'[^']*'", " ", segment) + normalized_segment = re.sub( + r"\bAS\s+[A-Za-z_][A-Za-z0-9_]*", + " ", + normalized_segment, + flags=re.IGNORECASE, + ) + ignored_tokens = { + "SELECT", + "FROM", + "WHERE", + "GROUP", + "BY", + "ORDER", + "LIMIT", + "COUNT", + "SUM", + "AVG", + "MIN", + "MAX", + "DISTINCT", + "AND", + "OR", + "AS", + "IN", + "CASE", + "WHEN", + "THEN", + "ELSE", + "END", + "TRUE", + "FALSE", + "NULL", + "NOT", + "ASC", + "DESC", + "ON", + "JOIN", + } + ignored_identifiers = {identifier.lower() for identifier in (ignored_identifiers or set())} + identifiers: list[tuple[str | None, str]] = [] + for match in re.finditer( + r"(?:(?P[A-Za-z_][A-Za-z0-9_]*)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)", + normalized_segment, + ): + qualifier = match.group("qualifier") + identifier = match.group("identifier") + if not identifier: + continue + if identifier.upper() in ignored_tokens: + continue + if identifier.lower() in table_aliases or identifier.lower() in ignored_identifiers: + continue + trailing_segment = normalized_segment[match.end() :].lstrip() + if qualifier is None and trailing_segment.startswith("("): + continue + identifiers.append((qualifier.lower() if qualifier else None, identifier.lower())) + return identifiers + + @staticmethod + def _best_table_for_missing_columns( + missing_columns: Sequence[str], + schema_cache: dict[str, Any], + ) -> str | None: + """Return the first allowlisted table that covers all missing columns.""" + wanted_columns = {column_name.lower() for column_name in missing_columns} + for table_name, snippet in schema_cache.items(): + available_columns = { + str(column.get("name") or "").lower() for column in snippet.columns + } + if wanted_columns.issubset(available_columns): + return table_name + return None + + def _missing_distinct( + self, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + ) -> list[dict[str, Any]]: + """Detect text filters that require a prior distinct-values call.""" + 
where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return [] + + table_references = self._table_references(sql) + query_tables = [ + reference["table_name"] + for reference in table_references + if reference.get("table_name") ] - schema_lookup_tables = [ - table_name.lower() - for table_name in query_plan.schema_lookup_tables - if allowlist.is_allowed(table_name) + if not query_tables: + return [] + primary_table = self._primary_table_name(sql) or query_tables[0] + + full_schema_cache = self._schema_cache(state, execution_context, tables=query_tables) + all_schema_cache = self._schema_cache(state, execution_context) + + column_types = { + table_name: { + str(column.get("name") or "").lower(): str( + column.get("data_type") or column.get("type") or "" + ).lower() + for column in getattr(snippet, "columns", []) + } + for table_name, snippet in full_schema_cache.items() + } + missing: list[dict[str, Any]] = [] + for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = self._resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_references=table_references, + schema_cache=full_schema_cache, + ) + if resolved_table is None and qualifier is None: + matching_tables = self._tables_with_column( + normalized_column, + query_tables, + full_schema_cache, + ) + if len(matching_tables) > 1: + continue + if resolved_table is None: + candidate_tables = self._find_tables_with_column( + normalized_column, + all_schema_cache, + ) + if qualifier is None and candidate_tables: + continue + missing.append( + { + "table": primary_table, + "column": column_name, + "error": "column_not_in_table", + "candidates": candidate_tables, + } + ) + continue + data_type = column_types.get(resolved_table, {}).get(normalized_column, "") + if not data_type: + continue + if 
not self._is_text_type(data_type): + continue + if ( + not self._has_validated_distinct_value( + execution_context["distinct_cache"], + table_name=resolved_table, + column_name=normalized_column, + value=value, + ) + ): + missing.append( + {"table": resolved_table, "column": column_name, "value": value} + ) + return missing + + @staticmethod + def _extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str, str]]: + """Extract quoted text filter values from one WHERE clause.""" + extracted_values: list[tuple[str | None, str, str]] = [] + for qualifier, column_name, value in re.findall( + r"(?:([a-zA-Z_][a-zA-Z0-9_]*)\.)?([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'([^']+)'", + where_clause, + flags=re.IGNORECASE, + ): + extracted_values.append((qualifier.lower() if qualifier else None, column_name, value)) + + for match in re.finditer( + r"(?:([a-zA-Z_][a-zA-Z0-9_]*)\.)?([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\(([^)]*)\)", + where_clause, + flags=re.IGNORECASE, + ): + qualifier = match.group(1) + column_name = match.group(2) + for value in re.findall(r"'([^']+)'", match.group(3)): + extracted_values.append( + (qualifier.lower() if qualifier else None, column_name, value) + ) + return extracted_values + + @staticmethod + def _normalize_distinct_value(value: Any) -> str: + """Normalize one distinct value for exact cache lookups.""" + return str(value).strip().lower() + + @classmethod + def _has_validated_distinct_value( + cls, + distinct_cache: set[tuple[Any, ...]], + *, + table_name: str, + column_name: str, + value: Any, + ) -> bool: + """Return whether this exact text filter value was already validated in-session.""" + normalized_value = cls._normalize_distinct_value(value) + normalized_column = column_name.lower() + normalized_table = table_name.lower() + return ( + (normalized_table, normalized_column, normalized_value) in distinct_cache + or ("*", normalized_column, normalized_value) in distinct_cache + or (normalized_table, normalized_column) in distinct_cache + 
or ("*", normalized_column) in distinct_cache + ) + + @staticmethod + def _find_tables_with_column( + column_name: str, + schema_cache: dict[str, Any], + limit: int = 10, + ) -> list[str]: + """Find allowlisted tables that contain one column.""" + matches: list[str] = [] + normalized_column_name = column_name.lower() + for table_name, snippet in schema_cache.items(): + if any( + normalized_column_name == str(column.get("name") or "").lower() + for column in snippet.columns + ): + matches.append(table_name) + if len(matches) >= limit: + break + return matches + + @staticmethod + def _is_text_type(data_type: str) -> bool: + """Treat common string-like warehouse types as requiring distinct-value lookup.""" + return any( + text_token in data_type + for text_token in ["char", "text", "string", "varchar"] + ) + + @staticmethod + def _preview_sql_rows(rows: list[dict[str, Any]], max_rows: int = 5) -> str: + """Render a compact human-readable preview for successful SQL executions.""" + if not rows: + return "No matching rows found." + preview_rows = rows[:max_rows] + preview_lines = [json.dumps(row, cls=DjangoJSONEncoder) for row in preview_rows] + if len(rows) > max_rows: + preview_lines.append(f"... {len(rows) - max_rows} more rows") + return "\n".join(preview_lines) + + def _record_validated_distinct_values( + self, + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + table_name: str, + column_name: str, + values: Sequence[Any], + ) -> None: + """Persist exact validated filter values for the current session.""" + normalized_table = table_name.lower() + normalized_column = column_name.lower() + distinct_cache = execution_context["distinct_cache"] + for value in values: + normalized_value = self._normalize_distinct_value(value) + distinct_cache.add((normalized_table, normalized_column, normalized_value)) + # Follow-ups often move to an upstream table with the same validated dimension. 
+ distinct_cache.add(("*", normalized_column, normalized_value)) + self._persist_session_distinct_cache(state, distinct_cache) + + def _record_validated_filters_from_sql( + self, + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + sql: str, + ) -> None: + """Seed exact validated filter values from a successful SQL statement.""" + table_references = self._table_references(sql) + if not table_references: + return + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return + + query_tables = [ + reference["table_name"] + for reference in table_references + if reference.get("table_name") ] - if query_plan.mode == DashboardChatPlanMode.SQL and not relevant_tables: - relevant_tables = [table_name.lower() for table_name in default_tables] - if query_plan.mode == DashboardChatPlanMode.SQL and not schema_lookup_tables: - schema_lookup_tables = relevant_tables - return DashboardChatQueryPlan( - mode=query_plan.mode, - reason=query_plan.reason, - relevant_tables=relevant_tables, - schema_lookup_tables=schema_lookup_tables, - text_filters=[ - text_filter - for text_filter in query_plan.text_filters - if allowlist.is_allowed(text_filter.table_name) - ], - answer_strategy=query_plan.answer_strategy, - clarification_question=query_plan.clarification_question, - ) - - @staticmethod - def _schema_prompt_for_plan( - schema_snippets: dict[str, Any], - query_plan: DashboardChatQueryPlan, + schema_cache = dict(execution_context.get("schema_cache") or {}) + values_by_target: dict[tuple[str, str], list[str]] = {} + for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = self._resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_references=table_references, + schema_cache=schema_cache, + ) + if resolved_table is None and 
qualifier is None: + if schema_cache: + matching_tables = self._tables_with_column( + normalized_column, + query_tables, + schema_cache, + ) + if len(matching_tables) == 1: + resolved_table = matching_tables[0] + elif len(query_tables) == 1: + resolved_table = query_tables[0] + values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) + + if not values_by_target: + return + + for (table_name, column_name), values in values_by_target.items(): + self._record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name=table_name, + column_name=column_name, + values=values, + ) + + @classmethod + def _structural_dimensions_from_sql(cls, sql: str) -> set[str]: + """Return normalized non-aggregate dimensions used by one SQL statement.""" + if not sql: + return set() + + dimensions: set[str] = set() + for dimension in cls._extract_dimensions_from_sql(sql): + identifier_refs = cls._extract_identifier_refs_from_sql_segment( + dimension, + table_aliases=set(), + ) + if identifier_refs: + dimensions.update( + cls._normalize_dimension_name(column_name) + for _, column_name in identifier_refs + ) + continue + dimensions.add(cls._normalize_dimension_name(dimension)) + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if not select_clause: + return {dimension for dimension in dimensions if dimension} + + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + normalized_expression = expression.strip() + if not normalized_expression or DashboardChatSqlGuard._contains_aggregate( + normalized_expression + ): + continue + for _, column_name in cls._extract_identifier_refs_from_sql_segment( + normalized_expression, + table_aliases=set(), + ignored_identifiers=cls._select_aliases(sql), + ): + dimensions.add(cls._normalize_dimension_name(column_name)) + return {dimension for dimension in dimensions if dimension} + + @staticmethod + def _normalize_dimension_name(value: 
str) -> str: + """Normalize dimension names from SQL expressions and natural-language follow-ups.""" + normalized_value = value.strip().strip('`"').lower() + normalized_value = normalized_value.split(".")[-1] + normalized_value = re.sub(r"[^a-z0-9_]+", "_", normalized_value) + normalized_value = re.sub(r"_+", "_", normalized_value).strip("_") + return normalized_value + + @classmethod + def _chart_metric_columns(cls, chart: dict[str, Any]) -> list[str]: + """Extract the most likely metric columns from one chart export payload.""" + extra_config = chart.get("extra_config") or {} + metrics: list[str] = [] + for metric in extra_config.get("metrics") or []: + if isinstance(metric, str) and metric.strip(): + metrics.append(metric.strip()) + continue + if isinstance(metric, dict): + for key in ["column", "name", "field", "metric", "metric_column"]: + value = metric.get(key) + if isinstance(value, str) and value.strip(): + metrics.append(value.strip()) + break + for key in [ + "metric_col", + "metric_column", + "measure_col", + "measure_column", + "value_column", + "y_axis_column", + ]: + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + metrics.append(value.strip()) + return list(dict.fromkeys(metrics)) + + @classmethod + def _chart_dimension_columns(cls, chart: dict[str, Any]) -> list[str]: + """Extract dimension-like fields from one chart export payload.""" + extra_config = chart.get("extra_config") or {} + dimensions: list[str] = [] + for key in ["dimension_col", "extra_dimension", "group_by", "category_column", "x_axis_column"]: + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + dimensions.append(value.strip()) + for value in extra_config.get("dimensions") or []: + if isinstance(value, str) and value.strip(): + dimensions.append(value.strip()) + return list(dict.fromkeys(dimensions)) + + @classmethod + def _chart_time_column( + cls, + chart: dict[str, Any], + dimension_columns: Sequence[str], + ) -> str | 
None: + """Extract or infer the chart's time dimension when one is present.""" + extra_config = chart.get("extra_config") or {} + for key in ["time_column", "time_dimension", "date_column"]: + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + for dimension in dimension_columns: + if cls._looks_like_time_dimension(dimension): + return dimension + return None + + @staticmethod + def _looks_like_time_dimension(column_name: str) -> bool: + """Return whether a dimension name probably represents time bucketing.""" + normalized_column = column_name.lower() + return any( + token in normalized_column + for token in ["date", "day", "week", "month", "quarter", "year", "time"] + ) + + @staticmethod + def _serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: + """Trim large tool payloads before feeding them back into the model.""" + serialized = dict(result) + docs = serialized.get("docs") + if isinstance(docs, list) and len(docs) > 6: + serialized["docs"] = docs[:6] + rows = serialized.get("rows") + if isinstance(rows, list) and len(rows) > 5: + serialized["rows"] = rows[:5] + values = serialized.get("values") + if isinstance(values, list) and len(values) > 20: + serialized["values"] = values[:20] + return serialized + + def _summarize_tool_call( + self, + *, + tool_name: str, + args: dict[str, Any], + result: dict[str, Any], + ) -> dict[str, Any]: + """Persist a compact execution trace for one tool call.""" + entry: dict[str, Any] = {"name": tool_name, "args": args} + if tool_name == "retrieve_docs": + entry["count"] = result.get("count", 0) + entry["doc_ids"] = [doc.get("doc_id") for doc in result.get("docs", [])[:6]] + elif tool_name == "get_schema_snippets": + entry["tables"] = [table.get("table") for table in result.get("tables", [])] + elif tool_name == "search_dbt_models": + entry["count"] = result.get("count", 0) + entry["models"] = [model.get("table") or model.get("name") for model in result.get("models", 
[])] + elif tool_name == "get_dbt_model_info": + entry["model"] = result.get("model") + entry["column_count"] = len(result.get("columns") or []) + elif tool_name == "get_distinct_values": + entry["error"] = result.get("error") + entry["count"] = result.get("count", 0) + entry["values_sample"] = (result.get("values") or [])[:10] + elif tool_name == "list_tables_by_keyword": + entry["tables"] = [table.get("table") for table in result.get("tables", [])] + elif tool_name == "check_table_row_count": + entry["row_count"] = result.get("row_count") + elif tool_name == "run_sql_query": + entry["success"] = result.get("success", False) + entry["row_count"] = result.get("row_count", 0) + entry["sql_used"] = result.get("sql_used") + entry["error"] = result.get("error") + else: + entry["result"] = result + return entry + + def _max_turns_message( + self, + user_query: str, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], ) -> str: - """Filter the schema prompt down to the planned tables when possible.""" - if not schema_snippets: - return "" - desired_tables = query_plan.schema_lookup_tables or query_plan.relevant_tables - if not desired_tables: - return "\n\n".join(snippet.to_prompt_text() for snippet in schema_snippets.values()) - return "\n\n".join( - schema_snippets[table_name].to_prompt_text() - for table_name in desired_tables - if table_name in schema_snippets + """Return a bounded fallback when the prototype tool loop exhausts its budget.""" + if retrieved_documents: + return ( + "I found relevant dashboard context, but I couldn't complete the analysis safely. " + "Please rephrase the question or ask about a metric shown on this dashboard." + ) + return ( + f"I couldn't find enough dashboard-backed context to answer: {user_query}. " + "Please rephrase or ask about a metric shown on this dashboard." 
) + def _build_usage_summary(self) -> dict[str, Any]: + """Collect per-turn usage from the LLM client and embedding provider when supported.""" + usage: dict[str, Any] = {} + if hasattr(self.llm_client, "usage_summary"): + llm_usage = self.llm_client.usage_summary() + if llm_usage: + usage["llm"] = llm_usage + if hasattr(self.vector_store, "usage_summary"): + embedding_usage = self.vector_store.usage_summary() + if embedding_usage: + usage["embeddings"] = embedding_usage + return usage + + def _compose_small_talk_response(self, user_query: str) -> str: + """Generate the prototype small-talk response or fall back to a fixed helper.""" + if hasattr(self.llm_client, "compose_small_talk"): + try: + return self.llm_client.compose_small_talk(user_query) + except Exception: + logger.exception("Dashboard chat small-talk generation failed") + return "Hi! I can help with your program data and metrics. What would you like to know?" + + @staticmethod + def _build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | None: + """Handle obvious greetings and thanks without an LLM round trip.""" + if not GREETING_PATTERN.match(user_query.strip()): + return None + return DashboardChatIntentDecision( + intent=DashboardChatIntent.SMALL_TALK, + confidence=1.0, + reason="Obvious greeting or thanks", + ) + + @staticmethod + def _build_fast_path_small_talk_response(user_query: str) -> str: + """Keep greeting replies instant and deterministic.""" + normalized_query = user_query.strip().lower() + if "thank" in normalized_query: + return "You're welcome. Ask me anything about this dashboard or its data." + if "good morning" in normalized_query: + return "Good morning. Ask me anything about this dashboard or the data behind it." + if "good afternoon" in normalized_query: + return "Good afternoon. Ask me anything about this dashboard or the data behind it." + if "good evening" in normalized_query: + return "Good evening. 
Ask me anything about this dashboard or the data behind it." + return "Hi. Ask me anything about this dashboard or the data behind it." + + @staticmethod + def _clarification_fallback(missing_info: Sequence[str]) -> str: + """Mirror the prototype's specific clarification nudges when the router omits a question.""" + missing = {item.lower() for item in missing_info} + prompts: list[str] = [] + if "metric" in missing: + prompts.append("which metric") + if "time_range" in missing or "time period" in missing: + prompts.append("what time period") + if "dimension" in missing: + prompts.append("which breakdown or dimension") + if not prompts: + return "Could you be more specific about the metric, program, or time period you want?" + return "Could you clarify " + ", ".join(prompts) + "?" + @staticmethod def _fallback_answer_text( retrieved_documents: Sequence[DashboardChatRetrievedDocument], sql_results: list[dict[str, Any]] | None, ) -> str: - """Fallback response when answer composition fails.""" + """Fallback response when the model returns no final text.""" if sql_results is not None: if not sql_results: return "I didn't find any matching rows for that question." - return "Here are the matching results: " + json.dumps(sql_results[:3], default=str) + return DashboardChatRuntime._preview_sql_rows(sql_results) if retrieved_documents: return DashboardChatRuntime._compact_snippet(retrieved_documents[0].content) return "I couldn't find enough context to answer that." - @staticmethod - def _compact_snippet(content: str, max_length: int = 220) -> str: - """Collapse whitespace and trim long snippets for citations and suggestions.""" - normalized = " ".join(content.split()) - if len(normalized) <= max_length: - return normalized - return normalized[: max_length - 3].rstrip() + "..." 
- @staticmethod def _chart_id_from_source_identifier(source_identifier: str) -> int | None: """Extract chart ids from dashboard export source identifiers.""" diff --git a/ddpui/core/dashboard_chat/runtime_types.py b/ddpui/core/dashboard_chat/runtime_types.py index b6428bd2f..6eec55e2f 100644 --- a/ddpui/core/dashboard_chat/runtime_types.py +++ b/ddpui/core/dashboard_chat/runtime_types.py @@ -2,33 +2,56 @@ from dataclasses import asdict, dataclass, field from enum import Enum +import json from typing import Any +from django.core.serializers.json import DjangoJSONEncoder + class DashboardChatIntent(str, Enum): - """Supported high-level intents for dashboard chat.""" + """Prototype-aligned top-level intents for dashboard chat.""" - DATA_QUERY = "data_query" - CONTEXT_QUERY = "context_query" + QUERY_WITH_SQL = "query_with_sql" + QUERY_WITHOUT_SQL = "query_without_sql" + FOLLOW_UP_SQL = "follow_up_sql" + FOLLOW_UP_CONTEXT = "follow_up_context" NEEDS_CLARIFICATION = "needs_clarification" SMALL_TALK = "small_talk" IRRELEVANT = "irrelevant" -class DashboardChatPlanMode(str, Enum): - """Execution modes chosen after planning.""" - - SQL = "sql" - CONTEXT = "context" - CLARIFY = "clarify" - - @dataclass(frozen=True) class DashboardChatConversationMessage: """Single prior conversation message.""" role: str content: str + payload: dict[str, Any] = field(default_factory=dict) + + +@dataclass(frozen=True) +class DashboardChatConversationContext: + """Reusable context extracted from prior assistant turns.""" + + last_sql_query: str | None = None + last_tables_used: list[str] = field(default_factory=list) + last_chart_ids: list[str] = field(default_factory=list) + last_metrics: list[str] = field(default_factory=list) + last_dimensions: list[str] = field(default_factory=list) + last_filters: list[str] = field(default_factory=list) + last_response_type: str | None = None + last_answer_text: str | None = None + last_intent: str | None = None + + +@dataclass(frozen=True) +class 
DashboardChatFollowUpContext: + """Prototype-style follow-up metadata returned by the router.""" + + is_follow_up: bool + follow_up_type: str | None = None + reusable_elements: dict[str, Any] = field(default_factory=dict) + modification_instruction: str | None = None @dataclass(frozen=True) @@ -36,9 +59,14 @@ class DashboardChatIntentDecision: """Intent-routing outcome.""" intent: DashboardChatIntent + confidence: float reason: str - force_sql_path: bool = False + missing_info: list[str] = field(default_factory=list) + force_tool_usage: bool = False clarification_question: str | None = None + follow_up_context: DashboardChatFollowUpContext = field( + default_factory=lambda: DashboardChatFollowUpContext(is_follow_up=False) + ) @dataclass(frozen=True) @@ -53,38 +81,6 @@ class DashboardChatRetrievedDocument: distance: float | None = None -@dataclass(frozen=True) -class DashboardChatTextFilterPlan: - """Filter that requires a distinct-values lookup before SQL generation.""" - - table_name: str - column_name: str - requested_value: str - - -@dataclass(frozen=True) -class DashboardChatQueryPlan: - """Structured plan produced before SQL generation.""" - - mode: DashboardChatPlanMode - reason: str - relevant_tables: list[str] = field(default_factory=list) - schema_lookup_tables: list[str] = field(default_factory=list) - text_filters: list[DashboardChatTextFilterPlan] = field(default_factory=list) - answer_strategy: str | None = None - clarification_question: str | None = None - - -@dataclass(frozen=True) -class DashboardChatSqlDraft: - """LLM-produced SQL draft and metadata.""" - - sql: str | None - reason: str - warnings: list[str] = field(default_factory=list) - clarification_question: str | None = None - - @dataclass(frozen=True) class DashboardChatSchemaSnippet: """Schema description for a warehouse table.""" @@ -92,19 +88,6 @@ class DashboardChatSchemaSnippet: table_name: str columns: list[dict[str, Any]] - def to_prompt_text(self) -> str: - """Format a compact schema 
summary for prompts.""" - column_lines = [] - for column in self.columns: - column_lines.append( - "- {name} ({data_type}, nullable={nullable})".format( - name=column.get("name"), - data_type=column.get("data_type"), - nullable=column.get("nullable"), - ) - ) - return f"Table: {self.table_name}\n" + "\n".join(column_lines) - @dataclass(frozen=True) class DashboardChatSqlValidationResult: @@ -133,19 +116,6 @@ def to_dict(self) -> dict[str, Any]: return asdict(self) -@dataclass(frozen=True) -class DashboardChatRelatedDashboard: - """Dashboard suggestion when the current dashboard is not sufficient.""" - - dashboard_id: int - title: str - reason: str - - def to_dict(self) -> dict[str, Any]: - """Return a serializable suggestion payload.""" - return asdict(self) - - @dataclass(frozen=True) class DashboardChatResponse: """Final runtime response returned by the LangGraph runner.""" @@ -153,24 +123,24 @@ class DashboardChatResponse: answer_text: str intent: DashboardChatIntent citations: list[DashboardChatCitation] = field(default_factory=list) - related_dashboards: list[DashboardChatRelatedDashboard] = field(default_factory=list) warnings: list[str] = field(default_factory=list) sql: str | None = None sql_results: list[dict[str, Any]] | None = None + usage: dict[str, Any] = field(default_factory=dict) + tool_calls: list[dict[str, Any]] = field(default_factory=list) metadata: dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict[str, Any]: """Return a serializable payload.""" - return { + payload = { "answer_text": self.answer_text, "intent": self.intent.value, "citations": [citation.to_dict() for citation in self.citations], - "related_dashboards": [ - related_dashboard.to_dict() - for related_dashboard in self.related_dashboards - ], "warnings": self.warnings, "sql": self.sql, "sql_results": self.sql_results, + "usage": self.usage, + "tool_calls": self.tool_calls, "metadata": self.metadata, } + return json.loads(json.dumps(payload, 
cls=DjangoJSONEncoder)) diff --git a/ddpui/core/dashboard_chat/session_cache.py b/ddpui/core/dashboard_chat/session_cache.py new file mode 100644 index 000000000..d96216b20 --- /dev/null +++ b/ddpui/core/dashboard_chat/session_cache.py @@ -0,0 +1,101 @@ +"""Session-scoped cache helpers for dashboard chat runtime snapshots.""" + +from typing import Any + +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet + +DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS = 24 * 60 * 60 + + +def build_dashboard_chat_session_snapshot_cache_key(session_id: str) -> str: + """Return the cache key used for one chat session's runtime snapshot.""" + return f"dashboard_chat:session_snapshot:{session_id}" + + +def serialize_allowlist(allowlist: DashboardChatAllowlist) -> dict[str, Any]: + """Convert an allowlist to a cache-safe dictionary payload.""" + return { + "chart_tables": sorted(allowlist.chart_tables), + "upstream_tables": sorted(allowlist.upstream_tables), + "allowed_tables": sorted(allowlist.allowed_tables), + "allowed_unique_ids": sorted(allowlist.allowed_unique_ids), + "unique_id_to_table": dict(allowlist.unique_id_to_table), + "table_to_unique_ids": { + table_name: sorted(unique_ids) + for table_name, unique_ids in allowlist.table_to_unique_ids.items() + }, + } + + +def deserialize_allowlist(payload: dict[str, Any] | None) -> DashboardChatAllowlist: + """Rebuild an allowlist from cached data.""" + payload = payload or {} + return DashboardChatAllowlist( + chart_tables=set(payload.get("chart_tables") or []), + upstream_tables=set(payload.get("upstream_tables") or []), + allowed_tables=set(payload.get("allowed_tables") or []), + allowed_unique_ids=set(payload.get("allowed_unique_ids") or []), + unique_id_to_table=dict(payload.get("unique_id_to_table") or {}), + table_to_unique_ids={ + table_name: set(unique_ids) + for table_name, unique_ids in (payload.get("table_to_unique_ids") or 
{}).items() + }, + ) + + +def serialize_schema_snippets( + snippets: dict[str, DashboardChatSchemaSnippet], +) -> dict[str, Any]: + """Convert schema snippets to a cache-safe dictionary payload.""" + return { + table_name: { + "table_name": snippet.table_name, + "columns": list(snippet.columns), + } + for table_name, snippet in snippets.items() + } + + +def deserialize_schema_snippets( + payload: dict[str, Any] | None, +) -> dict[str, DashboardChatSchemaSnippet]: + """Rebuild schema snippets from cached data.""" + snippets: dict[str, DashboardChatSchemaSnippet] = {} + for table_name, snippet_payload in (payload or {}).items(): + snippets[table_name.lower()] = DashboardChatSchemaSnippet( + table_name=str(snippet_payload.get("table_name") or table_name), + columns=list(snippet_payload.get("columns") or []), + ) + return snippets + + +def serialize_distinct_cache( + distinct_cache: set[tuple[str, str, str]], +) -> dict[str, Any]: + """Convert validated distinct values to a cache-safe nested payload.""" + serialized: dict[str, dict[str, list[str]]] = {} + for table_name, column_name, value in distinct_cache: + serialized.setdefault(table_name, {}).setdefault(column_name, []).append(value) + + return { + table_name: { + column_name: sorted(set(values)) + for column_name, values in column_map.items() + } + for table_name, column_map in serialized.items() + } + + +def deserialize_distinct_cache( + payload: dict[str, Any] | None, +) -> set[tuple[str, str, str]]: + """Rebuild validated distinct values from cached data.""" + distinct_cache: set[tuple[str, str, str]] = set() + for table_name, column_map in (payload or {}).items(): + for column_name, values in (column_map or {}).items(): + for value in values or []: + distinct_cache.add( + (str(table_name).lower(), str(column_name).lower(), str(value)) + ) + return distinct_cache diff --git a/ddpui/core/dashboard_chat/session_service.py b/ddpui/core/dashboard_chat/session_service.py index 92eba7760..84317e394 100644 --- 
a/ddpui/core/dashboard_chat/session_service.py +++ b/ddpui/core/dashboard_chat/session_service.py @@ -1,5 +1,6 @@ """Session and message persistence helpers for dashboard chat.""" +from dataclasses import dataclass from uuid import UUID from django.db import IntegrityError @@ -7,6 +8,7 @@ from django.db.models import Max from django.utils import timezone +from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name from ddpui.core.dashboard_chat.runtime_types import DashboardChatConversationMessage from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import ( @@ -21,6 +23,14 @@ class DashboardChatSessionError(Exception): """Raised when a dashboard chat session cannot be created or reused.""" +@dataclass(frozen=True) +class DashboardChatMessageCreateResult: + """Outcome of creating or reusing one persisted chat message.""" + + message: DashboardChatMessage + created: bool + + def get_or_create_dashboard_chat_session( *, orguser: OrgUser, @@ -29,10 +39,17 @@ def get_or_create_dashboard_chat_session( ) -> DashboardChatSession: """Create a new session or validate an existing one for the current dashboard.""" if session_id is None: + collection_name = None + if orguser.org.dbt and orguser.org.dbt.vector_last_ingested_at is not None: + collection_name = build_dashboard_chat_collection_name( + orguser.org.id, + version=orguser.org.dbt.vector_last_ingested_at, + ) return DashboardChatSession.objects.create( org=orguser.org, orguser=orguser, dashboard=dashboard, + vector_collection_name=collection_name, ) try: @@ -58,6 +75,20 @@ def create_dashboard_chat_user_message( client_message_id: str | None, ) -> DashboardChatMessage: """Persist one user message and advance the session timestamp.""" + return create_dashboard_chat_user_message_with_status( + session=session, + content=content, + client_message_id=client_message_id, + ).message + + +def create_dashboard_chat_user_message_with_status( + *, + session: 
DashboardChatSession, + content: str, + client_message_id: str | None, +) -> DashboardChatMessageCreateResult: + """Persist one user message and report whether a new row was created.""" return _create_dashboard_chat_message( session=session, role=DashboardChatMessageRole.USER.value, @@ -80,7 +111,7 @@ def create_dashboard_chat_assistant_message( content=content, client_message_id=None, payload=payload, - ) + ).message def list_dashboard_chat_history( @@ -93,7 +124,11 @@ def list_dashboard_chat_history( if exclude_message_id is not None: query = query.exclude(id=exclude_message_id) return [ - DashboardChatConversationMessage(role=message.role, content=message.content) + DashboardChatConversationMessage( + role=message.role, + content=message.content, + payload=message.payload or {}, + ) for message in query ] @@ -109,6 +144,22 @@ def serialize_dashboard_chat_message(message: DashboardChatMessage) -> dict: } +def find_dashboard_chat_assistant_reply( + *, + session: DashboardChatSession, + user_message: DashboardChatMessage, +) -> DashboardChatMessage | None: + """Return the first assistant reply that follows a user turn, if it exists.""" + return ( + session.messages.filter( + role=DashboardChatMessageRole.ASSISTANT.value, + sequence_number__gt=user_message.sequence_number, + ) + .order_by("sequence_number") + .first() + ) + + def _create_dashboard_chat_message( *, session: DashboardChatSession, @@ -116,8 +167,9 @@ def _create_dashboard_chat_message( content: str, client_message_id: str | None, payload: dict | None, -) -> DashboardChatMessage: +) -> DashboardChatMessageCreateResult: """Create a session-scoped chat message with a stable next sequence number.""" + created = False with transaction.atomic(): locked_session = DashboardChatSession.objects.select_for_update().get(id=session.id) if client_message_id: @@ -126,7 +178,10 @@ def _create_dashboard_chat_message( client_message_id=client_message_id, ).first() if existing_message is not None: - return 
existing_message + return DashboardChatMessageCreateResult( + message=existing_message, + created=False, + ) next_sequence_number = ( locked_session.messages.aggregate(max_sequence_number=Max("sequence_number"))[ @@ -143,6 +198,7 @@ def _create_dashboard_chat_message( client_message_id=client_message_id, payload=payload, ) + created = True except IntegrityError: if not client_message_id: raise @@ -153,4 +209,4 @@ def _create_dashboard_chat_message( if message is None: raise DashboardChatSession.objects.filter(id=locked_session.id).update(updated_at=timezone.now()) - return message + return DashboardChatMessageCreateResult(message=message, created=created) diff --git a/ddpui/core/dashboard_chat/vector_documents.py b/ddpui/core/dashboard_chat/vector_documents.py index 2f6c2dd27..8c25be46f 100644 --- a/ddpui/core/dashboard_chat/vector_documents.py +++ b/ddpui/core/dashboard_chat/vector_documents.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from datetime import datetime +from datetime import timezone from enum import Enum from hashlib import sha256 from typing import Any @@ -17,11 +18,40 @@ class DashboardChatSourceType(str, Enum): DBT_CATALOG = "dbt_catalog" -def build_dashboard_chat_collection_name(org_id: int, prefix: str = "org_") -> str: - """Build the per-org Chroma collection name.""" +def build_dashboard_chat_collection_base_name(org_id: int, prefix: str = "org_") -> str: + """Build the unversioned base name used for one org's dashboard chat collections.""" return f"{prefix}{org_id}" +def build_dashboard_chat_collection_version(versioned_at: datetime | None) -> str | None: + """Build a stable UTC version suffix from a timestamp.""" + if versioned_at is None: + return None + normalized = versioned_at + if normalized.tzinfo is None: + normalized = normalized.replace(tzinfo=timezone.utc) + normalized = normalized.astimezone(timezone.utc) + return normalized.strftime("%Y%m%dT%H%M%S%fZ") + + +def build_dashboard_chat_collection_name( + org_id: int, + prefix: 
str = "org_", + version: datetime | str | None = None, +) -> str: + """Build the Chroma collection name for an org, optionally versioned.""" + base_name = build_dashboard_chat_collection_base_name(org_id, prefix) + if version is None: + return base_name + if isinstance(version, datetime): + version_suffix = build_dashboard_chat_collection_version(version) + else: + version_suffix = str(version).strip() or None + if not version_suffix: + return base_name + return f"{base_name}__{version_suffix}" + + def compute_dashboard_chat_document_hash(content: str) -> str: """Compute a stable content hash used in vector metadata and IDs.""" return sha256(content.encode("utf-8")).hexdigest() diff --git a/ddpui/core/dashboard_chat/vector_store.py b/ddpui/core/dashboard_chat/vector_store.py index 1c33e539c..928fb66b6 100644 --- a/ddpui/core/dashboard_chat/vector_store.py +++ b/ddpui/core/dashboard_chat/vector_store.py @@ -8,6 +8,7 @@ from ddpui.core.dashboard_chat.vector_documents import ( DashboardChatSourceType, DashboardChatVectorDocument, + build_dashboard_chat_collection_base_name, build_dashboard_chat_collection_name, ) @@ -21,6 +22,9 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]: def embed_query(self, text: str) -> list[float]: """Embed a single query.""" + def reset_usage(self) -> None: + """Reset per-turn embedding usage before a new runtime invocation.""" + class OpenAIEmbeddingProvider: """OpenAI embeddings adapter for dashboard chat retrieval.""" @@ -33,25 +37,61 @@ def __init__( ): self.api_key = api_key or os.getenv("OPENAI_API_KEY") self.model = model + self.usage_events: list[dict[str, Any]] = [] if client is None: if not self.api_key: raise ValueError("OPENAI_API_KEY must be set for dashboard chat embeddings") from openai import OpenAI - client = OpenAI(api_key=self.api_key) + client = OpenAI(api_key=self.api_key, max_retries=2) self.client = client + def reset_usage(self) -> None: + """Reset aggregated embedding usage before one new chat 
turn.""" + self.usage_events = [] + def embed_documents(self, texts: list[str]) -> list[list[float]]: """Embed a batch of documents using OpenAI.""" if not texts: return [] response = self.client.embeddings.create(model=self.model, input=texts) + self._record_usage("embed_documents", response, len(texts)) return [item.embedding for item in response.data] def embed_query(self, text: str) -> list[float]: """Embed a single query using the document embedding path.""" return self.embed_documents([text])[0] + def usage_summary(self) -> dict[str, Any]: + """Return aggregated embedding usage for the current turn.""" + totals = { + "prompt_tokens": 0, + "total_tokens": 0, + } + for event in self.usage_events: + totals["prompt_tokens"] += event.get("prompt_tokens", 0) + totals["total_tokens"] += event.get("total_tokens", 0) + return { + "model": self.model, + "calls": list(self.usage_events), + "totals": totals, + } + + def _record_usage(self, operation: str, response: Any, input_count: int) -> None: + """Capture embedding usage from one OpenAI embeddings response.""" + usage = getattr(response, "usage", None) + if usage is None: + return + self.usage_events.append( + { + "operation": operation, + "model": self.model, + "input_count": input_count, + "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + ) + @dataclass(frozen=True) class DashboardChatVectorQueryResult: @@ -97,42 +137,107 @@ def _build_client(self) -> Any: ssl=self.config.chroma_ssl, ) - def collection_name(self, org_id: int) -> str: + def collection_name( + self, + org_id: int, + *, + version: Any = None, + ) -> str: """Return the Chroma collection name for an org.""" - return build_dashboard_chat_collection_name(org_id, self.config.collection_prefix) + return build_dashboard_chat_collection_name( + org_id, + self.config.collection_prefix, + version=version, + ) - def create_collection(self, org_id: int) -> Any: + def create_collection( + 
self, + org_id: int, + *, + collection_name: str | None = None, + ) -> Any: """Create or load the Chroma collection for an org.""" + resolved_collection_name = collection_name or self.collection_name(org_id) return self.client.get_or_create_collection( - name=self.collection_name(org_id), + name=resolved_collection_name, metadata={"org_id": str(org_id)}, ) - def load_collection(self, org_id: int) -> Any | None: + def load_collection( + self, + org_id: int, + *, + collection_name: str | None = None, + allow_legacy_fallback: bool = True, + ) -> Any | None: """Load an existing Chroma collection for an org.""" from chromadb.errors import InvalidCollectionException try: - return self.client.get_collection(name=self.collection_name(org_id)) + resolved_collection_name = collection_name or self.collection_name(org_id) + return self.client.get_collection(name=resolved_collection_name) except (InvalidCollectionException, ValueError): - return None + if collection_name is None or not allow_legacy_fallback: + return None + try: + return self.client.get_collection( + name=build_dashboard_chat_collection_base_name( + org_id, + self.config.collection_prefix, + ) + ) + except (InvalidCollectionException, ValueError): + return None - def delete_collection(self, org_id: int) -> bool: + def delete_collection( + self, + org_id: int, + *, + collection_name: str | None = None, + ) -> bool: """Delete the Chroma collection for an org if it exists.""" - if self.load_collection(org_id) is None: + resolved_collection_name = collection_name or self.collection_name(org_id) + if self.load_collection( + org_id, + collection_name=resolved_collection_name, + allow_legacy_fallback=False, + ) is None: return False - self.client.delete_collection(name=self.collection_name(org_id)) + self.client.delete_collection(name=resolved_collection_name) return True + def list_collection_names(self) -> list[str]: + """Return all Chroma collection names for the current client.""" + raw_collections = 
self.client.list_collections() + collection_names: list[str] = [] + for collection in raw_collections: + if isinstance(collection, str): + collection_names.append(collection) + continue + name = getattr(collection, "name", None) + if name: + collection_names.append(str(name)) + return collection_names + + def list_org_collection_names(self, org_id: int) -> list[str]: + """Return all collection names that belong to one org.""" + base_name = build_dashboard_chat_collection_base_name(org_id, self.config.collection_prefix) + return [ + collection_name + for collection_name in self.list_collection_names() + if collection_name == base_name or collection_name.startswith(f"{base_name}__") + ] + def get_documents( self, org_id: int, source_types: list[DashboardChatSourceType | str] | None = None, dashboard_id: int | None = None, include_documents: bool = False, + collection_name: str | None = None, ) -> list[DashboardChatStoredDocument]: """Load stored documents for an org using metadata filters.""" - collection = self.load_collection(org_id) + collection = self.load_collection(org_id, collection_name=collection_name) if collection is None: return [] @@ -152,9 +257,14 @@ def delete_documents( ids: list[str] | None = None, source_types: list[DashboardChatSourceType | str] | None = None, dashboard_id: int | None = None, + collection_name: str | None = None, ) -> int: """Delete matching documents from an org collection.""" - collection = self.load_collection(org_id) + collection = self.load_collection( + org_id, + collection_name=collection_name, + allow_legacy_fallback=False, + ) if collection is None: return 0 @@ -171,6 +281,7 @@ def delete_documents( source_types=source_types, dashboard_id=dashboard_id, include_documents=False, + collection_name=collection_name, ) ) ) @@ -181,12 +292,13 @@ def upsert_documents( self, org_id: int, documents: list[DashboardChatVectorDocument], + collection_name: str | None = None, ) -> list[str]: """Upsert documents into the org-specific 
Chroma collection.""" if not documents: return [] - collection = self.create_collection(org_id) + collection = self.create_collection(org_id, collection_name=collection_name) contents = [document.content for document in documents] document_ids = [document.document_id for document in documents] metadatas = [document.metadata() for document in documents] @@ -200,6 +312,21 @@ def upsert_documents( ) return document_ids + def embed_query(self, query_text: str) -> list[float]: + """Build one query embedding that can be reused across filtered retrieval calls.""" + return self.embedding_provider.embed_query(query_text) + + def reset_usage(self) -> None: + """Reset embedding usage counters before one new runtime invocation.""" + if hasattr(self.embedding_provider, "reset_usage"): + self.embedding_provider.reset_usage() + + def usage_summary(self) -> dict[str, Any]: + """Return embedding usage from the configured provider when supported.""" + if hasattr(self.embedding_provider, "usage_summary"): + return self.embedding_provider.usage_summary() + return {} + def query( self, org_id: int, @@ -207,15 +334,17 @@ def query( n_results: int = 5, source_types: list[DashboardChatSourceType | str] | None = None, dashboard_id: int | None = None, + query_embedding: list[float] | None = None, + collection_name: str | None = None, ) -> list[DashboardChatVectorQueryResult]: """Query the org-specific Chroma collection.""" - collection = self.load_collection(org_id) + collection = self.load_collection(org_id, collection_name=collection_name) if collection is None: return [] where = self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id) result = collection.query( - query_embeddings=[self.embedding_provider.embed_query(query_text)], + query_embeddings=[query_embedding or self.embed_query(query_text)], n_results=n_results, where=where, include=["documents", "metadatas", "distances"], diff --git a/ddpui/core/dashboard_chat/warehouse_tools.py 
b/ddpui/core/dashboard_chat/warehouse_tools.py index 9748768bd..c708693d6 100644 --- a/ddpui/core/dashboard_chat/warehouse_tools.py +++ b/ddpui/core/dashboard_chat/warehouse_tools.py @@ -1,6 +1,7 @@ """Warehouse access helpers used by dashboard chat runtime.""" import json +import logging from typing import Any from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet @@ -8,6 +9,8 @@ from ddpui.utils import secretsmanager from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory +logger = logging.getLogger(__name__) + class DashboardChatWarehouseToolsError(Exception): """Raised when a warehouse-backed dashboard chat action cannot complete.""" @@ -42,7 +45,16 @@ def get_schema_snippets(self, tables: list[str]) -> dict[str, DashboardChatSchem if parsed_table is None: continue schema_name, bare_table_name = parsed_table - columns = self.warehouse_client.get_table_columns(schema_name, bare_table_name) + try: + columns = self.warehouse_client.get_table_columns(schema_name, bare_table_name) + except Exception as error: + logger.warning( + "dashboard chat schema lookup failed for %s.%s: %s", + schema_name, + bare_table_name, + error, + ) + continue if not columns: continue snippets[table_name.lower()] = DashboardChatSchemaSnippet( diff --git a/ddpui/migrations/0154_dashboardchatprompttemplate.py b/ddpui/migrations/0154_dashboardchatprompttemplate.py new file mode 100644 index 000000000..65f996bd2 --- /dev/null +++ b/ddpui/migrations/0154_dashboardchatprompttemplate.py @@ -0,0 +1,240 @@ +# Generated by Django 4.2 on 2026-03-21 10:44 + +from django.db import migrations, models +import django.utils.timezone + + +def seed_dashboard_chat_prompt_templates(apps, schema_editor): + DashboardChatPromptTemplate = apps.get_model("ddpui", "DashboardChatPromptTemplate") + + defaults = { + "intent_classification": """# Enhanced Intent Classification System Prompt + +You are an intent classification agent for a "Chat with Dashboards" system. 
Your job is to classify user queries about the CURRENT dashboard, its charts, its datasets, the dbt models that power it, and the organization/dashboard context attached to it. Questions about other dashboards, similar dashboards, or dashboards beyond the current one are **irrelevant**. + +## Intent Categories + +1. **query_with_sql** - Needs data analysis (numbers, trends, rankings, breakdowns, comparisons) +2. **query_without_sql** - Can be answered from metadata (definitions, calculation logic, chart explanations) +3. **follow_up_sql** - Follow-up query that modifies previous SQL query (add dimension, filter, timeframe) +4. **follow_up_context** - Follow-up requesting more explanation about previous results +5. **needs_clarification** - Question is too vague or ambiguous +6. **small_talk** - Greetings, jokes, non-business conversation +7. **irrelevant** - Questions outside the current dashboard's scope, including requests about other dashboards + +## Classification Guidelines + +**query_with_sql** examples: +- "How many students are in the EcoChamps program?" +- "Show me session completion trends over time" +- "Top 10 schools by assessment performance" +- "Compare reading comprehension by city" +- "What's the monthly breakdown of planned vs conducted sessions?" + +**query_without_sql** examples: +- "What does 'planned_session' mean?" +- "How is reading comprehension calculated?" +- "Which dataset powers the student count chart?" +- "What metrics are available in this dashboard?" +- "Explain what this chart shows" +- "What is the mission and vision of Bhumi?" 
+- "Summarize the Bhumi programs described in the context file"
+
+**follow_up_sql** examples (requires previous SQL context):
+- "Now split by chapter" (add dimension)
+- "Filter to CGI donors only" (add filter)
+- "Same but for last quarter" (modify timeframe)
+- "Show weekly instead" (change aggregation)
+
+**follow_up_context** examples (requires previous context):
+- "Explain that metric"
+- "How is that calculated?"
+- "What does that mean?"
+- "Tell me more about that"
+
+**needs_clarification** examples:
+- "Is performance improving?" (missing: which metric, time period)
+- "Show me the data" (missing: which data, program)
+- "What's the biggest issue?" (missing: context, metric)
+
+## Follow-up Detection
+
+When conversation history is available, classify as follow-up **only if the new query depends on the previous turn**. Use all three tests:
+1. Explicit reference to prior output ("that", "same", "those results", "the previous query").
+2. Modification language applied to prior query ("now split by", "filter that", "same but", "add chapter", "remove donor").
+3. Explanations about prior output ("explain that", "what does that mean").
+
+If the question can stand alone and be answered without previous context, treat it as a new `query_with_sql` or `query_without_sql`, **not** follow_up_sql/follow_up_context.
+
+If the query does depend on the previous turn, classify it as follow_up_sql or follow_up_context based on whether SQL modification is needed.
+
+## Current-Dashboard Boundary
+
+- Treat requests about "other dashboards", "related dashboards", "similar dashboards", or "which dashboard should I look at" as **irrelevant**.
+- Treat requests that compare this dashboard to some other dashboard as **irrelevant** unless the question can be answered entirely from the current dashboard's own data and context.
+- The assistant is scoped to one dashboard only.
+ +## Output Format + +Respond with valid JSON only: + +For new queries: +```json +{ + "intent": "query_with_sql", + "confidence": 0.9, + "reason": "User is asking for specific numbers requiring data analysis", + "force_tool_usage": true, + "follow_up_context": { + "is_follow_up": false, + "follow_up_type": null, + "reusable_elements": {}, + "modification_instruction": null + } +} +``` + +For follow-up queries: +```json +{ + "intent": "follow_up_sql", + "confidence": 0.95, + "reason": "User wants to modify previous query by adding dimension", + "force_tool_usage": true, + "follow_up_context": { + "is_follow_up": true, + "follow_up_type": "add_dimension", + "reusable_elements": { + "previous_sql": "from conversation context", + "previous_tables": ["staging.eco_student25_26_stg"], + "add_instruction": "group by chapter" + }, + "modification_instruction": "split by chapter" + } +} +``` + +## Tool Usage Rules + +Set `force_tool_usage: true` for: +- All query_with_sql intents +- All follow_up_sql intents +- query_without_sql when specific chart/dataset lookup needed + +Set `force_tool_usage: false` for: +- small_talk, needs_clarification, irrelevant +- query_without_sql for general explanation questions + +## Context Awareness + +Use conversation history to: +- Detect follow-up patterns +- Understand context references ("that metric", "same query") +- Determine if SQL modification or explanation is needed +- Extract reusable elements (tables, metrics, filters) from previous queries + +Classify the following user query:""", + "new_query_system": """You are a data analysis assistant with access to tools. Your job is to help users understand program data and answer their questions accurately. + +IMPORTANT RULES: +1. For data questions: ALWAYS start by searching for relevant charts using retrieve_docs +2. Use chart metadata to identify which datasets/tables to query - charts are your roadmap to data +3. 
For definition questions: You may use tools to get context or answer from human context
+4. Never guess table names, column names, or data values
+5. Always call get_distinct_values before using WHERE clauses on text columns
+6. Only write SELECT queries, never INSERT/UPDATE/DELETE
+7. CRITICAL: When list_tables_by_keyword returns tables, you MUST use the EXACT table names returned - never modify schema or table names
+8. NEVER assume tables exist in specific schemas - always discover them using list_tables_by_keyword first
+9. When counting entities (students, people, sites, states, programs, cases, etc.), avoid COUNT(*). Prefer COUNT(DISTINCT id_column) using the most specific ID/name field available (e.g., student_id, roll_no, state_name). If unsure which field uniquely identifies the entity, inspect schema first, and fetch distinct values for candidate ID columns before writing SQL.
+10. When you propose SQL, immediately call run_sql_query to execute it. Do not ask for confirmation.
+11. Call get_distinct_values only for columns you plan to filter in the current query.
+12. Limit get_schema_snippets to the tables you intend to query (avoid extra tables).
+13. If a requested geographic/location field is missing, choose the most specific available location dimension (e.g., city → chapter → school) and answer using that, explicitly noting the substitution in the response.
+14. When someone asks for "changes" in metrics, look for increases and decreases by comparing values across time periods (baseline vs midline vs endline) or comparing current vs previous periods.
+15. Only use the EXACT schema-qualified table names returned by the tools. Do not rewrite schemas or table names.
+16. IMPORTANT: Only tables relevant to the current dashboard are accessible. If a table is not found, it may not be relevant to this dashboard. Use charts from the current dashboard to guide your analysis.
+17. Do not suggest other dashboards.
If the question asks about dashboards beyond the current one, stay within the current dashboard context and answer only with data available here. + +Available tools: +- retrieve_docs: Find relevant charts, datasets, context, or dbt models +- search_dbt_models: Search for dbt models by keyword +- get_dbt_model_info: Get detailed info about a specific dbt model +- get_schema_snippets: Get column names and types for tables +- get_distinct_values: Get actual values in a column (required before WHERE clauses) +- check_table_row_count: Check if a table has data before querying +- run_sql_query: Execute a read-only SQL query + +Tool usage flow for data questions: +1. FIRST: Call retrieve_docs to find relevant CHARTS that match the question +2. If charts found: Use the dataset/table names from chart metadata to guide your queries +3. If no relevant chart datasets found: ALWAYS call list_tables_by_keyword with the main entity (e.g. "students", "fellowship", "baseline") +4. Call get_schema_snippets ONLY for the exact table names returned by list_tables_by_keyword +5. Use the EXACT table names from step 3/4 in your SQL queries - do not change schema or table names +6. If filtering: Call get_distinct_values for filter columns +7. ALWAYS call run_sql_query with validated SQL - NEVER give up without trying""", + "follow_up_system": """You are handling a follow-up query that modifies a previous question. + +FOLLOW-UP RULES: +1. Reuse context from the previous query when possible (tables, metrics, base SQL) +2. For SQL modifications: modify the previous SQL rather than starting from scratch +3. For new filters: ALWAYS call get_distinct_values first +4. For new dimensions: ensure the column exists in the schema +5. When you generate SQL, execute it by calling run_sql_query immediately; do not ask for confirmation. +6. Only fetch distinct values for columns you will filter, and limit schema lookups to tables you plan to query. +7. Stay within the current dashboard only. 
Do not suggest or switch to other dashboards.""", + "small_talk_capabilities": ( + "You are a helpful assistant for questions about the current dashboard. " + "Briefly explain what you can do: retrieve dashboard/chart/dbt context, " + "run safe read-only SQL for counts/trends/breakdowns, and clarify metrics from this dashboard. " + "Keep answers concise, friendly, and non-technical when possible." + ), + } + + for key, prompt in defaults.items(): + DashboardChatPromptTemplate.objects.update_or_create( + key=key, + defaults={"prompt": prompt}, + ) + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0153_alter_dashboardchatmessage_created_at_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="DashboardChatPromptTemplate", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "key", + models.CharField( + choices=[ + ("intent_classification", "Intent Classification"), + ("new_query_system", "New Query System"), + ("follow_up_system", "Follow-up System"), + ("small_talk_capabilities", "Small Talk Capabilities"), + ], + max_length=64, + unique=True, + ), + ), + ("prompt", models.TextField()), + ("created_at", models.DateTimeField(default=django.utils.timezone.now)), + ("updated_at", models.DateTimeField(auto_now=True)), + ], + options={ + "ordering": ["key"], + }, + ), + migrations.RunPython( + seed_dashboard_chat_prompt_templates, + migrations.RunPython.noop, + ), + ] diff --git a/ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py b/ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py new file mode 100644 index 000000000..6095fed39 --- /dev/null +++ b/ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2 on 2026-03-23 08:42 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", 
"0154_dashboardchatprompttemplate"), + ] + + operations = [ + migrations.AddField( + model_name="dashboardchatsession", + name="vector_collection_name", + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index 2aee1c5bd..7abed4c81 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -1,12 +1,14 @@ import uuid from enum import Enum +from django.core.cache import cache from django.db import models from django.utils import timezone from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org from ddpui.models.org_user import OrgUser +from ddpui.core.dashboard_chat.prompt_cache import build_dashboard_chat_prompt_cache_key class DashboardChatMessageRole(str, Enum): @@ -20,6 +22,54 @@ def choices(cls): return [(key.value, key.name) for key in cls] +class DashboardChatPromptTemplateKey(models.TextChoices): + """Runtime-editable prompt templates used by the dashboard chat LLM client.""" + + INTENT_CLASSIFICATION = ( + "intent_classification", + "Intent Classification", + ) + NEW_QUERY_SYSTEM = ( + "new_query_system", + "New Query System", + ) + FOLLOW_UP_SYSTEM = ( + "follow_up_system", + "Follow-up System", + ) + SMALL_TALK_CAPABILITIES = ( + "small_talk_capabilities", + "Small Talk Capabilities", + ) + + +class DashboardChatPromptTemplate(models.Model): + """Database-backed prompt template for dashboard chat LLM calls.""" + + key = models.CharField( + max_length=64, + unique=True, + choices=DashboardChatPromptTemplateKey.choices, + ) + prompt = models.TextField() + created_at = models.DateTimeField(default=timezone.now) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ["key"] + + def save(self, *args, **kwargs): + """Persist the prompt template and invalidate its runtime cache entry.""" + super().save(*args, **kwargs) + cache.delete(build_dashboard_chat_prompt_cache_key(self.key)) + + def 
delete(self, *args, **kwargs): + """Delete the prompt template and invalidate its runtime cache entry.""" + cache_key = build_dashboard_chat_prompt_cache_key(self.key) + super().delete(*args, **kwargs) + cache.delete(cache_key) + + class OrgAIContext(models.Model): """Organization-level markdown context used by dashboard chat.""" @@ -61,6 +111,7 @@ class DashboardChatSession(models.Model): org = models.ForeignKey(Org, on_delete=models.CASCADE) orguser = models.ForeignKey(OrgUser, null=True, on_delete=models.SET_NULL) dashboard = models.ForeignKey(Dashboard, on_delete=models.SET_NULL, null=True) + vector_collection_name = models.CharField(max_length=255, null=True, blank=True) created_at = models.DateTimeField(default=timezone.now) updated_at = models.DateTimeField(auto_now=True) diff --git a/ddpui/tests/core/dashboard_chat/test_ingestion.py b/ddpui/tests/core/dashboard_chat/test_ingestion.py index a1ae5e44c..32c0efb09 100644 --- a/ddpui/tests/core/dashboard_chat/test_ingestion.py +++ b/ddpui/tests/core/dashboard_chat/test_ingestion.py @@ -1,17 +1,11 @@ -import os from dataclasses import dataclass from datetime import datetime from pathlib import Path from unittest.mock import Mock, patch import json -import django import pytest -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") -os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" -django.setup() - from django.contrib.auth.models import User from django.utils import timezone @@ -22,12 +16,13 @@ ) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService +from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name from ddpui.core.dashboard_chat.vector_store import DashboardChatStoredDocument from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpprefect import DBTCLIPROFILE from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import 
DashboardAIContext, OrgAIContext -from ddpui.models.org import Org, OrgDbt, OrgPrefectBlockv1 +from ddpui.models.org import Org, OrgDbt, OrgPrefectBlockv1, TransformType from ddpui.models.org_user import OrgUser from ddpui.models.role_based_access import Role from ddpui.models.visualization import Chart @@ -40,18 +35,47 @@ class FakeDashboardChatVectorStore: """In-memory vector store used to exercise ingest diffing logic.""" def __init__(self): - self.documents_by_org = {} + self.documents_by_collection = {} self.upsert_calls = [] self.delete_calls = [] + def collection_name(self, org_id, *, version=None): + return build_dashboard_chat_collection_name(org_id, version=version) + + def load_collection(self, org_id, *, collection_name=None, allow_legacy_fallback=True): + resolved_collection_name = collection_name or self.collection_name(org_id) + if resolved_collection_name in self.documents_by_collection: + return {"name": resolved_collection_name} + if collection_name and allow_legacy_fallback: + legacy_collection_name = self.collection_name(org_id) + if legacy_collection_name in self.documents_by_collection: + return {"name": legacy_collection_name} + return None + + def delete_collection(self, org_id, *, collection_name=None): + resolved_collection_name = collection_name or self.collection_name(org_id) + existed = resolved_collection_name in self.documents_by_collection + self.documents_by_collection.pop(resolved_collection_name, None) + return existed + + def list_org_collection_names(self, org_id): + base_name = self.collection_name(org_id) + return [ + collection_name + for collection_name in self.documents_by_collection + if collection_name == base_name or collection_name.startswith(f"{base_name}__") + ] + def get_documents( self, org_id, source_types=None, dashboard_id=None, include_documents=False, + collection_name=None, ): - rows = list(self.documents_by_org.get(org_id, {}).values()) + resolved_collection_name = collection_name or 
self.collection_name(org_id) + rows = list(self.documents_by_collection.get(resolved_collection_name, {}).values()) if source_types: allowed = { source_type.value if hasattr(source_type, "value") else source_type @@ -69,9 +93,10 @@ def get_documents( for row in rows ] - def upsert_documents(self, org_id, documents): + def upsert_documents(self, org_id, documents, collection_name=None): self.upsert_calls.append([document.document_id for document in documents]) - org_documents = self.documents_by_org.setdefault(org_id, {}) + resolved_collection_name = collection_name or self.collection_name(org_id) + org_documents = self.documents_by_collection.setdefault(resolved_collection_name, {}) for document in documents: org_documents[document.document_id] = DashboardChatStoredDocument( document_id=document.document_id, @@ -80,16 +105,25 @@ def upsert_documents(self, org_id, documents): ) return [document.document_id for document in documents] - def delete_documents(self, org_id, ids=None, source_types=None, dashboard_id=None): + def delete_documents( + self, + org_id, + ids=None, + source_types=None, + dashboard_id=None, + collection_name=None, + ): self.delete_calls.append( { "org_id": org_id, "ids": list(ids) if ids is not None else None, "source_types": source_types, "dashboard_id": dashboard_id, + "collection_name": collection_name, } ) - org_documents = self.documents_by_org.setdefault(org_id, {}) + resolved_collection_name = collection_name or self.collection_name(org_id) + org_documents = self.documents_by_collection.setdefault(resolved_collection_name, {}) if ids is None: return 0 for document_id in ids: @@ -250,6 +284,66 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdb assert orgdbt.docs_generated_at is not None +def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_generating( + org, + orgdbt, + tmp_path, +): + """Git-backed dbt projects should refresh the local checkout before docs generation.""" + project_dir 
= tmp_path / "dashchat" / "dbtrepo" + target_dir = project_dir / "target" + target_dir.mkdir(parents=True) + (target_dir / "manifest.json").write_text( + json.dumps({"metadata": {"project_name": "dashchat"}, "nodes": {}, "sources": {}}), + encoding="utf-8", + ) + (target_dir / "catalog.json").write_text( + json.dumps({"nodes": {}, "sources": {}}), + encoding="utf-8", + ) + orgdbt.transform_type = TransformType.GIT + orgdbt.gitrepo_access_token_secret = "pat-secret" + orgdbt.save(update_fields=["transform_type", "gitrepo_access_token_secret"]) + + mock_git_manager = Mock() + + with patch( + "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.gather_dbt_project_params", + return_value=DbtProjectParams( + dbt_binary="/mock/dbt", + dbt_env_dir="/mock/env", + venv_binary="/mock/bin", + target="analytics", + project_dir=str(project_dir), + org_project_dir=str(project_dir.parent), + ), + ), patch( + "ddpui.core.dashboard_chat.dbt_docs.prefect_service.get_dbt_cli_profile_block", + return_value={"profile": {"dashchat": {"outputs": {"dev": {"type": "postgres"}}}}}, + ), patch( + "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.run_dbt_command", + return_value=Mock(stdout="ok", returncode=0), + ), patch( + "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.get_dbt_project_dir", + return_value=str(project_dir), + ), patch( + "ddpui.core.dashboard_chat.dbt_docs.secretsmanager.retrieve_github_pat", + return_value="actual-pat", + ) as mock_retrieve_pat, patch( + "ddpui.core.dashboard_chat.dbt_docs.GitManager", + return_value=mock_git_manager, + ) as mock_git_manager_class: + generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) + + mock_retrieve_pat.assert_called_once_with("pat-secret") + mock_git_manager_class.assert_called_once_with( + repo_local_path=str(project_dir), + pat="actual-pat", + validate_git=True, + ) + mock_git_manager.pull_changes.assert_called_once_with() + + def test_ingest_org_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, dashboard): 
"""A repeated identical build should skip writes, and a removed source should be deleted.""" OrgAIContext.objects.create( @@ -330,17 +424,25 @@ def test_ingest_org_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, d dashboard_context.save(update_fields=["markdown", "updated_at"]) third_result = service.ingest_org(org) + active_collection_name = build_dashboard_chat_collection_name( + org.id, + version=org.dbt.vector_last_ingested_at, + ) stored_source_types = { document.metadata["source_type"] - for document in vector_store.get_documents(org.id, include_documents=False) + for document in vector_store.get_documents( + org.id, + include_documents=False, + collection_name=active_collection_name, + ) } assert first_result.source_document_counts["dashboard_context"] == 1 - assert second_result.upserted_document_ids == [] + assert second_result.upserted_document_ids assert second_result.deleted_document_ids == [] - assert len(vector_store.upsert_calls) == upsert_count_after_first_ingest + assert len(vector_store.upsert_calls) == upsert_count_after_first_ingest + 2 assert third_result.source_document_counts["dashboard_context"] == 0 - assert third_result.deleted_document_ids + assert third_result.deleted_document_ids == [] assert "dashboard_context" not in stored_source_types @@ -393,9 +495,12 @@ def test_ingest_org_keeps_collections_isolated_per_org(org, orgdbt, orguser, das service.ingest_org(org) service.ingest_org(other_org) - assert set(vector_store.documents_by_org.keys()) == {org.id, other_org.id} - assert vector_store.documents_by_org[org.id] - assert vector_store.documents_by_org[other_org.id] + org_collection_names = vector_store.list_org_collection_names(org.id) + other_collection_names = vector_store.list_org_collection_names(other_org.id) + assert len(org_collection_names) == 1 + assert len(other_collection_names) == 1 + assert vector_store.documents_by_collection[org_collection_names[0]] + assert 
vector_store.documents_by_collection[other_collection_names[0]] other_orguser.delete() other_user.delete() @@ -430,7 +535,7 @@ def test_ingest_org_keeps_last_good_context_when_upsert_fails(org, orgdbt, orgus org.ai_context.updated_at = timezone.now() org.ai_context.save(update_fields=["markdown", "updated_at"]) - def _raise_on_upsert(org_id, documents): + def _raise_on_upsert(org_id, documents, collection_name=None): raise RuntimeError("upsert failed") vector_store.upsert_documents = _raise_on_upsert @@ -440,7 +545,8 @@ def _raise_on_upsert(org_id, documents): remaining_ids = { document.document_id - for document in vector_store.get_documents(org.id, include_documents=False) + for documents in vector_store.documents_by_collection.values() + for document in documents.values() } assert remaining_ids == original_ids assert vector_store.delete_calls == [] diff --git a/ddpui/tests/core/dashboard_chat/test_llm_client.py b/ddpui/tests/core/dashboard_chat/test_llm_client.py new file mode 100644 index 000000000..755f83ffe --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_llm_client.py @@ -0,0 +1,189 @@ +"""Tests for dashboard chat OpenAI client helpers.""" + +import json + +import ddpui.core.dashboard_chat.llm_client as llm_client_module +from ddpui.core.dashboard_chat.llm_client import OpenAIDashboardChatLlmClient +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatConversationContext, + DashboardChatIntent, + DashboardChatIntentDecision, +) + + +class FakePromptStore: + """Minimal prompt store stub for unit tests.""" + + def get(self, key): + return f"prompt:{key}" + + +class FakeCompletions: + """Capture outgoing chat completion payloads.""" + + def __init__(self): + self.calls = [] + self.response_content = "Composed answer" + + def create(self, **kwargs): + self.calls.append(kwargs) + return FakeResponse(self.response_content) + + +class FakeChat: + """Expose a chat.completions surface matching the OpenAI client.""" + + def __init__(self): + 
self.completions = FakeCompletions() + + +class FakeClient: + """Minimal fake OpenAI client.""" + + def __init__(self): + self.chat = FakeChat() + + +class FakeUsage: + """Minimal usage payload.""" + + prompt_tokens = 10 + completion_tokens = 5 + total_tokens = 15 + + +class FakeMessage: + """Minimal assistant message wrapper.""" + + def __init__(self, content): + self.content = content + + +class FakeChoice: + """Minimal choice wrapper.""" + + def __init__(self, content): + self.message = FakeMessage(content) + + +class FakeResponse: + """Minimal OpenAI response wrapper.""" + + def __init__(self, content): + self.choices = [FakeChoice(content)] + self.usage = FakeUsage() + + +class RaisingCompletions: + """Fake completions client that always raises.""" + + def create(self, **kwargs): + raise RuntimeError("boom") + + +class RaisingClient: + """Minimal fake client whose completions always fail.""" + + def __init__(self): + self.chat = type("Chat", (), {"completions": RaisingCompletions()})() + + +def test_classify_intent_uses_prototype_router_message_shape(): + """Intent classification should use the prototype router prompt contract.""" + fake_client = FakeClient() + fake_client.chat.completions.response_content = json.dumps( + { + "intent": "follow_up_sql", + "confidence": 0.95, + "reason": "Follow-up detected", + "force_tool_usage": True, + "missing_info": [], + "follow_up_context": { + "is_follow_up": True, + "follow_up_type": "add_dimension", + "reusable_elements": {"previous_sql": "SELECT COUNT(*) FROM analytics.program_reach"}, + "modification_instruction": "split by donor_type", + }, + } + ) + llm_client = OpenAIDashboardChatLlmClient( + api_key="test-key", + client=fake_client, + prompt_store=FakePromptStore(), + ) + + decision = llm_client.classify_intent( + user_query="Now split that by donor type", + conversation_context=DashboardChatConversationContext( + last_sql_query="SELECT COUNT(*) FROM analytics.program_reach", + 
last_tables_used=["analytics.program_reach"], + last_chart_ids=["2"], + last_response_type="sql_result", + ), + ) + + assert decision.intent == DashboardChatIntent.FOLLOW_UP_SQL + messages = fake_client.chat.completions.calls[0]["messages"] + assert messages[0]["role"] == "system" + assert "CONVERSATION CONTEXT" in messages[0]["content"] + assert "Previous SQL: SELECT COUNT(*) FROM analytics.program_reach" in messages[0]["content"] + assert messages[1] == { + "role": "user", + "content": "Classify this query: Now split that by donor type", + } + + +def test_compose_small_talk_uses_capabilities_prompt(): + """Small talk should use the DB-backed prototype capabilities prompt.""" + fake_client = FakeClient() + llm_client = OpenAIDashboardChatLlmClient( + api_key="test-key", + client=fake_client, + prompt_store=FakePromptStore(), + ) + + answer = llm_client.compose_small_talk("hi") + + assert answer == "Composed answer" + assert fake_client.chat.completions.calls[0]["messages"] == [ + {"role": "system", "content": "prompt:small_talk_capabilities"}, + {"role": "user", "content": "hi"}, + ] + + +def test_classify_intent_falls_back_to_needs_clarification_on_openai_failure(monkeypatch): + """Router failures should degrade safely instead of crashing the whole turn.""" + monkeypatch.setattr(llm_client_module, "sleep", lambda *_args: None) + llm_client = OpenAIDashboardChatLlmClient( + api_key="test-key", + client=RaisingClient(), + prompt_store=FakePromptStore(), + ) + + decision = llm_client.classify_intent( + user_query="Why did funding drop?", + conversation_context=DashboardChatConversationContext(), + ) + + assert decision.intent == DashboardChatIntent.NEEDS_CLARIFICATION + assert ( + decision.clarification_question + == OpenAIDashboardChatLlmClient.TECHNICAL_DIFFICULTIES_MESSAGE + ) + + +def test_reset_usage_clears_previous_usage_events(): + """Usage logging must be scoped to one dashboard-chat turn.""" + fake_client = FakeClient() + llm_client = 
OpenAIDashboardChatLlmClient( + api_key="test-key", + client=fake_client, + prompt_store=FakePromptStore(), + ) + + llm_client.compose_small_talk("hi") + assert llm_client.usage_summary()["totals"]["total_tokens"] == 15 + + llm_client.reset_usage() + + assert llm_client.usage_summary()["totals"]["total_tokens"] == 0 diff --git a/ddpui/tests/core/dashboard_chat/test_prompt_store.py b/ddpui/tests/core/dashboard_chat/test_prompt_store.py new file mode 100644 index 000000000..6d99b8182 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_prompt_store.py @@ -0,0 +1,69 @@ +"""Tests for dashboard chat prompt template storage and caching.""" + +import pytest +from django.core.cache import cache + +from ddpui.core.dashboard_chat.prompt_store import ( + DEFAULT_DASHBOARD_CHAT_PROMPTS, + DashboardChatPromptStore, +) +from ddpui.models.dashboard_chat import ( + DashboardChatPromptTemplate, + DashboardChatPromptTemplateKey, +) + +pytestmark = pytest.mark.django_db + + +@pytest.fixture(autouse=True) +def clear_cache(): + cache.clear() + yield + cache.clear() + + +def test_prompt_store_returns_default_when_no_db_override_exists(): + """Missing prompt rows should fall back to the built-in default prompt text.""" + store = DashboardChatPromptStore() + + prompt = store.get(DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION) + + assert ( + prompt + == DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION] + ) + + +def test_prompt_store_uses_db_override_and_invalidates_cache_on_save(): + """Saving a prompt template should invalidate the cached prompt immediately.""" + prompt_template = DashboardChatPromptTemplate.objects.get( + key=DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM, + ) + prompt_template.prompt = "first prompt" + prompt_template.save() + store = DashboardChatPromptStore() + + assert store.get(DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM) == "first prompt" + + prompt_template.prompt = "updated prompt" + prompt_template.save() + + 
assert store.get(DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM) == "updated prompt" + + +def test_prompt_store_falls_back_to_default_after_delete(): + """Deleting a prompt template should invalidate the cache and restore the default prompt.""" + prompt_template = DashboardChatPromptTemplate.objects.get( + key=DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES, + ) + prompt_template.prompt = "custom answer prompt" + prompt_template.save() + store = DashboardChatPromptStore() + + assert store.get(DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES) == "custom answer prompt" + + prompt_template.delete() + + assert store.get(DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES) == ( + DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES] + ) diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index 085d9887e..58c683d26 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -1,8 +1,10 @@ -"""Tests for dashboard chat LangGraph runtime, allowlist, and SQL guard.""" +"""Tests for the prototype-faithful dashboard chat runtime.""" + +from decimal import Decimal import pytest from django.contrib.auth.models import User -from django.db import transaction +from django.core.cache import cache from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.core.dashboard_chat.allowlist import ( @@ -12,14 +14,16 @@ from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatConversationContext, DashboardChatConversationMessage, + DashboardChatFollowUpContext, DashboardChatIntent, - DashboardChatPlanMode, - DashboardChatQueryPlan, - DashboardChatSqlDraft, - DashboardChatTextFilterPlan, + DashboardChatIntentDecision, + DashboardChatRetrievedDocument, + 
DashboardChatResponse, ) from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType from ddpui.core.dashboard_chat.vector_store import DashboardChatVectorQueryResult from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org @@ -37,8 +41,22 @@ class FakeVectorStore: def __init__(self, rows): self.rows = list(rows) self.calls = [] + self.embed_query_calls = [] + + def embed_query(self, query_text): + self.embed_query_calls.append(query_text) + return [0.1, 0.2, 0.3] - def query(self, org_id, query_text, n_results=5, source_types=None, dashboard_id=None): + def query( + self, + org_id, + query_text, + n_results=5, + source_types=None, + dashboard_id=None, + query_embedding=None, + collection_name=None, + ): self.calls.append( { "org_id": org_id, @@ -46,6 +64,8 @@ def query(self, org_id, query_text, n_results=5, source_types=None, dashboard_id "n_results": n_results, "source_types": list(source_types) if source_types else [], "dashboard_id": dashboard_id, + "query_embedding": query_embedding, + "collection_name": collection_name, } ) results = [] @@ -57,32 +77,8 @@ def query(self, org_id, query_text, n_results=5, source_types=None, dashboard_id results.append(row) return results[:n_results] - -class ContextOnlyLlm: - """Minimal LLM stub for context-answer tests.""" - - def plan_query(self, *args, **kwargs): - raise AssertionError("plan_query should not be called for context-only heuristics") - - def classify_intent(self, *args, **kwargs): - raise AssertionError("classify_intent should not be called for context-only heuristics") - - def generate_sql(self, *args, **kwargs): - raise AssertionError("generate_sql should not be called for context-only heuristics") - - def compose_answer( - self, - user_query, - dashboard_summary, - retrieved_documents, - sql, - sql_results, - warnings, - related_dashboard_titles, - ): - assert sql is None - assert sql_results is 
None - return "The reach metric shows how many beneficiaries were served over time." + def usage_summary(self): + return {} class FakeWarehouseTools: @@ -92,26 +88,151 @@ def __init__(self): self.schema_requests = [] self.distinct_requests = [] self.executed_sql = [] - - def get_schema_snippets(self, tables): - self.schema_requests.append(list(tables)) - return { + self.schemas = { "analytics.program_reach": self._schema_snippet( "analytics.program_reach", [ {"name": "program_name", "data_type": "text", "nullable": False}, {"name": "beneficiaries", "data_type": "integer", "nullable": False}, ], - ) + ), + "analytics.stg_program_reach": self._schema_snippet( + "analytics.stg_program_reach", + [ + {"name": "program_name", "data_type": "text", "nullable": False}, + {"name": "donor_type", "data_type": "text", "nullable": False}, + {"name": "beneficiaries", "data_type": "integer", "nullable": False}, + ], + ), + "analytics.donor_funding_quarterly": self._schema_snippet( + "analytics.donor_funding_quarterly", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + { + "name": "total_realized_funding_usd", + "data_type": "numeric", + "nullable": False, + }, + {"name": "donor_count", "data_type": "integer", "nullable": False}, + ], + ), + "analytics.stg_donor_funding_clean": self._schema_snippet( + "analytics.stg_donor_funding_clean", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "donor_type", "data_type": "text", "nullable": False}, + {"name": "realized_amount_usd", "data_type": "numeric", "nullable": False}, + {"name": "donation_id", "data_type": "text", "nullable": False}, + {"name": "is_realized", "data_type": "boolean", "nullable": False}, + ], + ), + "analytics.facilitator_effectiveness_quarterly": self._schema_snippet( + "analytics.facilitator_effectiveness_quarterly", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "facilitator_name", "data_type": "text", "nullable": 
False}, + {"name": "district_name", "data_type": "text", "nullable": False}, + {"name": "program_area", "data_type": "text", "nullable": False}, + { + "name": "cost_per_improved_outcome_usd", + "data_type": "numeric", + "nullable": False, + }, + ], + ), + "analytics.district_funding_efficiency_quarterly": self._schema_snippet( + "analytics.district_funding_efficiency_quarterly", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "district_name", "data_type": "text", "nullable": False}, + {"name": "program_area", "data_type": "text", "nullable": False}, + { + "name": "spend_per_student_usd", + "data_type": "numeric", + "nullable": False, + }, + ], + ), + } + + def get_schema_snippets(self, tables): + self.schema_requests.append(list(tables)) + return { + table_name: self.schemas[table_name] + for table_name in tables + if table_name in self.schemas } def get_distinct_values(self, table_name, column_name, limit=50): self.distinct_requests.append((table_name, column_name, limit)) - return ["Education", "Health"] + if table_name == "analytics.program_reach" and column_name == "program_name": + return ["Education", "Health"] + if table_name == "analytics.stg_program_reach" and column_name == "donor_type": + return ["Grant", "Corporate"] + return [] def execute_sql(self, sql): self.executed_sql.append(sql) - return [{"program_name": "Education", "beneficiary_count": 120}] + if "SELECT email" in sql: + raise AssertionError("PII queries should not reach warehouse execution") + if ( + "analytics.stg_program_reach" in sql + and "GROUP BY donor_type" in sql + and "beneficiaries" in sql + ): + raise Exception( + 'column "analytics.stg_program_reach.beneficiaries" must appear in the GROUP BY clause or be used in an aggregate function' + ) + if "analytics.stg_program_reach" in sql: + return [ + {"donor_type": "Grant", "beneficiary_count": 80}, + {"donor_type": "Corporate", "beneficiary_count": 40}, + ] + if "analytics.stg_donor_funding_clean" in 
sql and "GROUP BY quarter_label, donor_type" in sql: + return [ + { + "quarter_label": "2025 Q1", + "donor_type": "Grant", + "total_realized_funding_usd": 258000, + "donor_count": 3, + }, + { + "quarter_label": "2025 Q1", + "donor_type": "Corporate", + "total_realized_funding_usd": 35000, + "donor_count": 1, + }, + { + "quarter_label": "2025 Q2", + "donor_type": "Grant", + "total_realized_funding_usd": 46000, + "donor_count": 2, + }, + { + "quarter_label": "2025 Q2", + "donor_type": "Corporate", + "total_realized_funding_usd": 59000, + "donor_count": 2, + }, + ] + if ( + "analytics.facilitator_effectiveness_quarterly f" in sql + and "analytics.district_funding_efficiency_quarterly d" in sql + ): + return [ + { + "facilitator_name": "Farah Ali", + "district_name": "South", + "program_area": "Literacy Boost", + "cost_per_improved_outcome_usd": 740.25, + "spend_per_student_usd": 158.4, + } + ] + if "analytics.program_reach" in sql and "program_name = 'Education'" in sql: + return [{"program_name": "Education", "beneficiary_count": 120}] + if "COUNT(*) AS row_count" in sql: + return [{"row_count": 42}] + return [{"beneficiary_count": 120}] @staticmethod def _schema_snippet(table_name, columns): @@ -120,99 +241,345 @@ def _schema_snippet(table_name, columns): return DashboardChatSchemaSnippet(table_name=table_name, columns=columns) -class SqlPathLlm: - """LLM stub that forces the runtime through planning, distinct lookup, and SQL execution.""" +class PrototypeLlmBase: + """Base LLM stub implementing the runtime contract needed by the tool loop.""" + + def __init__(self): + self.turn = 0 + + def get_prompt(self, prompt_key): + return f"prompt:{prompt_key}" + + def usage_summary(self): + return {} + + def compose_small_talk(self, user_query): + return "Hi! I can help with your program data and metrics. What would you like to know?" 
+ + +class ContextToolLoopLlm(PrototypeLlmBase): + """LLM stub for a context-only question that still uses retrieval.""" def classify_intent(self, *args, **kwargs): - raise AssertionError("Heuristic data routing should handle this test case") + return DashboardChatIntentDecision( + intent=DashboardChatIntent.QUERY_WITHOUT_SQL, + confidence=0.9, + reason="Needs metadata/context, not SQL", + ) - def plan_query( - self, - user_query, - conversation_history, - dashboard_summary, - retrieved_documents, - schema_prompt, - allowlisted_tables, - ): - assert "analytics.program_reach" in allowlisted_tables - assert "program_name" in schema_prompt - return DashboardChatQueryPlan( - mode=DashboardChatPlanMode.SQL, - reason="Needs aggregate data", - relevant_tables=["analytics.program_reach"], - schema_lookup_tables=["analytics.program_reach"], - text_filters=[ - DashboardChatTextFilterPlan( - table_name="analytics.program_reach", - column_name="program_name", - requested_value="Education", - ) - ], + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + if self.turn == 0: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-1", + "name": "retrieve_docs", + "args": {"query": "Explain the reach metric", "types": ["context"]}, + } + ], + } + + tool_messages = [message for message in messages if message["role"] == "tool"] + assert any("doc-dashboard-context" in message["content"] for message in tool_messages) + return { + "content": "The reach metric shows how many beneficiaries were served over time.", + "tool_calls": [], + } + + +class SqlToolLoopLlm(PrototypeLlmBase): + """LLM stub for a fresh SQL-backed question.""" + + def classify_intent(self, *args, **kwargs): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.QUERY_WITH_SQL, + confidence=0.92, + reason="Needs data analysis", + force_tool_usage=True, ) - def generate_sql( - self, - user_query, - dashboard_summary, - query_plan, - schema_prompt, - 
distinct_values, - allowlisted_tables, - ): - assert distinct_values["analytics.program_reach.program_name"] == ["Education", "Health"] - return DashboardChatSqlDraft( - sql=( - "SELECT program_name, COUNT(*) AS beneficiary_count " - "FROM analytics.program_reach " - "WHERE program_name = 'Education' " - "GROUP BY program_name " - "LIMIT 50" + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + responses = [ + { + "content": "", + "tool_calls": [ + { + "id": "call-1", + "name": "retrieve_docs", + "args": {"query": "How many beneficiaries are in Education?", "types": ["chart"]}, + } + ], + }, + { + "content": "", + "tool_calls": [ + { + "id": "call-2", + "name": "get_schema_snippets", + "args": {"tables": ["analytics.program_reach"]}, + } + ], + }, + { + "content": "", + "tool_calls": [ + { + "id": "call-3", + "name": "get_distinct_values", + "args": { + "table": "analytics.program_reach", + "column": "program_name", + "limit": 20, + }, + } + ], + }, + { + "content": "", + "tool_calls": [ + { + "id": "call-4", + "name": "run_sql_query", + "args": { + "sql": ( + "SELECT program_name, COUNT(*) AS beneficiary_count " + "FROM analytics.program_reach " + "WHERE program_name = 'Education' " + "GROUP BY program_name" + ) + }, + } + ], + }, + ] + response = responses[self.turn] + self.turn += 1 + return response + + +class FollowUpCorrectionLlm(PrototypeLlmBase): + """LLM stub that corrects itself after the runtime rejects the wrong follow-up table/column choice.""" + + def classify_intent(self, *args, **kwargs): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.FOLLOW_UP_SQL, + confidence=0.95, + reason="User is modifying the previous SQL result", + force_tool_usage=True, + follow_up_context=DashboardChatFollowUpContext( + is_follow_up=True, + follow_up_type="add_dimension", + reusable_elements={ + "previous_sql": "SELECT COUNT(*) FROM analytics.program_reach", + "previous_tables": ["analytics.program_reach"], + }, + 
modification_instruction="Split the previous result by donor_type", ), - reason="Uses the allowlisted chart table with an exact filter value.", ) - def compose_answer( - self, - user_query, - dashboard_summary, - retrieved_documents, - sql, - sql_results, - warnings, - related_dashboard_titles, - ): - assert sql is not None - assert sql_results == [{"program_name": "Education", "beneficiary_count": 120}] - return "Education has 120 beneficiaries on the current dashboard." + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + if self.turn == 0: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-1", + "name": "get_schema_snippets", + "args": {"tables": ["analytics.program_reach"]}, + } + ], + } + if self.turn == 1: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-2", + "name": "run_sql_query", + "args": { + "sql": ( + "SELECT donor_type, COUNT(*) AS beneficiary_count " + "FROM analytics.program_reach " + "GROUP BY donor_type" + ) + }, + } + ], + } + if self.turn == 2: + tool_messages = [message for message in messages if message["role"] == "tool"] + assert any("column_not_in_table" in message["content"] for message in tool_messages) + assert any("analytics.stg_program_reach" in message["content"] for message in tool_messages) + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-3", + "name": "run_sql_query", + "args": { + "sql": ( + "SELECT donor_type, COUNT(*) AS beneficiary_count " + "FROM analytics.stg_program_reach " + "GROUP BY donor_type" + ) + }, + }, + ], + } + raise AssertionError("Follow-up correction LLM exceeded expected turns") -class PiiSqlPathLlm(SqlPathLlm): - """LLM stub that generates an unsafe row-level PII query.""" +class FollowUpDimensionGuardLlm(PrototypeLlmBase): + """LLM stub that first ignores the requested dimension, then corrects after the guard fires.""" - def generate_sql( - self, - user_query, - dashboard_summary, - 
query_plan, - schema_prompt, - distinct_values, - allowlisted_tables, - ): - return DashboardChatSqlDraft( - sql="SELECT email FROM analytics.program_reach LIMIT 25", - reason="Returns raw email addresses.", + def classify_intent(self, *args, **kwargs): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.FOLLOW_UP_SQL, + confidence=0.95, + reason="User is modifying the previous SQL result", + force_tool_usage=True, + follow_up_context=DashboardChatFollowUpContext( + is_follow_up=True, + follow_up_type="add_dimension", + reusable_elements={ + "previous_sql": "SELECT quarter_label, total_realized_funding_usd FROM analytics.donor_funding_quarterly", + "previous_tables": ["analytics.donor_funding_quarterly"], + }, + modification_instruction="split by donor_type", + ), ) + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + if self.turn == 0: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-1", + "name": "get_schema_snippets", + "args": {"tables": ["analytics.donor_funding_quarterly"]}, + } + ], + } + if self.turn == 1: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-2", + "name": "run_sql_query", + "args": { + "sql": ( + "SELECT quarter_label, SUM(total_realized_funding_usd) AS total_realized_funding_usd " + "FROM analytics.donor_funding_quarterly " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "GROUP BY quarter_label ORDER BY quarter_label" + ) + }, + } + ], + } + if self.turn == 2: + tool_messages = [message for message in messages if message["role"] == "tool"] + assert any("requested_dimension_missing" in message["content"] for message in tool_messages) + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-3", + "name": "list_tables_by_keyword", + "args": {"keyword": "donor_funding", "limit": 10}, + } + ], + } + if self.turn == 3: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-4", + "name": 
"get_schema_snippets", + "args": {"tables": ["analytics.stg_donor_funding_clean"]}, + } + ], + } + if self.turn == 4: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-5", + "name": "run_sql_query", + "args": { + "sql": ( + "SELECT quarter_label, donor_type, SUM(realized_amount_usd) AS total_realized_funding_usd, " + "COUNT(DISTINCT donation_id) AS donor_count " + "FROM analytics.stg_donor_funding_clean " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') AND is_realized = TRUE " + "GROUP BY quarter_label, donor_type ORDER BY quarter_label, donor_type" + ) + }, + } + ], + } + raise AssertionError("Follow-up dimension guard LLM exceeded expected turns") + -def test_heuristic_intent_does_not_treat_summary_as_sum_keyword(): - """Substring matches like sum->summary should not force the SQL path.""" - decision = DashboardChatRuntime._heuristic_intent_decision( - user_query="Give me a summary of this dashboard", - conversation_history=[], - ) +class PiiToolLoopLlm(PrototypeLlmBase): + """LLM stub that needs a safe failure response after SQL guard rejection.""" + + def classify_intent(self, *args, **kwargs): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.QUERY_WITH_SQL, + confidence=0.9, + reason="Needs data analysis", + force_tool_usage=True, + ) + + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + if self.turn == 0: + self.turn += 1 + return { + "content": "", + "tool_calls": [ + { + "id": "call-1", + "name": "run_sql_query", + "args": {"sql": "SELECT email FROM analytics.program_reach LIMIT 25"}, + } + ], + } + + tool_messages = [message for message in messages if message["role"] == "tool"] + assert any("aggregate the results or rephrase" in message["content"] for message in tool_messages) + return { + "content": "I couldn't answer that safely. 
Please aggregate the results or rephrase.", + "tool_calls": [], + } - assert decision is None or decision.intent != DashboardChatIntent.DATA_QUERY + +class SmallTalkLlm(PrototypeLlmBase): + """LLM stub for prototype-style small talk.""" + + def classify_intent(self, *args, **kwargs): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.SMALL_TALK, + confidence=0.97, + reason="Greeting or pleasantry", + ) + + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + raise AssertionError("Small talk should not enter the tool loop") @pytest.fixture @@ -259,22 +626,6 @@ def primary_chart(org, orguser): chart.delete() -@pytest.fixture -def related_chart(org, orguser): - chart = Chart.objects.create( - title="Funding by Donor", - description="Donor funding mix", - chart_type="bar", - schema_name="analytics", - table_name="donor_funding", - created_by=orguser, - last_modified_by=orguser, - org=org, - ) - yield chart - chart.delete() - - @pytest.fixture def primary_dashboard(org, orguser, primary_chart): dashboard = Dashboard.objects.create( @@ -295,130 +646,464 @@ def primary_dashboard(org, orguser, primary_chart): yield dashboard dashboard.delete() - -@pytest.fixture -def related_dashboard(org, orguser, related_chart): - dashboard = Dashboard.objects.create( - title="Funding Overview", - description="Funding KPIs and donor mix", - dashboard_type="native", - components={ - "chart-2": { - "id": "chart-2", - "type": "chart", - "config": {"chartId": related_chart.id}, - } - }, - created_by=orguser, - last_modified_by=orguser, - org=org, +def test_extract_conversation_context_reads_previous_sql_payload(): + """Follow-up routing should recover prior SQL context from assistant payloads.""" + conversation_context = DashboardChatRuntime._extract_conversation_context( + [ + DashboardChatConversationMessage(role="user", content="How many beneficiaries do we have?"), + DashboardChatConversationMessage( + role="assistant", + content="There are 120 
beneficiaries.", + payload={ + "intent": "query_with_sql", + "sql": "SELECT COUNT(*) FROM analytics.program_reach", + "metadata": {"query_plan_tables": ["analytics.program_reach"]}, + "citations": [ + { + "source_type": "warehouse_table", + "table_name": "analytics.program_reach", + } + ], + }, + ), + ] ) - yield dashboard - dashboard.delete() + assert conversation_context.last_sql_query == "SELECT COUNT(*) FROM analytics.program_reach" + assert conversation_context.last_tables_used == ["analytics.program_reach"] + assert conversation_context.last_response_type == "sql_result" + assert conversation_context.last_intent == "query_with_sql" -def test_allowlist_adds_upstream_dbt_tables(): - """Allowlist should include chart tables and their upstream dbt lineage.""" - export_payload = { - "dashboard": {"title": "Impact Overview"}, - "charts": [{"id": 1, "schema_name": "analytics", "table_name": "fact_reach"}], + +def test_seed_distinct_cache_reuses_previous_text_filters(primary_dashboard): + """Follow-up turns should reuse text-filter validations from the previous successful SQL.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "dashboard_id": primary_dashboard.id, + "conversation_context": DashboardChatRuntime._extract_conversation_context( + [ + DashboardChatConversationMessage( + role="assistant", + content="Previous answer", + payload={ + "intent": "query_with_sql", + "sql": ( + "SELECT quarter_label, SUM(total_realized_funding_usd) " + "FROM analytics.donor_funding_quarterly " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "GROUP BY quarter_label" + ), + }, + ) + ] + ), } - manifest_json = { - "nodes": { - "model.dalgo.fact_reach": { - "resource_type": "model", - "schema": "analytics", - "name": "fact_reach", - "depends_on": {"nodes": ["model.dalgo.dim_program", "source.dalgo.raw_students"]}, - }, - "model.dalgo.dim_program": { - "resource_type": "model", - "schema": "analytics", - "name": 
"dim_program", - "depends_on": {"nodes": []}, - }, - }, - "sources": { - "source.dalgo.raw_students": { - "resource_type": "source", - "schema": "raw", - "name": "students", + execution_context = {"distinct_cache": set()} + + runtime._seed_distinct_cache_from_previous_sql(state, execution_context) + + assert ( + "analytics.donor_funding_quarterly", + "quarter_label", + "2025 q1", + ) in execution_context["distinct_cache"] + assert ("*", "quarter_label", "2025 q2") in execution_context["distinct_cache"] + + +def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(primary_dashboard): + """Follow-up SQL should reuse validated text filters even after moving to an upstream table.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={ + "analytics.donor_funding_quarterly", + "analytics.stg_donor_funding_clean", } + ), + "conversation_context": DashboardChatRuntime._extract_conversation_context( + [ + DashboardChatConversationMessage( + role="assistant", + content="Previous answer", + payload={ + "intent": "query_with_sql", + "sql": ( + "SELECT quarter_label, total_realized_funding_usd " + "FROM analytics.donor_funding_quarterly " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "ORDER BY quarter_label" + ), + }, + ) + ] + ), + "org": primary_dashboard.org, + } + execution_context = { + "distinct_cache": set(), + "schema_cache": { + "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( + "analytics.stg_donor_funding_clean", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "donor_type", "data_type": "text", "nullable": False}, + {"name": "realized_amount_usd", "data_type": "numeric", "nullable": False}, + {"name": "is_realized", "data_type": "boolean", "nullable": False}, + ], + ) }, + "warehouse_tools": None, } - allowlist = 
DashboardChatAllowlistBuilder.build(export_payload, manifest_json=manifest_json) + runtime._seed_distinct_cache_from_previous_sql(state, execution_context) + missing = runtime._missing_distinct( + ( + "SELECT quarter_label, donor_type, SUM(realized_amount_usd) AS total_realized_funding_usd " + "FROM analytics.stg_donor_funding_clean " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "AND is_realized = TRUE " + "GROUP BY quarter_label, donor_type" + ), + state, + execution_context, + ) - assert allowlist.chart_tables == {"analytics.fact_reach"} - assert "analytics.dim_program" in allowlist.upstream_tables - assert "raw.students" in allowlist.allowed_tables - assert allowlist.is_allowed("analytics.fact_reach") is True - assert allowlist.is_unique_id_allowed("model.dalgo.dim_program") is True + assert missing == [] -def test_sql_guard_enforces_single_statement_allowlist_and_limit(): - """SQL guard should block unsafe queries and add a row limit when absent.""" - allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) - guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) +def test_get_distinct_values_returns_column_correction_for_wrong_table(primary_dashboard): + """Follow-up correction should surface candidate tables when a distinct lookup targets the wrong table.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={ + "analytics.donor_funding_quarterly", + "analytics.stg_donor_funding_clean", + } + ), + "org": primary_dashboard.org, + } + execution_context = { + "schema_cache": { + "analytics.donor_funding_quarterly": FakeWarehouseTools._schema_snippet( + "analytics.donor_funding_quarterly", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "total_realized_funding_usd", "data_type": "numeric", "nullable": False}, + ], + ), + "analytics.stg_donor_funding_clean": 
FakeWarehouseTools._schema_snippet( + "analytics.stg_donor_funding_clean", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "donor_type", "data_type": "text", "nullable": False}, + {"name": "realized_amount_usd", "data_type": "numeric", "nullable": False}, + ], + ), + }, + "warehouse_tools": None, + } - multi_statement = guard.validate( - "SELECT * FROM analytics.program_reach; DELETE FROM analytics.program_reach" + result = runtime._tool_get_distinct_values( + { + "table": "analytics.donor_funding_quarterly", + "column": "donor_type", + "limit": 50, + }, + state, + execution_context, ) - assert multi_statement.is_valid is False - assert multi_statement.errors == ["Multiple statements are not allowed"] - disallowed_table = guard.validate("SELECT * FROM analytics.other_table") - assert disallowed_table.is_valid is False - assert any("not accessible" in error for error in disallowed_table.errors) + assert result["error"] == "column_not_in_table" + assert result["table"] == "analytics.donor_funding_quarterly" + assert result["column"] == "donor_type" + assert "analytics.stg_donor_funding_clean" in result["candidates"] - allowed_query = guard.validate( - "SELECT COUNT(*) AS beneficiary_count FROM analytics.program_reach" + +def test_missing_columns_check_ignores_boolean_literals(primary_dashboard): + """Boolean literals in WHERE clauses should not be misread as missing columns.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), ) - assert allowed_query.is_valid is True - assert allowed_query.sanitized_sql.endswith("LIMIT 200") - assert any("No LIMIT clause found" in warning for warning in allowed_query.warnings) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={"analytics.stg_donor_funding_clean"} + ), + "org": primary_dashboard.org, + } + execution_context = { + "schema_cache": { + "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( + 
"analytics.stg_donor_funding_clean", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "donor_type", "data_type": "text", "nullable": False}, + {"name": "realized_amount_usd", "data_type": "numeric", "nullable": False}, + {"name": "donation_id", "data_type": "text", "nullable": False}, + {"name": "is_realized", "data_type": "boolean", "nullable": False}, + ], + ) + }, + "warehouse_tools": None, + } + missing = runtime._missing_columns_in_primary_table( + sql=( + "SELECT quarter_label, donor_type, SUM(realized_amount_usd) AS total_realized_funding_usd, " + "COUNT(DISTINCT donation_id) AS donor_count " + "FROM analytics.stg_donor_funding_clean " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') AND is_realized = TRUE " + "GROUP BY quarter_label, donor_type ORDER BY quarter_label, donor_type LIMIT 200" + ), + state=state, + execution_context=execution_context, + ) -def test_sql_guard_rejects_row_level_pii_queries(): - """SQL guard should reject row-level projections of sensitive fields.""" - allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) - guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + assert missing is None - pii_query = guard.validate( - "SELECT email, COUNT(*) AS beneficiary_count " - "FROM analytics.program_reach " - "GROUP BY email " - "LIMIT 50" - ) - assert pii_query.is_valid is False - assert pii_query.sanitized_sql is None - assert pii_query.errors == [ - "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." 
+def test_run_sql_keeps_join_tables_intact(primary_dashboard): + """Join queries should execute the model's SQL as written and let the tool loop correct errors.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + warehouse_tools_factory=lambda org: FakeWarehouseTools(), + ) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={ + "analytics.facilitator_effectiveness_quarterly", + "analytics.district_funding_efficiency_quarterly", + } + ), + "org": primary_dashboard.org, + "intent_decision": DashboardChatIntentDecision( + intent=DashboardChatIntent.QUERY_WITH_SQL, + confidence=0.9, + reason="Join-heavy data analysis", + force_tool_usage=True, + ), + "user_query": "Join facilitator outcomes to district funding efficiency.", + } + execution_context = { + "schema_cache": { + "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools().schemas[ + "analytics.facilitator_effectiveness_quarterly" + ], + "analytics.district_funding_efficiency_quarterly": FakeWarehouseTools().schemas[ + "analytics.district_funding_efficiency_quarterly" + ], + }, + "warehouse_tools": FakeWarehouseTools(), + "distinct_cache": {("*", "quarter_label")}, + "last_sql": None, + "last_sql_results": None, + "last_sql_validation": None, + "warnings": [], + } + + result = runtime._run_sql_with_distinct_guard( + { + "sql": ( + "SELECT " + "f.facilitator_name, f.district_name, f.program_area, " + "f.cost_per_improved_outcome_usd, d.spend_per_student_usd " + "FROM analytics.facilitator_effectiveness_quarterly f " + "JOIN analytics.district_funding_efficiency_quarterly d " + "ON f.quarter_label = d.quarter_label " + "AND f.district_name = d.district_name " + "AND f.program_area = d.program_area " + "WHERE f.quarter_label = '2025 Q2' " + "ORDER BY f.cost_per_improved_outcome_usd ASC" + ) + }, + state, + execution_context, + ) + + assert result["success"] is True + assert "analytics.facilitator_effectiveness_quarterly f" in 
result["sql_used"] + assert "analytics.district_funding_efficiency_quarterly d" in result["sql_used"] + + +def test_missing_distinct_resolves_join_filter_to_qualified_table(primary_dashboard): + """Distinct validation should inspect the joined table referenced by a qualified WHERE filter.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={ + "analytics.facilitator_effectiveness_quarterly", + "analytics.district_funding_efficiency_quarterly", + } + ), + "org": primary_dashboard.org, + } + execution_context = { + "schema_cache": { + "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools().schemas[ + "analytics.facilitator_effectiveness_quarterly" + ], + "analytics.district_funding_efficiency_quarterly": FakeWarehouseTools().schemas[ + "analytics.district_funding_efficiency_quarterly" + ], + }, + "warehouse_tools": None, + "distinct_cache": set(), + } + + missing = runtime._missing_distinct( + ( + "SELECT f.facilitator_name, d.spend_per_student_usd " + "FROM analytics.facilitator_effectiveness_quarterly f " + "JOIN analytics.district_funding_efficiency_quarterly d " + "ON f.quarter_label = d.quarter_label " + "AND f.district_name = d.district_name " + "AND f.program_area = d.program_area " + "WHERE d.program_area = 'Literacy'" + ), + state, + execution_context, + ) + + assert missing == [ + { + "table": "analytics.district_funding_efficiency_quarterly", + "column": "program_area", + "value": "Literacy", + } ] -def test_sql_guard_rejects_select_into_queries(): - """SQL guard should reject SELECT ... 
INTO statements.""" - allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) - guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) +def test_missing_columns_check_is_join_aware_for_qualified_columns(primary_dashboard): + """Qualified join columns should be validated against the referenced joined table.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={ + "analytics.facilitator_effectiveness_quarterly", + "analytics.district_funding_efficiency_quarterly", + } + ), + "org": primary_dashboard.org, + } + execution_context = { + "schema_cache": { + "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools().schemas[ + "analytics.facilitator_effectiveness_quarterly" + ], + "analytics.district_funding_efficiency_quarterly": FakeWarehouseTools().schemas[ + "analytics.district_funding_efficiency_quarterly" + ], + }, + "warehouse_tools": None, + } - select_into_query = guard.validate( - "SELECT program_name INTO temp_programs FROM analytics.program_reach LIMIT 50" + missing = runtime._missing_columns_in_primary_table( + sql=( + "SELECT f.facilitator_name, d.fake_dimension " + "FROM analytics.facilitator_effectiveness_quarterly f " + "JOIN analytics.district_funding_efficiency_quarterly d " + "ON f.quarter_label = d.quarter_label " + "AND f.district_name = d.district_name " + "AND f.program_area = d.program_area " + "WHERE f.quarter_label = '2025 Q2'" + ), + state=state, + execution_context=execution_context, ) - assert select_into_query.is_valid is False - assert select_into_query.sanitized_sql is None - assert "SELECT INTO is not allowed" in select_into_query.errors + assert missing["error"] == "column_not_in_table" + assert missing["table"] == "analytics.district_funding_efficiency_quarterly" + assert missing["column"] == "fake_dimension" -def 
test_runtime_context_query_returns_citations_and_related_dashboards( +def test_missing_columns_check_ignores_order_by_select_alias(primary_dashboard): + """ORDER BY aliases from the SELECT clause should not be treated as missing physical columns.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "allowlist": DashboardChatAllowlist( + allowed_tables={"analytics.facilitator_effectiveness_quarterly"} + ), + "org": primary_dashboard.org, + } + execution_context = { + "schema_cache": { + "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools._schema_snippet( + "analytics.facilitator_effectiveness_quarterly", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "facilitator_name", "data_type": "text", "nullable": False}, + { + "name": "cost_per_improved_outcome_usd", + "data_type": "numeric", + "nullable": False, + }, + ], + ) + }, + "warehouse_tools": None, + } + + missing = runtime._missing_columns_in_primary_table( + sql=( + "SELECT facilitator_name, AVG(cost_per_improved_outcome_usd) AS avg_cost_per_improved_outcome " + "FROM analytics.facilitator_effectiveness_quarterly " + "WHERE quarter_label = '2025 Q2' " + "GROUP BY facilitator_name " + "ORDER BY avg_cost_per_improved_outcome ASC " + "LIMIT 1" + ), + state=state, + execution_context=execution_context, + ) + + assert missing is None + + +def test_small_talk_turn_returns_without_citations(primary_dashboard): + """Greeting turns should skip retrieval and finalize cleanly.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + + response = runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="hello", + ) + + assert response.intent == DashboardChatIntent.SMALL_TALK + assert "this dashboard" in response.answer_text + assert response.citations == [] + assert response.warnings == [] + assert 
response.metadata["allowlisted_tables"] == ["analytics.program_reach"] + + +def test_runtime_query_without_sql_returns_dashboard_scoped_citations( org, primary_dashboard, - related_dashboard, ): - """Context questions should return citations and cross-dashboard suggestions.""" - transaction.commit() + """Context questions should use retrieval without suggesting other dashboards.""" vector_store = FakeVectorStore( [ DashboardChatVectorQueryResult( @@ -440,25 +1125,14 @@ def test_runtime_context_query_returns_citations_and_related_dashboards( }, distance=0.04, ), - DashboardChatVectorQueryResult( - document_id="doc-related-dashboard", - content="This dashboard shows donor-wise funding and cashflow trends.", - metadata={ - "source_type": "dashboard_export", - "source_identifier": f"dashboard:{related_dashboard.id}:summary", - "dashboard_id": related_dashboard.id, - }, - distance=0.05, - ), ] ) runtime = DashboardChatRuntime( vector_store=vector_store, - llm_client=ContextOnlyLlm(), + llm_client=ContextToolLoopLlm(), runtime_config=DashboardChatRuntimeConfig( retrieval_limit=6, - related_dashboard_limit=2, max_query_rows=200, max_distinct_values=20, max_schema_tables=4, @@ -471,21 +1145,43 @@ def test_runtime_context_query_returns_citations_and_related_dashboards( user_query="Explain the reach metric", ) - assert response.intent == DashboardChatIntent.CONTEXT_QUERY + assert response.intent == DashboardChatIntent.QUERY_WITHOUT_SQL assert response.sql is None - assert response.metadata["query_plan_mode"] == "context" assert len(response.citations) >= 2 - assert response.citations[0].source_type == "dashboard_context" - assert response.related_dashboards[0].dashboard_id == related_dashboard.id - assert response.related_dashboards[0].title == "Funding Overview" + assert response.citations[0].source_type in {"dashboard_context", "org_context"} + assert response.tool_calls[0]["name"] == "retrieve_docs" + + +def 
test_runtime_prompt_messages_do_not_inline_raw_human_context(primary_dashboard): + """Raw org/dashboard markdown should reach the model through retrieval, not prompt duplication.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + + new_query_messages = runtime._build_new_query_messages( + { + "user_query": "Explain the reach metric", + "human_context": "Organization context: duplicated markdown", + } + ) + follow_up_messages = runtime._build_follow_up_messages( + { + "user_query": "Explain that metric", + "human_context": "Organization context: duplicated markdown", + "conversation_context": DashboardChatRuntime._extract_conversation_context([]), + } + ) + + assert new_query_messages[0]["content"] == "prompt:new_query_system" + assert "Human context" not in follow_up_messages[0]["content"] -def test_runtime_data_query_uses_distinct_values_before_sql_execution( +def test_runtime_query_with_sql_uses_distinct_values_before_sql_execution( org, primary_dashboard, ): - """Data questions should fetch distinct values before generating and executing SQL.""" - transaction.commit() + """Data questions should fetch distinct values before executing SQL.""" vector_store = FakeVectorStore( [ DashboardChatVectorQueryResult( @@ -504,11 +1200,10 @@ def test_runtime_data_query_uses_distinct_values_before_sql_execution( runtime = DashboardChatRuntime( vector_store=vector_store, - llm_client=SqlPathLlm(), + llm_client=SqlToolLoopLlm(), warehouse_tools_factory=lambda org: fake_warehouse, runtime_config=DashboardChatRuntimeConfig( retrieval_limit=6, - related_dashboard_limit=2, max_query_rows=200, max_distinct_values=20, max_schema_tables=4, @@ -519,23 +1214,14 @@ def test_runtime_data_query_uses_distinct_values_before_sql_execution( org=org, dashboard_id=primary_dashboard.id, user_query="How many beneficiaries are in Education?", - conversation_history=[ - DashboardChatConversationMessage(role="user", content="Show me beneficiary 
data") - ], ) assert fake_warehouse.distinct_requests == [("analytics.program_reach", "program_name", 20)] - assert len(fake_warehouse.executed_sql) == 1 - assert "WHERE program_name = 'Education'" in fake_warehouse.executed_sql[0] - assert response.intent == DashboardChatIntent.DATA_QUERY - assert response.sql is not None - assert response.metadata["query_plan_mode"] == "sql" - assert any(citation.source_type == "warehouse_table" for citation in response.citations) -def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_dashboard): - """Unsafe PII SQL should never reach warehouse execution.""" - transaction.commit() +def test_runtime_reuses_session_snapshot_across_turns(org, primary_dashboard): + """Session snapshots should freeze dashboard context and reuse schema within one chat.""" + cache.clear() vector_store = FakeVectorStore( [ DashboardChatVectorQueryResult( @@ -551,18 +1237,399 @@ def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_das ] ) fake_warehouse = FakeWarehouseTools() + def build_runtime(): + return DashboardChatRuntime( + vector_store=vector_store, + llm_client=SqlToolLoopLlm(), + warehouse_tools_factory=lambda org: fake_warehouse, + runtime_config=DashboardChatRuntimeConfig( + retrieval_limit=6, + max_query_rows=200, + max_distinct_values=20, + max_schema_tables=4, + ), + ) + + first_response = build_runtime().run( + org=org, + dashboard_id=primary_dashboard.id, + session_id="session-cache-test", + user_query="How many beneficiaries are in Education?", + ) + second_response = build_runtime().run( + org=org, + dashboard_id=primary_dashboard.id, + session_id="session-cache-test", + user_query="How many beneficiaries are in Education?", + ) + + assert first_response.intent == DashboardChatIntent.QUERY_WITH_SQL + assert second_response.intent == DashboardChatIntent.QUERY_WITH_SQL + assert fake_warehouse.schema_requests == [["analytics.program_reach"]] + assert "WHERE program_name = 'Education'" in 
fake_warehouse.executed_sql[0] + assert first_response.sql is not None + assert second_response.sql is not None + assert '"beneficiary_count": 120' in first_response.answer_text + assert any(citation.source_type == "warehouse_table" for citation in first_response.citations) + assert [call["name"] for call in first_response.tool_calls] == [ + "retrieve_docs", + "get_schema_snippets", + "get_distinct_values", + "run_sql_query", + ] + + +def test_runtime_persists_distinct_validations_in_session_snapshot(org, primary_dashboard): + """Validated text filter values should survive across turns in the same chat session.""" + cache.clear() + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + session_id = "session-distinct-cache-test" + snapshot_state = { + "org": org, + "dashboard_id": primary_dashboard.id, + "session_id": session_id, + } + + snapshot = runtime._load_session_snapshot(snapshot_state) + state = { + "org": org, + "dashboard_id": primary_dashboard.id, + "session_id": session_id, + "allowlist": snapshot["allowlist"], + "session_distinct_cache": snapshot["distinct_cache"], + } + execution_context = {"distinct_cache": set(snapshot["distinct_cache"])} + + runtime._record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name="analytics.program_reach", + column_name="program_name", + values=["Education"], + ) + + reloaded_snapshot = runtime._load_session_snapshot(snapshot_state) + missing = runtime._missing_distinct( + "SELECT COUNT(*) FROM analytics.program_reach WHERE program_name = 'Education'", + { + "allowlist": snapshot["allowlist"], + "org": org, + }, + { + "distinct_cache": set(reloaded_snapshot["distinct_cache"]), + "schema_cache": { + "analytics.program_reach": FakeWarehouseTools._schema_snippet( + "analytics.program_reach", + [ + {"name": "program_name", "data_type": "text", "nullable": False}, + {"name": "beneficiaries", "data_type": "integer", "nullable": False}, + 
], + ) + }, + "warehouse_tools": None, + }, + ) + + assert ( + "analytics.program_reach", + "program_name", + "education", + ) in reloaded_snapshot["distinct_cache"] + assert missing == [] + + +def test_runtime_follow_up_sql_corrects_after_failed_sql_attempt( + monkeypatch, + org, + primary_dashboard, +): + """Follow-up SQL turns should self-correct within the prototype tool loop.""" + vector_store = FakeVectorStore([]) + fake_warehouse = FakeWarehouseTools() + + manifest_json = { + "nodes": { + "model.dalgo.program_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "program_reach", + "depends_on": {"nodes": ["model.dalgo.stg_program_reach"]}, + }, + "model.dalgo.stg_program_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "stg_program_reach", + "depends_on": {"nodes": []}, + }, + }, + "sources": {}, + } + monkeypatch.setattr( + DashboardChatAllowlistBuilder, + "load_manifest_json", + staticmethod(lambda orgdbt: manifest_json), + ) runtime = DashboardChatRuntime( vector_store=vector_store, - llm_client=PiiSqlPathLlm(), + llm_client=FollowUpCorrectionLlm(), warehouse_tools_factory=lambda org: fake_warehouse, - runtime_config=DashboardChatRuntimeConfig( - retrieval_limit=6, - related_dashboard_limit=2, - max_query_rows=200, - max_distinct_values=20, - max_schema_tables=4, + ) + + response = runtime.run( + org=org, + dashboard_id=primary_dashboard.id, + user_query="Now split that by donor type.", + conversation_history=[ + DashboardChatConversationMessage(role="user", content="How many beneficiaries do we have?"), + DashboardChatConversationMessage( + role="assistant", + content="There are 120 beneficiaries.", + payload={ + "intent": "query_with_sql", + "sql": "SELECT COUNT(*) FROM analytics.program_reach", + "metadata": {"query_plan_tables": ["analytics.program_reach"]}, + }, + ), + ], + ) + + assert response.intent == DashboardChatIntent.FOLLOW_UP_SQL + assert response.sql is not None + assert 
"analytics.stg_program_reach" in response.sql + assert len(fake_warehouse.executed_sql) == 1 + run_sql_calls = [ + tool_call for tool_call in response.tool_calls if tool_call["name"] == "run_sql_query" + ] + assert run_sql_calls[0]["success"] is False + assert run_sql_calls[-1]["success"] is True + assert '"donor_type": "Grant"' in response.answer_text + + +def test_runtime_dbt_tools_use_compact_allowlisted_index(): + """Deterministic dbt tools should run from the compact allowlisted index, not a full manifest blob.""" + export_payload = { + "dashboard": {"title": "Impact Overview"}, + "charts": [{"id": 1, "schema_name": "analytics", "table_name": "program_reach"}], + } + manifest_json = { + "nodes": { + "model.dalgo.program_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "program_reach", + "description": "Program-level reach fact table", + "columns": { + "program_name": { + "name": "program_name", + "description": "Program dimension", + "data_type": "text", + } + }, + "depends_on": {"nodes": ["model.dalgo.stg_program_reach"]}, + }, + "model.dalgo.stg_program_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "stg_program_reach", + "description": "Staging model for program reach", + "columns": {}, + "depends_on": {"nodes": []}, + }, + }, + "sources": {}, + "parent_map": { + "model.dalgo.program_reach": ["model.dalgo.stg_program_reach"], + "model.dalgo.stg_program_reach": [], + }, + "child_map": { + "model.dalgo.program_reach": [], + "model.dalgo.stg_program_reach": ["model.dalgo.program_reach"], + }, + } + allowlist = DashboardChatAllowlistBuilder.build(export_payload, manifest_json=manifest_json) + dbt_index = DashboardChatAllowlistBuilder.build_dbt_index(manifest_json, allowlist) + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "allowlist": allowlist, + "dbt_index": dbt_index, + } + + search_result = runtime._tool_search_dbt_models( + {"query": 
"program reach", "limit": 5}, + state, + {}, + ) + info_result = runtime._tool_get_dbt_model_info( + {"model_name": "analytics.program_reach"}, + state, + {}, + ) + + assert search_result["count"] >= 1 + assert { + model["table"] for model in search_result["models"] + } <= {"analytics.program_reach", "analytics.stg_program_reach"} + assert info_result["model"] == "program_reach" + assert info_result["upstream"] == ["analytics.stg_program_reach"] + + +def test_runtime_follow_up_sql_rejects_query_that_ignores_requested_dimension( + monkeypatch, + org, + primary_dashboard, +): + """Follow-up add-dimension turns should not succeed without using the requested dimension.""" + vector_store = FakeVectorStore([]) + fake_warehouse = FakeWarehouseTools() + + manifest_json = { + "nodes": { + "model.dalgo.donor_funding_quarterly": { + "resource_type": "model", + "schema": "analytics", + "name": "donor_funding_quarterly", + "depends_on": {"nodes": ["model.dalgo.stg_donor_funding_clean"]}, + }, + "model.dalgo.stg_donor_funding_clean": { + "resource_type": "model", + "schema": "analytics", + "name": "stg_donor_funding_clean", + "depends_on": {"nodes": []}, + }, + }, + "sources": {}, + } + monkeypatch.setattr( + DashboardChatAllowlistBuilder, + "load_manifest_json", + staticmethod(lambda orgdbt: manifest_json), + ) + monkeypatch.setattr( + DashboardChatAllowlistBuilder, + "build", + staticmethod( + lambda dashboard_export, manifest_json: DashboardChatAllowlist( + allowed_tables={ + "analytics.donor_funding_quarterly", + "analytics.stg_donor_funding_clean", + } + ) + ), + ) + + runtime = DashboardChatRuntime( + vector_store=vector_store, + llm_client=FollowUpDimensionGuardLlm(), + warehouse_tools_factory=lambda org: fake_warehouse, + ) + + response = runtime.run( + org=org, + dashboard_id=primary_dashboard.id, + user_query="Now split that by donor type.", + conversation_history=[ + DashboardChatConversationMessage(role="user", content="How many beneficiaries do we have?"), + 
DashboardChatConversationMessage( + role="assistant", + content="There are 120 beneficiaries.", + payload={ + "intent": "query_with_sql", + "sql": ( + "SELECT quarter_label, total_realized_funding_usd " + "FROM analytics.donor_funding_quarterly " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "ORDER BY quarter_label" + ), + "metadata": { + "query_plan_tables": ["analytics.donor_funding_quarterly"] + }, + }, + ), + ], + ) + + assert response.intent == DashboardChatIntent.FOLLOW_UP_SQL + assert response.sql is not None + assert "analytics.stg_donor_funding_clean" in response.sql + assert any(call.get("error") == "requested_dimension_missing" for call in response.tool_calls) + assert '"Grant"' in response.answer_text + + +def test_follow_up_dimension_validation_accepts_structural_granularity_change(primary_dashboard): + """Follow-up add-dimension validation should accept structural SQL rewrites, not only exact token reuse.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + state = { + "intent_decision": DashboardChatIntentDecision( + intent=DashboardChatIntent.FOLLOW_UP_SQL, + confidence=0.9, + reason="Follow-up SQL", + force_tool_usage=True, + follow_up_context=DashboardChatFollowUpContext( + is_follow_up=True, + follow_up_type="add_dimension", + modification_instruction="Now split that by donor type.", + ), + ), + "conversation_context": DashboardChatConversationContext( + last_sql_query=( + "SELECT quarter_label, SUM(realized_amount_usd) AS total_realized_funding_usd " + "FROM analytics.stg_donor_funding_clean " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "GROUP BY quarter_label" + ), + ), + "user_query": "Now split that by donor type.", + "allowlist": DashboardChatAllowlist( + allowed_tables={"analytics.stg_donor_funding_clean"} + ), + "org": primary_dashboard.org, + } + execution_context = { + "schema_cache": { + "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( + 
"analytics.stg_donor_funding_clean", + [ + {"name": "quarter_label", "data_type": "text", "nullable": False}, + {"name": "donor_type", "data_type": "text", "nullable": False}, + {"name": "realized_amount_usd", "data_type": "numeric", "nullable": False}, + ], + ) + }, + "warehouse_tools": None, + } + + validation = runtime._validate_follow_up_dimension_usage( + sql=( + "SELECT quarter_label, COALESCE(donor_type, 'Unknown') AS donor_type, " + "SUM(realized_amount_usd) AS total_realized_funding_usd " + "FROM analytics.stg_donor_funding_clean " + "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " + "GROUP BY quarter_label, COALESCE(donor_type, 'Unknown')" ), + state=state, + execution_context=execution_context, + ) + + assert validation is None + + +def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_dashboard): + """Unsafe PII SQL should be rejected by the SQL guard before warehouse execution.""" + fake_warehouse = FakeWarehouseTools() + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=PiiToolLoopLlm(), + warehouse_tools_factory=lambda org: fake_warehouse, ) response = runtime.run( @@ -580,11 +1647,8 @@ def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_das ] -def test_runtime_skips_disabled_source_types_during_retrieval( - org, primary_dashboard, related_dashboard -): - """Disabled source types should not be queried from the vector store.""" - transaction.commit() +def test_runtime_skips_disabled_source_types_during_retrieval(org, primary_dashboard): + """Disabled source types should not be queried by the retrieve_docs tool.""" vector_store = FakeVectorStore( [ DashboardChatVectorQueryResult( @@ -606,29 +1670,12 @@ def test_runtime_skips_disabled_source_types_during_retrieval( }, distance=0.04, ), - DashboardChatVectorQueryResult( - document_id="doc-related-dashboard", - content="This dashboard shows donor-wise funding and cashflow trends.", - metadata={ - "source_type": 
"dashboard_export", - "source_identifier": f"dashboard:{related_dashboard.id}:summary", - "dashboard_id": related_dashboard.id, - }, - distance=0.05, - ), ] ) runtime = DashboardChatRuntime( vector_store=vector_store, - llm_client=ContextOnlyLlm(), - runtime_config=DashboardChatRuntimeConfig( - retrieval_limit=6, - related_dashboard_limit=2, - max_query_rows=200, - max_distinct_values=20, - max_schema_tables=4, - ), + llm_client=ContextToolLoopLlm(), source_config=DashboardChatSourceConfig( enabled_source_types=( "dashboard_context", @@ -644,5 +1691,193 @@ def test_runtime_skips_disabled_source_types_during_retrieval( ) queried_source_groups = [tuple(call["source_types"]) for call in vector_store.calls] - assert ("org_context",) not in queried_source_groups - assert ("dbt_manifest", "dbt_catalog") not in queried_source_groups + assert all("org_context" not in source_group for source_group in queried_source_groups) + + +def test_list_tables_by_keyword_matches_allowlisted_table_names_without_schema_lookup(org): + """Keyword table lookup should work even when schema snippets are not yet cached.""" + fake_warehouse = FakeWarehouseTools() + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=ContextToolLoopLlm(), + warehouse_tools_factory=lambda org: fake_warehouse, + ) + state = { + "org": org, + "allowlist": DashboardChatAllowlist( + allowed_tables={ + "analytics.district_funding_efficiency_quarterly", + "analytics.facilitator_effectiveness_quarterly", + } + ), + } + execution_context = {"schema_cache": {}, "warnings": []} + + result = runtime._tool_list_tables_by_keyword( + {"keyword": "district_funding_efficiency_quarterly", "limit": 10}, + state, + execution_context, + ) + + assert result["tables"][0]["table"] == "analytics.district_funding_efficiency_quarterly" + + +def test_dashboard_chat_response_to_dict_serializes_decimal_sql_results(): + """Final response payloads must be JSON-safe before they are persisted.""" + response = 
DashboardChatResponse( + answer_text="Answer", + intent=DashboardChatIntent.QUERY_WITH_SQL, + sql_results=[{"quarter": "2025 Q2", "funding": Decimal("105000.00")}], + ) + + payload = response.to_dict() + + assert payload["sql_results"] == [{"quarter": "2025 Q2", "funding": "105000.00"}] + + +def test_allowlist_adds_upstream_dbt_tables(): + """Allowlist should include chart tables and their upstream dbt lineage.""" + export_payload = { + "dashboard": {"title": "Impact Overview"}, + "charts": [{"id": 1, "schema_name": "analytics", "table_name": "fact_reach"}], + } + manifest_json = { + "nodes": { + "model.dalgo.fact_reach": { + "resource_type": "model", + "schema": "analytics", + "name": "fact_reach", + "depends_on": {"nodes": ["model.dalgo.dim_program", "source.dalgo.raw_students"]}, + }, + "model.dalgo.dim_program": { + "resource_type": "model", + "schema": "analytics", + "name": "dim_program", + "depends_on": {"nodes": []}, + }, + }, + "sources": { + "source.dalgo.raw_students": { + "resource_type": "source", + "schema": "raw", + "name": "students", + } + }, + } + + allowlist = DashboardChatAllowlistBuilder.build(export_payload, manifest_json=manifest_json) + + assert allowlist.chart_tables == {"analytics.fact_reach"} + assert "analytics.dim_program" in allowlist.upstream_tables + assert "raw.students" in allowlist.allowed_tables + assert allowlist.is_allowed("analytics.fact_reach") is True + assert allowlist.is_unique_id_allowed("model.dalgo.dim_program") is True + + +def test_tool_document_payload_exposes_structured_chart_metadata(): + """Chart retrieval payloads should surface exact table, metric, and dimension hints.""" + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=SmallTalkLlm(), + ) + + payload = runtime._tool_document_payload( + DashboardChatRetrievedDocument( + document_id="doc-chart", + source_type=DashboardChatSourceType.DASHBOARD_EXPORT.value, + source_identifier="dashboard:6:chart:7", + content="Facilitator outcomes 
chart", + dashboard_id=6, + distance=0.02, + ), + DashboardChatAllowlist( + allowed_tables={"analytics.facilitator_effectiveness_quarterly"} + ), + { + "dashboard": {"title": "Facilitator Effectiveness Studio"}, + "charts": [ + { + "id": 7, + "title": "Facilitator Outcomes", + "chart_type": "bar", + "schema_name": "analytics", + "table_name": "facilitator_effectiveness_quarterly", + "extra_config": { + "dimension_col": "quarter_label", + "extra_dimension": "facilitator_name", + "metrics": [ + {"column": "cost_per_improved_outcome_usd"}, + {"column": "improved_literacy_students"}, + ], + }, + } + ], + }, + ) + + assert payload["metadata"]["preferred_table"] == "analytics.facilitator_effectiveness_quarterly" + assert payload["metadata"]["metric_columns"] == [ + "cost_per_improved_outcome_usd", + "improved_literacy_students", + ] + assert payload["metadata"]["dimension_columns"] == [ + "quarter_label", + "facilitator_name", + ] + assert payload["metadata"]["time_column"] == "quarter_label" + + +def test_sql_guard_enforces_single_statement_allowlist_and_limit(): + """SQL guard should block unsafe queries and add a row limit when absent.""" + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + + multi_statement = guard.validate( + "SELECT * FROM analytics.program_reach; DELETE FROM analytics.program_reach" + ) + assert multi_statement.is_valid is False + assert multi_statement.errors == ["Multiple statements are not allowed"] + + disallowed_table = guard.validate("SELECT * FROM analytics.other_table") + assert disallowed_table.is_valid is False + assert any("not accessible" in error for error in disallowed_table.errors) + + allowed_query = guard.validate( + "SELECT COUNT(*) AS beneficiary_count FROM analytics.program_reach" + ) + assert allowed_query.is_valid is True + assert allowed_query.sanitized_sql.endswith("LIMIT 200") + assert any("No LIMIT clause found" in warning for 
warning in allowed_query.warnings) + + +def test_sql_guard_rejects_row_level_pii_queries(): + """SQL guard should reject row-level projections of sensitive fields.""" + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + + pii_query = guard.validate( + "SELECT email, COUNT(*) AS beneficiary_count " + "FROM analytics.program_reach " + "GROUP BY email " + "LIMIT 50" + ) + + assert pii_query.is_valid is False + assert pii_query.sanitized_sql is None + assert pii_query.errors == [ + "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." + ] + + +def test_sql_guard_rejects_select_into_queries(): + """SQL guard should reject SELECT ... INTO statements.""" + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) + + select_into_query = guard.validate( + "SELECT program_name INTO temp_programs FROM analytics.program_reach LIMIT 50" + ) + + assert select_into_query.is_valid is False + assert select_into_query.sanitized_sql is None + assert "SELECT INTO is not allowed" in select_into_query.errors diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index ca057461d..9a2ad5ab5 100644 --- a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -1,25 +1,22 @@ """Tests for dashboard chat session creation and reuse rules.""" -import os +from datetime import datetime, timezone -import django import pytest -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") -os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" -django.setup() - from django.contrib.auth.models import User from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.core.dashboard_chat.session_service import ( DashboardChatSessionError, 
create_dashboard_chat_user_message, + create_dashboard_chat_user_message_with_status, get_or_create_dashboard_chat_session, ) +from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession -from ddpui.models.org import Org +from ddpui.models.org import Org, OrgDbt from ddpui.models.org_user import OrgUser from ddpui.models.role_based_access import Role from ddpui.tests.api_tests.test_user_org_api import seed_db @@ -107,6 +104,33 @@ def test_get_or_create_dashboard_chat_session_creates_new_session(session_owner, assert isinstance(session, DashboardChatSession) assert session.orguser == session_owner assert session.dashboard == dashboard + assert session.vector_collection_name is None + + +def test_get_or_create_dashboard_chat_session_pins_active_vector_collection( + session_owner, + dashboard, +): + """New chat sessions should pin the active org vector collection at creation time.""" + org_dbt = OrgDbt.objects.create( + project_dir="client_dbt/dashchat", + target_type="postgres", + default_schema="analytics", + vector_last_ingested_at=datetime(2026, 3, 23, 12, 0, tzinfo=timezone.utc), + ) + session_owner.org.dbt = org_dbt + session_owner.org.save(update_fields=["dbt"]) + + session = get_or_create_dashboard_chat_session( + orguser=session_owner, + dashboard=dashboard, + session_id=None, + ) + + assert session.vector_collection_name == build_dashboard_chat_collection_name( + session_owner.org.id, + version=org_dbt.vector_last_ingested_at, + ) def test_get_or_create_dashboard_chat_session_rejects_other_user_session( @@ -156,3 +180,30 @@ def test_create_dashboard_chat_user_message_is_idempotent_for_client_message_id( assert first_message.id == second_message.id assert first_message.sequence_number == 1 assert DashboardChatMessage.objects.filter(session=session).count() == 1 + + +def 
test_create_dashboard_chat_user_message_with_status_marks_reused_message( + session_owner, + dashboard, +): + """The duplicate-detection path must report that the second write reused the row.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + + first_result = create_dashboard_chat_user_message_with_status( + session=session, + content="Why did funding drop?", + client_message_id="client-1", + ) + second_result = create_dashboard_chat_user_message_with_status( + session=session, + content="Why did funding drop?", + client_message_id="client-1", + ) + + assert first_result.created is True + assert second_result.created is False + assert first_result.message.id == second_result.message.id diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 60e65d24c..2ba10442e 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -1,13 +1,7 @@ -import os from unittest.mock import Mock, patch -import django import pytest -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") -os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" -django.setup() - from django.contrib.auth.models import User from django.utils import timezone @@ -119,9 +113,9 @@ def test_build_dashboard_chat_context_for_org_skips_when_locked(orguser): redis_client = Mock() redis_client.lock.return_value = redis_lock - with patch("ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client), patch( - "ddpui.celeryworkers.tasks.DashboardChatIngestionService" - ) as ingestion_service: + with patch( + "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client + ), patch("ddpui.celeryworkers.tasks.DashboardChatIngestionService") as ingestion_service: result = build_dashboard_chat_context_for_org.run(org.id) assert result == {"status": "skipped_locked", "org_id": org.id} @@ -151,7 
+145,9 @@ def test_build_dashboard_chat_context_for_org_runs_ingestion(orguser): ingestion_service = Mock() ingestion_service.ingest_org.return_value = result_payload - with patch("ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client), patch( + with patch( + "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client + ), patch( "ddpui.celeryworkers.tasks.DashboardChatIngestionService", return_value=ingestion_service, ): @@ -186,7 +182,7 @@ def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( ) runtime_class.return_value.run.return_value = DashboardChatResponse( answer_text="Funding dropped because donor inflows slowed this quarter.", - intent=DashboardChatIntent.DATA_QUERY, + intent=DashboardChatIntent.QUERY_WITH_SQL, warnings=["Example warning"], sql="SELECT 1", sql_results=[{"value": 1}], @@ -229,3 +225,42 @@ def test_run_dashboard_chat_turn_publishes_error_when_runtime_fails( assert DashboardChatMessage.objects.filter(session=session, role="assistant").count() == 0 publish_event.assert_called_once() + + +@patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") +@patch("ddpui.celeryworkers.tasks.DashboardChatRuntime") +def test_run_dashboard_chat_turn_reuses_existing_assistant_reply( + runtime_class, + publish_event, + orguser, +): + _create_org_dbt(orguser.org) + dashboard = _create_dashboard(orguser) + session = DashboardChatSession.objects.create( + org=orguser.org, + orguser=orguser, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + assistant_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=2, + role="assistant", + content="Existing answer", + payload={"intent": "query_without_sql"}, + ) + + result = run_dashboard_chat_turn(str(session.session_id), user_message.id) + + assert result == { + "status": 
"skipped_existing_reply", + "session_id": str(session.session_id), + "assistant_message_id": assistant_message.id, + } + runtime_class.return_value.run.assert_not_called() + publish_event.assert_not_called() diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py b/ddpui/tests/core/dashboard_chat/test_vector_store.py index 51b44e7f8..b97c58bed 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -116,6 +116,9 @@ def delete_collection(self, name): self.deleted_collections.append(name) del self.collections[name] + def list_collections(self): + return list(self.collections.keys()) + def test_dashboard_chat_vector_store_config_reads_env(): """Vector store config should read the dedicated dashboard chat env vars.""" @@ -142,6 +145,10 @@ def test_collection_name_uses_org_prefix(): """Collections should be split by org using the configured prefix.""" assert build_dashboard_chat_collection_name(42) == "org_42" assert build_dashboard_chat_collection_name(42, prefix="tenant_") == "tenant_42" + assert ( + build_dashboard_chat_collection_name(42, version="20260323T120000Z") + == "org_42__20260323T120000Z" + ) def test_vector_document_has_stable_id_and_required_metadata(): diff --git a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py index 3cca9d879..4676d1308 100644 --- a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py +++ b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py @@ -1,17 +1,11 @@ """Unit tests for dashboard chat warehouse helpers.""" import json -import os from types import SimpleNamespace from unittest.mock import patch -import django import pytest -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ddpui.settings") -os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true" -django.setup() - from ddpui.core.dashboard_chat.warehouse_tools import ( DashboardChatWarehouseTools, DashboardChatWarehouseToolsError, diff 
--git a/ddpui/tests/websockets/test_dashboard_chat_consumer.py b/ddpui/tests/websockets/test_dashboard_chat_consumer.py index 4af5ca03f..fe9508522 100644 --- a/ddpui/tests/websockets/test_dashboard_chat_consumer.py +++ b/ddpui/tests/websockets/test_dashboard_chat_consumer.py @@ -38,24 +38,28 @@ def test_dashboard_chat_consumer_send_message_requires_available_chat(): assert payload["data"]["message"] == "Chat unavailable" -@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_event") -@patch("ddpui.websockets.dashboard_chat_consumer.run_dashboard_chat_turn.delay") -@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message") +@patch("ddpui.websockets.dashboard_chat_consumer.serialize_dashboard_chat_message") +@patch("ddpui.websockets.dashboard_chat_consumer.execute_dashboard_chat_turn") +@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") @patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") -def test_dashboard_chat_consumer_send_message_creates_session_and_dispatches_task( +def test_dashboard_chat_consumer_send_message_creates_session_and_runs_inline( mock_get_or_create_session, mock_create_user_message, - mock_delay, - mock_publish_event, + mock_execute_turn, + mock_serialize_message, ): session = Mock(session_id="session-123") user_message = Mock(id=17) + assistant_message = Mock(id=18) mock_get_or_create_session.return_value = session - mock_create_user_message.return_value = user_message + mock_create_user_message.return_value = Mock(message=user_message, created=True) + mock_execute_turn.return_value = {"status": "completed", "assistant_message": assistant_message} + mock_serialize_message.return_value = {"id": "18", "role": "assistant"} consumer = DashboardChatConsumer() consumer.dashboard = Mock(id=42) consumer.orguser = Mock() + consumer.send = Mock() consumer._chat_available = Mock(return_value=(True, "")) 
consumer._subscribe_to_session = Mock() @@ -74,27 +78,29 @@ def test_dashboard_chat_consumer_send_message_creates_session_and_dispatches_tas mock_get_or_create_session.assert_called_once() mock_create_user_message.assert_called_once() consumer._subscribe_to_session.assert_called_once_with("session-123") - mock_publish_event.assert_called_once() - mock_delay.assert_called_once_with("session-123", 17) + mock_execute_turn.assert_called_once_with("session-123", 17) + + first_payload = json.loads(consumer.send.call_args_list[0].kwargs["text_data"]) + second_payload = json.loads(consumer.send.call_args_list[1].kwargs["text_data"]) + assert first_payload["event_type"] == "progress" + assert second_payload["event_type"] == "assistant_message" -@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_event") @patch( - "ddpui.websockets.dashboard_chat_consumer.run_dashboard_chat_turn.delay", - side_effect=RuntimeError("enqueue failed"), + "ddpui.websockets.dashboard_chat_consumer.execute_dashboard_chat_turn", + side_effect=RuntimeError("inline failed"), ) -@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message") +@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") @patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") -def test_dashboard_chat_consumer_send_message_returns_error_when_enqueue_fails( +def test_dashboard_chat_consumer_send_message_returns_error_when_inline_turn_fails( mock_get_or_create_session, mock_create_user_message, - mock_delay, - mock_publish_event, + mock_execute_turn, ): session = Mock(session_id="session-123") user_message = Mock(id=17) mock_get_or_create_session.return_value = session - mock_create_user_message.return_value = user_message + mock_create_user_message.return_value = Mock(message=user_message, created=True) consumer = DashboardChatConsumer() consumer.dashboard = Mock(id=42) @@ -115,10 +121,50 @@ def 
test_dashboard_chat_consumer_send_message_returns_error_when_enqueue_fails( } ) - mock_delay.assert_called_once_with("session-123", 17) - consumer._subscribe_to_session.assert_not_called() - mock_publish_event.assert_not_called() + mock_execute_turn.assert_called_once_with("session-123", 17) + consumer._subscribe_to_session.assert_called_once_with("session-123") - payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + payload = json.loads(consumer.send.call_args_list[-1].kwargs["text_data"]) assert payload["event_type"] == "error" - assert payload["data"]["message"] == "Unable to start chat right now" + assert payload["data"]["message"] == "Something went wrong while generating the response" + + +@patch("ddpui.websockets.dashboard_chat_consumer.serialize_dashboard_chat_message") +@patch("ddpui.websockets.dashboard_chat_consumer.find_dashboard_chat_assistant_reply") +@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_reuses_existing_turn_without_running_duplicate_turn( + mock_get_or_create_session, + mock_create_user_message, + mock_find_assistant_reply, + mock_serialize_message, +): + session = Mock(session_id="session-123") + user_message = Mock(id=17) + assistant_message = Mock(id=22) + mock_get_or_create_session.return_value = session + mock_create_user_message.return_value = Mock(message=user_message, created=False) + mock_find_assistant_reply.return_value = assistant_message + mock_serialize_message.return_value = {"id": "22", "role": "assistant"} + + consumer = DashboardChatConsumer() + consumer.dashboard = Mock(id=42) + consumer.orguser = Mock() + consumer._chat_available = Mock(return_value=(True, "")) + consumer._subscribe_to_session = Mock() + + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + 
"client_message_id": "ui-1", + } + ) + } + ) + + consumer._subscribe_to_session.assert_called_once_with("session-123") + payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + assert payload["event_type"] == "assistant_message" diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 90b5b87fa..717a592f3 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -6,14 +6,14 @@ from ddpui.core.dashboard_chat.events import ( build_dashboard_chat_event, dashboard_chat_group_name, - publish_dashboard_chat_event, ) from ddpui.core.dashboard_chat.session_service import ( DashboardChatSessionError, - create_dashboard_chat_user_message, + create_dashboard_chat_user_message_with_status, + find_dashboard_chat_assistant_reply, get_or_create_dashboard_chat_session, + serialize_dashboard_chat_message, ) -from ddpui.celeryworkers.tasks import run_dashboard_chat_turn from ddpui.models.dashboard import Dashboard from ddpui.models.org_preferences import OrgPreferences from ddpui.models.role_based_access import RolePermission @@ -77,33 +77,76 @@ def websocket_receive(self, message): self._respond_error(str(error)) return - user_message = create_dashboard_chat_user_message( + user_message_result = create_dashboard_chat_user_message_with_status( session=session, content=raw_message, client_message_id=payload.get("client_message_id"), ) - try: - run_dashboard_chat_turn.delay(str(session.session_id), user_message.id) - except Exception: - logger.exception( - "dashboard chat turn could not be enqueued for session=%s", - session.session_id, + user_message = user_message_result.message + self._subscribe_to_session(str(session.session_id)) + + if not user_message_result.created: + assistant_message = find_dashboard_chat_assistant_reply( + session=session, + user_message=user_message, ) - self._respond_error("Unable to start chat right now") + if assistant_message is not None: 
+ self._send_event( + build_dashboard_chat_event( + event_type="assistant_message", + session_id=str(session.session_id), + dashboard_id=self.dashboard.id, + message_id=str(assistant_message.id), + data=serialize_dashboard_chat_message(assistant_message), + ), + ) return - self._subscribe_to_session(str(session.session_id)) - publish_dashboard_chat_event( - str(session.session_id), + self._send_event( build_dashboard_chat_event( event_type="progress", session_id=str(session.session_id), dashboard_id=self.dashboard.id, message_id=str(user_message.id), data={"label": "thinking"}, - ), + ) ) + try: + from ddpui.celeryworkers.tasks import execute_dashboard_chat_turn + + result = execute_dashboard_chat_turn(str(session.session_id), user_message.id) + except Exception: + logger.exception( + "dashboard chat turn failed inline for session=%s", + session.session_id, + ) + self._respond_error("Something went wrong while generating the response") + return + + assistant_message = result.get("assistant_message") + if result["status"] in {"completed", "skipped_existing_reply"} and assistant_message is not None: + self._send_event( + build_dashboard_chat_event( + event_type="assistant_message", + session_id=str(session.session_id), + dashboard_id=self.dashboard.id, + message_id=str(assistant_message.id), + data=serialize_dashboard_chat_message(assistant_message), + ) + ) + return + + if result["status"] == "skipped_missing_session": + self._respond_error("Chat session could not be found") + return + + if result["status"] == "skipped_missing_message": + self._respond_error("Chat message could not be found") + return + + self._respond_error("Something went wrong while generating the response") + def websocket_disconnect(self, message): """Remove the socket from any joined session groups on disconnect.""" if getattr(self, "channel_layer", None) is None: @@ -125,16 +168,20 @@ def _subscribe_to_session(self, session_id: str) -> None: def _respond_error(self, message: str) -> None: 
"""Send one direct websocket error event.""" - self.send( - text_data=json.dumps( - build_dashboard_chat_event( - event_type="error", - dashboard_id=self.dashboard.id if getattr(self, "dashboard", None) else None, - data={"message": message}, - ) + self._send_event( + build_dashboard_chat_event( + event_type="error", + dashboard_id=self.dashboard.id if getattr(self, "dashboard", None) else None, + data={"message": message}, ) ) + def _send_event(self, event: dict) -> None: + """Send one websocket event directly to the current socket.""" + self.send( + text_data=json.dumps(event) + ) + def _chat_available(self) -> tuple[bool, str]: """Return whether the current org is ready for dashboard chat.""" feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get( From 0a7be4af328ef2f9d8976b3092fa642350b3c6f2 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 25 Mar 2026 12:26:06 +0530 Subject: [PATCH 15/49] fix(ai-chat): improve answer formatting contract --- ddpui/core/dashboard_chat/llm_client.py | 124 ++++++++++ ddpui/core/dashboard_chat/prompt_store.py | 30 +++ ddpui/core/dashboard_chat/runtime.py | 219 ++++++++++++++++-- ...prompttemplate_final_answer_composition.py | 66 ++++++ ddpui/models/dashboard_chat.py | 4 + .../core/dashboard_chat/test_llm_client.py | 108 +++++++++ .../core/dashboard_chat/test_prompt_store.py | 5 + .../tests/core/dashboard_chat/test_runtime.py | 141 ++++++++++- 8 files changed, 673 insertions(+), 24 deletions(-) create mode 100644 ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py diff --git a/ddpui/core/dashboard_chat/llm_client.py b/ddpui/core/dashboard_chat/llm_client.py index fd936c0ad..52e4acb64 100644 --- a/ddpui/core/dashboard_chat/llm_client.py +++ b/ddpui/core/dashboard_chat/llm_client.py @@ -12,6 +12,7 @@ DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, + DashboardChatRetrievedDocument, ) from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey @@ 
-47,6 +48,20 @@ def run_tool_loop_turn( ) -> dict[str, Any]: """Run one prototype-style tool-loop completion.""" + def compose_final_answer( + self, + *, + user_query: str, + intent: DashboardChatIntent, + response_format: str, + draft_answer: str | None, + retrieved_documents: list[DashboardChatRetrievedDocument], + sql: str | None, + sql_results: list[dict[str, Any]] | None, + warnings: list[str], + ) -> str: + """Compose the final user-facing markdown answer.""" + class OpenAIDashboardChatLlmClient: """Direct OpenAI SDK adapter with JSON-mode helpers.""" @@ -54,6 +69,21 @@ class OpenAIDashboardChatLlmClient: TECHNICAL_DIFFICULTIES_MESSAGE = ( "I'm experiencing technical difficulties. Please try again." ) + TABLE_SUMMARY_JSON_INSTRUCTIONS = """ +For table-like responses, return valid JSON only with this shape: +{ + "title": "short heading or null", + "summary": "1-2 sentence narrative summary", + "key_points": ["short point", "short point"] +} + +Rules: +- Do not include markdown tables. +- Do not include pipe characters or ASCII table formatting. +- Do not repeat every row from the result set. +- The UI will render the structured table separately from sql_results. +- Keep key_points to at most 3 concise bullets. 
+""".strip() def __init__( self, @@ -168,6 +198,71 @@ def compose_small_talk(self, user_query: str) -> str: answer = response.choices[0].message.content or "" return answer.strip() + def compose_final_answer( + self, + *, + user_query: str, + intent: DashboardChatIntent, + response_format: str, + draft_answer: str | None, + retrieved_documents: list[DashboardChatRetrievedDocument], + sql: str | None, + sql_results: list[dict[str, Any]] | None, + warnings: list[str], + ) -> str: + """Compose the final user-facing markdown answer from tool-loop outputs.""" + context_payload = { + "user_query": user_query, + "intent": intent.value, + "response_format": response_format, + "draft_answer": draft_answer or None, + "warnings": warnings[:5], + "sql": sql, + "sql_results": (sql_results or [])[:8], + "row_count": len(sql_results or []), + "retrieved_context": [ + { + "source_type": document.source_type, + "source_identifier": document.source_identifier, + "content": self._compact_snippet(document.content), + } + for document in retrieved_documents[:6] + ], + } + if response_format in {"text_with_table", "table"}: + result = self._complete_json( + operation="final_answer_table_summary", + system_prompt=( + self.prompt_store.get( + DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION + ) + + "\n\n" + + self.TABLE_SUMMARY_JSON_INSTRUCTIONS + ), + user_prompt=json.dumps(context_payload, ensure_ascii=False), + ) + return self._format_table_summary_markdown(result) + + response = self._create_chat_completion( + messages=[ + { + "role": "system", + "content": self.prompt_store.get( + DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION + ), + }, + { + "role": "user", + "content": json.dumps(context_payload, ensure_ascii=False), + }, + ], + temperature=0.1, + max_tokens=400, + ) + self._record_usage("final_answer_composition", response) + answer = response.choices[0].message.content or "" + return answer.strip() + def get_prompt(self, prompt_key: DashboardChatPromptTemplateKey 
| str) -> str: """Return one stored dashboard chat prompt.""" return self.prompt_store.get(prompt_key) @@ -266,3 +361,32 @@ def _create_chat_completion(self, **kwargs: Any) -> Any: sleep(min(2**attempt, 2)) assert last_error is not None raise last_error + + @staticmethod + def _compact_snippet(content: str, max_length: int = 320) -> str: + """Trim retrieved context before feeding it into the final answer prompt.""" + normalized_content = " ".join(content.split()) + if len(normalized_content) <= max_length: + return normalized_content + return normalized_content[: max_length - 1].rstrip() + "…" + + @staticmethod + def _format_table_summary_markdown(result: dict[str, Any]) -> str: + """Render a structured table summary into short markdown without any table body.""" + title = str(result.get("title") or "").strip() + summary = str(result.get("summary") or "").strip() + raw_key_points = result.get("key_points") or [] + key_points = [ + str(point).strip() + for point in raw_key_points + if isinstance(point, str) and point.strip() + ][:3] + + sections: list[str] = [] + if title: + sections.append(f"### {title}") + if summary: + sections.append(summary) + if key_points: + sections.append("\n".join(f"- {point}" for point in key_points)) + return "\n\n".join(section for section in sections if section).strip() diff --git a/ddpui/core/dashboard_chat/prompt_store.py b/ddpui/core/dashboard_chat/prompt_store.py index 84e79d914..31b6b5e37 100644 --- a/ddpui/core/dashboard_chat/prompt_store.py +++ b/ddpui/core/dashboard_chat/prompt_store.py @@ -195,10 +195,40 @@ "Keep answers concise, friendly, and non-technical when possible." ) +PROTOTYPE_FINAL_ANSWER_COMPOSITION_PROMPT = """You are the final answer writer for Chat with Dashboards. 
+ +You will receive a JSON payload containing: +- the user query +- the routed intent +- a draft tool-loop answer, if any +- retrieved context snippets +- SQL used, if any +- SQL result rows or summaries, if any +- a response format hint +- warnings + +Write the final user-facing answer in markdown. + +CRITICAL RULES: +1. Never output raw JSON objects or raw tool payloads. +2. Never dump SQL result rows verbatim. +3. If `response_format` is `text_with_table` or `table`, write a short narrative summary only. The UI will render the structured table separately. +4. If `response_format` is `text`, answer fully in markdown using headings or bullets when helpful. +5. If the question is explanatory or contextual, answer directly from the provided context and draft answer. Do not append unrelated row data. +6. If no matching rows were found, say so plainly. +7. Use concise, analyst-quality language. Prefer clear interpretation over exhaustive repetition. +8. If the provided result values look like rates or percentages, describe them naturally as percentages when appropriate. +9. Mention important caveats only when they materially affect the answer. 
+ +Return markdown only, with no code fences unless the user explicitly asked for code or SQL.""" + DEFAULT_DASHBOARD_CHAT_PROMPTS = { DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION: PROTOTYPE_INTENT_CLASSIFICATION_PROMPT, DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM: PROTOTYPE_NEW_QUERY_SYSTEM_PROMPT, DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM: PROTOTYPE_FOLLOW_UP_SYSTEM_PROMPT, + DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION: ( + PROTOTYPE_FINAL_ANSWER_COMPOSITION_PROMPT + ), DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES: ( PROTOTYPE_SMALL_TALK_CAPABILITIES_PROMPT ), diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py index 689d5d087..fb783a097 100644 --- a/ddpui/core/dashboard_chat/runtime.py +++ b/ddpui/core/dashboard_chat/runtime.py @@ -493,8 +493,16 @@ def _run_prototype_intent( state["sql_validation"] = execution_result["sql_validation"] state["sql_results"] = execution_result["sql_results"] state["warnings"] = execution_result["warnings"] + response_format = self._determine_response_format( + user_query=state["user_query"], + sql_results=execution_result["sql_results"], + ) state["response"] = DashboardChatResponse( - answer_text=execution_result["answer_text"], + answer_text=self._compose_final_answer_text( + state, + execution_result, + response_format=response_format, + ), intent=state["intent_decision"].intent, citations=state["citations"], warnings=execution_result["warnings"], @@ -502,6 +510,10 @@ def _run_prototype_intent( sql_results=execution_result["sql_results"], usage=self._build_usage_summary(), tool_calls=execution_result["tool_calls"], + metadata={ + "response_format": response_format, + "table_columns": self._sql_result_columns(execution_result["sql_results"]), + }, ) return state @@ -648,13 +660,7 @@ def _execute_tool_loop( ) if str(tool_call.get("name") or "") == "run_sql_query" and result.get("success"): return self._build_execution_result( - answer_text=( - 
result.get("data_preview") - or self._fallback_answer_text( - execution_context["retrieved_documents"], - execution_context["last_sql_results"], - ) - ), + answer_text="", execution_context=execution_context, max_turns_reached=False, ) @@ -1361,16 +1367,9 @@ def _node_finalize_response( ) allowlist = state.get("allowlist") or DashboardChatAllowlist() - state["response"] = DashboardChatResponse( - answer_text=response.answer_text, - intent=response.intent, - citations=list(dict.fromkeys(citations)), - warnings=response.warnings, - sql=response.sql, - sql_results=response.sql_results, - usage=response.usage, - tool_calls=response.tool_calls, - metadata={ + response_metadata = dict(response.metadata) + response_metadata.update( + { "dashboard_id": state["dashboard_id"], "retrieved_document_ids": [ document.document_id for document in state.get("retrieved_documents") or [] @@ -1380,7 +1379,18 @@ def _node_finalize_response( "intent_reason": state["intent_decision"].reason, "missing_info": state["intent_decision"].missing_info, "follow_up_type": state["intent_decision"].follow_up_context.follow_up_type, - }, + } + ) + state["response"] = DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=list(dict.fromkeys(citations)), + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response_metadata, ) return state @@ -2735,6 +2745,83 @@ def _max_turns_message( "Please rephrase or ask about a metric shown on this dashboard." 
) + def _compose_final_answer_text( + self, + state: DashboardChatRuntimeState, + execution_result: dict[str, Any], + *, + response_format: str, + ) -> str: + """Compose one final markdown answer for all non-trivial routes.""" + normalized_sql_results = self._normalize_sql_results_for_answer( + execution_result.get("sql_results") + ) + draft_answer = (execution_result.get("answer_text") or "").strip() or None + if hasattr(self.llm_client, "compose_final_answer"): + try: + answer_text = self.llm_client.compose_final_answer( + user_query=state["user_query"], + intent=state["intent_decision"].intent, + response_format=response_format, + draft_answer=draft_answer, + retrieved_documents=list(execution_result.get("retrieved_documents") or []), + sql=execution_result.get("sql"), + sql_results=normalized_sql_results, + warnings=list(execution_result.get("warnings") or []), + ) + if answer_text: + return answer_text + except Exception: + logger.exception("Dashboard chat final answer composition failed") + return self._fallback_answer_text( + execution_result.get("retrieved_documents") or [], + normalized_sql_results, + response_format=response_format, + draft_answer=draft_answer, + ) + + @staticmethod + def _determine_response_format( + *, + user_query: str, + sql_results: list[dict[str, Any]] | None, + ) -> str: + """Return how the frontend should present the final answer.""" + if not sql_results: + return "text" + first_row = sql_results[0] if sql_results else {} + column_count = len(first_row.keys()) if isinstance(first_row, dict) else 0 + normalized_query = user_query.lower() + tableish_keywords = [ + "breakdown", + "split by", + "list", + "table", + "tabular", + "rank", + "ranking", + "top ", + "bottom ", + "wise", + ] + if "table" in normalized_query and column_count > 0: + return "table" + if len(sql_results) > 1 and column_count > 1: + return "text_with_table" + if any(keyword in normalized_query for keyword in tableish_keywords) and column_count > 1: + return 
"text_with_table" + return "text" + + @staticmethod + def _sql_result_columns(sql_results: list[dict[str, Any]] | None) -> list[str]: + """Return table columns for frontend rendering metadata.""" + if not sql_results: + return [] + first_row = sql_results[0] + if not isinstance(first_row, dict): + return [] + return list(first_row.keys()) + def _build_usage_summary(self) -> dict[str, Any]: """Collect per-turn usage from the LLM client and embedding provider when supported.""" usage: dict[str, Any] = {} @@ -2801,15 +2888,105 @@ def _clarification_fallback(missing_info: Sequence[str]) -> str: def _fallback_answer_text( retrieved_documents: Sequence[DashboardChatRetrievedDocument], sql_results: list[dict[str, Any]] | None, + *, + response_format: str = "text", + draft_answer: str | None = None, ) -> str: """Fallback response when the model returns no final text.""" + if draft_answer: + return draft_answer if sql_results is not None: if not sql_results: return "I didn't find any matching rows for that question." - return DashboardChatRuntime._preview_sql_rows(sql_results) + if response_format in {"text_with_table", "table"}: + return f"I found {len(sql_results)} matching rows. See the table below for the breakdown." + if len(sql_results) == 1: + return DashboardChatRuntime._single_row_summary(sql_results[0]) + return f"I found {len(sql_results)} matching rows." if retrieved_documents: return DashboardChatRuntime._compact_snippet(retrieved_documents[0].content) - return "I couldn't find enough context to answer that." + return "I couldn't find enough context to answer that." 
+ + @staticmethod + def _single_row_summary(row: dict[str, Any]) -> str: + """Return a readable fallback when one structured row is available.""" + parts = [ + f"{DashboardChatRuntime._humanize_column_name(column)}: {value}" + for column, value in row.items() + ] + return "; ".join(parts) + + @staticmethod + def _humanize_column_name(column_name: str) -> str: + """Convert snake_case warehouse columns into human labels.""" + return str(column_name).replace("_", " ").strip().title() + + @classmethod + def _normalize_sql_results_for_answer( + cls, + sql_results: list[dict[str, Any]] | None, + ) -> list[dict[str, Any]] | None: + """Normalize SQL results into LLM-friendly values for final answer writing.""" + if sql_results is None: + return None + normalized_rows: list[dict[str, Any]] = [] + for row in sql_results: + normalized_row: dict[str, Any] = {} + for column_name, value in row.items(): + normalized_row[column_name] = cls._normalize_sql_value_for_answer( + column_name, + value, + ) + normalized_rows.append(normalized_row) + return normalized_rows + + @classmethod + def _normalize_sql_value_for_answer(cls, column_name: str, value: Any) -> Any: + """Format warehouse values into user-friendly forms for answer composition.""" + if value is None: + return None + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return cls._format_numeric_answer_value(column_name, value) + text_value = str(value) + numeric_value = cls._parse_numeric_string(text_value) + if numeric_value is None: + return text_value + return cls._format_numeric_answer_value(column_name, numeric_value) + + @classmethod + def _format_numeric_answer_value(cls, column_name: str, value: float | int) -> str | int | float: + """Format numeric values for answer composition.""" + if cls._looks_like_rate_metric(column_name) and 0 <= float(value) <= 1: + percentage_value = f"{float(value) * 100:.1f}".rstrip("0").rstrip(".") + return f"{percentage_value}%" + rounded_value = 
round(float(value), 2) + if float(rounded_value).is_integer(): + return int(rounded_value) + return f"{rounded_value:.2f}".rstrip("0").rstrip(".") + + @staticmethod + def _parse_numeric_string(value: str) -> float | None: + """Parse decimal-like strings emitted by DjangoJSONEncoder.""" + normalized_value = value.strip() + if not normalized_value: + return None + if not re.fullmatch(r"-?\d+(?:\.\d+)?(?:E-?\d+)?", normalized_value, flags=re.IGNORECASE): + return None + try: + return float(normalized_value) + except ValueError: + return None + + @staticmethod + def _looks_like_rate_metric(column_name: str) -> bool: + """Return whether a metric name likely represents a percentage/rate.""" + normalized_column = column_name.lower() + return any( + token in normalized_column + for token in ["rate", "ratio", "percentage", "percent", "share", "pct"] + ) @staticmethod def _chart_id_from_source_identifier(source_identifier: str) -> int | None: diff --git a/ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py b/ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py new file mode 100644 index 000000000..c7c63bb0c --- /dev/null +++ b/ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py @@ -0,0 +1,66 @@ +# Generated by Django 4.2 on 2026-03-24 13:15 + +from django.db import migrations, models + +FINAL_ANSWER_COMPOSITION_PROMPT = """You are the final answer writer for Chat with Dashboards. + +You will receive a JSON payload containing: +- the user query +- the routed intent +- a draft tool-loop answer, if any +- retrieved context snippets +- SQL used, if any +- SQL result rows or summaries, if any +- a response format hint +- warnings + +Write the final user-facing answer in markdown. + +CRITICAL RULES: +1. Never output raw JSON objects or raw tool payloads. +2. Never dump SQL result rows verbatim. +3. If `response_format` is `text_with_table` or `table`, write a short narrative summary only. 
The UI will render the structured table separately. +4. If `response_format` is `text`, answer fully in markdown using headings or bullets when helpful. +5. If the question is explanatory or contextual, answer directly from the provided context and draft answer. Do not append unrelated row data. +6. If no matching rows were found, say so plainly. +7. Use concise, analyst-quality language. Prefer clear interpretation over exhaustive repetition. +8. If the provided result values look like rates or percentages, describe them naturally as percentages when appropriate. +9. Mention important caveats only when they materially affect the answer. + +Return markdown only, with no code fences unless the user explicitly asked for code or SQL.""" + + +def seed_final_answer_composition_prompt(apps, schema_editor): + DashboardChatPromptTemplate = apps.get_model("ddpui", "DashboardChatPromptTemplate") + DashboardChatPromptTemplate.objects.update_or_create( + key="final_answer_composition", + defaults={"prompt": FINAL_ANSWER_COMPOSITION_PROMPT}, + ) + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0156_dashboardchatsession_vector_collection_name"), + ] + + operations = [ + migrations.AlterField( + model_name="dashboardchatprompttemplate", + name="key", + field=models.CharField( + choices=[ + ("intent_classification", "Intent Classification"), + ("new_query_system", "New Query System"), + ("follow_up_system", "Follow-up System"), + ("final_answer_composition", "Final Answer Composition"), + ("small_talk_capabilities", "Small Talk Capabilities"), + ], + max_length=64, + unique=True, + ), + ), + migrations.RunPython( + seed_final_answer_composition_prompt, + migrations.RunPython.noop, + ), + ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index 7abed4c81..cf7265976 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -37,6 +37,10 @@ class DashboardChatPromptTemplateKey(models.TextChoices): 
"follow_up_system", "Follow-up System", ) + FINAL_ANSWER_COMPOSITION = ( + "final_answer_composition", + "Final Answer Composition", + ) SMALL_TALK_CAPABILITIES = ( "small_talk_capabilities", "Small Talk Capabilities", diff --git a/ddpui/tests/core/dashboard_chat/test_llm_client.py b/ddpui/tests/core/dashboard_chat/test_llm_client.py index 755f83ffe..83af4f2fa 100644 --- a/ddpui/tests/core/dashboard_chat/test_llm_client.py +++ b/ddpui/tests/core/dashboard_chat/test_llm_client.py @@ -8,6 +8,7 @@ DashboardChatConversationContext, DashboardChatIntent, DashboardChatIntentDecision, + DashboardChatRetrievedDocument, ) @@ -187,3 +188,110 @@ def test_reset_usage_clears_previous_usage_events(): llm_client.reset_usage() assert llm_client.usage_summary()["totals"]["total_tokens"] == 0 + + +def test_compose_final_answer_uses_structured_json_summary_for_table_responses(): + """Table-like answers should return narrative markdown and leave tabular rendering to the UI.""" + fake_client = FakeClient() + fake_client.chat.completions.response_content = json.dumps( + { + "title": "Top 5 facilitators with the best outcomes in Q2 2025", + "summary": "All five facilitators tied at 3 improved literacy students in Q2 2025.", + "key_points": [ + "No facilitator exceeded 3 improved literacy students.", + "The UI should render the full table separately.", + ], + } + ) + llm_client = OpenAIDashboardChatLlmClient( + api_key="test-key", + client=fake_client, + prompt_store=FakePromptStore(), + ) + + answer = llm_client.compose_final_answer( + user_query="Give me a district wise pass rate breakdown", + intent=DashboardChatIntent.QUERY_WITH_SQL, + response_format="text_with_table", + draft_answer=None, + retrieved_documents=[ + DashboardChatRetrievedDocument( + document_id="doc-1", + source_type="dashboard_export", + source_identifier="dashboard:1:chart:2", + content="District performance chart with literacy and numeracy pass rate metrics", + ) + ], + sql="SELECT district_name, 
avg_literacy_pass_rate FROM analytics.district_program_performance_quarterly", + sql_results=[ + { + "district_name": "North", + "avg_literacy_pass_rate": "25%", + } + ], + warnings=["Using current dashboard context only."], + ) + + assert answer == ( + "### Top 5 facilitators with the best outcomes in Q2 2025\n\n" + "All five facilitators tied at 3 improved literacy students in Q2 2025.\n\n" + "- No facilitator exceeded 3 improved literacy students.\n" + "- The UI should render the full table separately." + ) + messages = fake_client.chat.completions.calls[0]["messages"] + assert messages[0] == { + "role": "system", + "content": ( + "prompt:final_answer_composition\n\n" + "For table-like responses, return valid JSON only with this shape:\n" + "{\n" + ' "title": "short heading or null",\n' + ' "summary": "1-2 sentence narrative summary",\n' + ' "key_points": ["short point", "short point"]\n' + "}\n\n" + "Rules:\n" + "- Do not include markdown tables.\n" + "- Do not include pipe characters or ASCII table formatting.\n" + "- Do not repeat every row from the result set.\n" + "- The UI will render the structured table separately from sql_results.\n" + "- Keep key_points to at most 3 concise bullets." + ), + } + payload = json.loads(messages[1]["content"]) + assert payload["response_format"] == "text_with_table" + assert payload["row_count"] == 1 + assert payload["retrieved_context"][0]["source_type"] == "dashboard_export" + assert payload["retrieved_context"][0]["content"].startswith("District performance chart") + assert llm_client.usage_summary()["calls"][0]["operation"] == "final_answer_table_summary" + + +def test_compose_final_answer_keeps_freeform_markdown_for_text_responses(): + """Pure text answers should continue to use the freeform markdown composer path.""" + fake_client = FakeClient() + fake_client.chat.completions.response_content = "## Overview\n\nThis dashboard tracks literacy outcomes." 
+ llm_client = OpenAIDashboardChatLlmClient( + api_key="test-key", + client=fake_client, + prompt_store=FakePromptStore(), + ) + + answer = llm_client.compose_final_answer( + user_query="Tell me about this dashboard", + intent=DashboardChatIntent.QUERY_WITHOUT_SQL, + response_format="text", + draft_answer="This dashboard tracks literacy outcomes.", + retrieved_documents=[], + sql=None, + sql_results=None, + warnings=[], + ) + + assert answer == "## Overview\n\nThis dashboard tracks literacy outcomes." + messages = fake_client.chat.completions.calls[0]["messages"] + assert messages[0] == { + "role": "system", + "content": "prompt:final_answer_composition", + } + payload = json.loads(messages[1]["content"]) + assert payload["response_format"] == "text" + assert llm_client.usage_summary()["calls"][0]["operation"] == "final_answer_composition" diff --git a/ddpui/tests/core/dashboard_chat/test_prompt_store.py b/ddpui/tests/core/dashboard_chat/test_prompt_store.py index 6d99b8182..88c0b6deb 100644 --- a/ddpui/tests/core/dashboard_chat/test_prompt_store.py +++ b/ddpui/tests/core/dashboard_chat/test_prompt_store.py @@ -27,11 +27,16 @@ def test_prompt_store_returns_default_when_no_db_override_exists(): store = DashboardChatPromptStore() prompt = store.get(DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION) + final_answer_prompt = store.get(DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION) assert ( prompt == DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION] ) + assert ( + final_answer_prompt + == DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION] + ) def test_prompt_store_uses_db_override_and_invalidates_cache_on_save(): diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index 58c683d26..90113ba0b 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -582,6 +582,46 @@ def 
run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): raise AssertionError("Small talk should not enter the tool loop") +class FinalAnswerComposerLlm(PrototypeLlmBase): + """LLM stub that only composes the final user-facing answer.""" + + def __init__(self): + super().__init__() + self.compose_calls = [] + + def classify_intent(self, *args, **kwargs): + raise AssertionError("This stub is only for direct final-answer composition tests") + + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + raise AssertionError("This stub should not enter the tool loop") + + def compose_final_answer( + self, + *, + user_query, + intent, + response_format, + draft_answer, + retrieved_documents, + sql, + sql_results, + warnings, + ): + self.compose_calls.append( + { + "user_query": user_query, + "intent": intent, + "response_format": response_format, + "draft_answer": draft_answer, + "retrieved_documents": retrieved_documents, + "sql": sql, + "sql_results": sql_results, + "warnings": warnings, + } + ) + return "## District-wise pass rates\nSee the table below for the breakdown." 
+ + @pytest.fixture def org(): organization = Org.objects.create( @@ -1269,7 +1309,7 @@ def build_runtime(): assert "WHERE program_name = 'Education'" in fake_warehouse.executed_sql[0] assert first_response.sql is not None assert second_response.sql is not None - assert '"beneficiary_count": 120' in first_response.answer_text + assert "Beneficiary Count: 120" in first_response.answer_text assert any(citation.source_type == "warehouse_table" for citation in first_response.citations) assert [call["name"] for call in first_response.tool_calls] == [ "retrieve_docs", @@ -1406,7 +1446,12 @@ def test_runtime_follow_up_sql_corrects_after_failed_sql_attempt( ] assert run_sql_calls[0]["success"] is False assert run_sql_calls[-1]["success"] is True - assert '"donor_type": "Grant"' in response.answer_text + assert response.metadata["response_format"] == "text_with_table" + assert response.sql_results == [ + {"donor_type": "Grant", "beneficiary_count": 80}, + {"donor_type": "Corporate", "beneficiary_count": 40}, + ] + assert "See the table below for the breakdown" in response.answer_text def test_runtime_dbt_tools_use_compact_allowlisted_index(): @@ -1559,7 +1604,9 @@ def test_runtime_follow_up_sql_rejects_query_that_ignores_requested_dimension( assert response.sql is not None assert "analytics.stg_donor_funding_clean" in response.sql assert any(call.get("error") == "requested_dimension_missing" for call in response.tool_calls) - assert '"Grant"' in response.answer_text + assert response.metadata["response_format"] == "text_with_table" + assert response.sql_results[0]["donor_type"] == "Grant" + assert "See the table below for the breakdown" in response.answer_text def test_follow_up_dimension_validation_accepts_structural_granularity_change(primary_dashboard): @@ -1881,3 +1928,91 @@ def test_sql_guard_rejects_select_into_queries(): assert select_into_query.is_valid is False assert select_into_query.sanitized_sql is None assert "SELECT INTO is not allowed" in 
select_into_query.errors + + +def test_compose_final_answer_text_uses_llm_and_normalizes_rate_values(): + """Final answer composition should send normalized values and table hints to the composer.""" + llm = FinalAnswerComposerLlm() + runtime = DashboardChatRuntime( + vector_store=FakeVectorStore([]), + llm_client=llm, + ) + state = { + "user_query": "Give me a district wise pass rate breakdown", + "intent_decision": DashboardChatIntentDecision( + intent=DashboardChatIntent.QUERY_WITH_SQL, + confidence=0.9, + reason="Needs grouped results", + force_tool_usage=True, + ), + } + execution_result = { + "answer_text": "", + "retrieved_documents": [ + DashboardChatRetrievedDocument( + document_id="doc-chart", + source_type=DashboardChatSourceType.DASHBOARD_EXPORT.value, + source_identifier="dashboard:1:chart:2", + content="District pass-rate chart", + ) + ], + "sql": ( + "SELECT district_name, avg_literacy_pass_rate, avg_numeracy_pass_rate " + "FROM analytics.district_program_performance_quarterly" + ), + "sql_results": [ + { + "district_name": "East", + "avg_literacy_pass_rate": Decimal("0E-20"), + "avg_numeracy_pass_rate": Decimal("0.25000000000000000000"), + }, + { + "district_name": "South", + "avg_literacy_pass_rate": Decimal("0.25000000000000000000"), + "avg_numeracy_pass_rate": Decimal("0E-20"), + }, + ], + "warnings": [], + } + + answer = runtime._compose_final_answer_text( + state, + execution_result, + response_format="text_with_table", + ) + + assert answer == "## District-wise pass rates\nSee the table below for the breakdown." 
+ assert llm.compose_calls[0]["response_format"] == "text_with_table" + assert llm.compose_calls[0]["sql_results"] == [ + { + "district_name": "East", + "avg_literacy_pass_rate": "0%", + "avg_numeracy_pass_rate": "25%", + }, + { + "district_name": "South", + "avg_literacy_pass_rate": "25%", + "avg_numeracy_pass_rate": "0%", + }, + ] + + +def test_determine_response_format_prefers_table_for_grouped_breakdowns(): + """Grouped breakdowns should tell the frontend to render a structured table.""" + response_format = DashboardChatRuntime._determine_response_format( + user_query="Give me a district wise pass rate breakdown", + sql_results=[ + { + "district_name": "North", + "avg_literacy_pass_rate": "25%", + "avg_numeracy_pass_rate": "50%", + }, + { + "district_name": "South", + "avg_literacy_pass_rate": "25%", + "avg_numeracy_pass_rate": "0%", + }, + ], + ) + + assert response_format == "text_with_table" From 65c5fcaae48b309db25c5d57f0a168a7717e53fd Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 25 Mar 2026 15:53:05 +0530 Subject: [PATCH 16/49] refactor(ai-chat): modularize dashboard chat backend --- Docker/docker-compose.dev.yml | 32 - Docker/docker-compose.yml | 32 - README.md | 28 +- ddpui/api/org_preferences_api.py | 55 +- ddpui/celeryworkers/tasks.py | 9 +- ddpui/core/dashboard_chat/config.py | 46 +- ddpui/core/dashboard_chat/embeddings.py | 86 + ddpui/core/dashboard_chat/events.py | 4 +- ddpui/core/dashboard_chat/graph/__init__.py | 5 + ddpui/core/dashboard_chat/graph/bindings.py | 241 ++ .../core/dashboard_chat/graph/conversation.py | 223 ++ ddpui/core/dashboard_chat/graph/definition.py | 45 + .../dashboard_chat/graph/message_stack.py | 71 + ddpui/core/dashboard_chat/graph/nodes.py | 241 ++ .../core/dashboard_chat/graph/orchestrator.py | 88 + .../core/dashboard_chat/graph/presentation.py | 338 ++ ddpui/core/dashboard_chat/graph/retrieval.py | 308 ++ .../dashboard_chat/graph/session_snapshot.py | 116 + .../graph/source_identifiers.py | 22 + 
.../dashboard_chat/graph/sql_execution.py | 519 +++ .../core/dashboard_chat/graph/sql_parsing.py | 340 ++ ddpui/core/dashboard_chat/graph/state.py | 50 + .../dashboard_chat/graph/tool_handlers.py | 484 +++ ddpui/core/dashboard_chat/graph/tool_loop.py | 189 ++ .../graph/tool_specifications.py | 148 + .../dashboard_chat/llm_answer_formatting.py | 88 + ddpui/core/dashboard_chat/llm_client.py | 339 +- .../core/dashboard_chat/openai_llm_client.py | 303 ++ ddpui/core/dashboard_chat/runtime.py | 3010 ----------------- ddpui/core/dashboard_chat/session_service.py | 7 + ddpui/core/dashboard_chat/vector_building.py | 153 + ...ngestion.py => vector_document_builder.py} | 143 +- ddpui/core/dashboard_chat/vector_store.py | 326 +- ddpui/core/dashboard_chat/warehouse_tools.py | 54 +- .../core/dashboard_chat/test_llm_client.py | 4 +- .../tests/core/dashboard_chat/test_runtime.py | 27 +- .../dashboard_chat/test_session_service.py | 68 +- ddpui/tests/core/dashboard_chat/test_tasks.py | 42 +- ...t_ingestion.py => test_vector_building.py} | 67 +- .../dashboard_chat/test_warehouse_tools.py | 23 + .../test_dashboard_chat_consumer.py | 22 +- ddpui/utils/openai_client.py | 22 + ddpui/utils/vector/__init__.py | 1 + ddpui/utils/vector/chroma/__init__.py | 12 + ddpui/utils/vector/chroma/client.py | 11 + ddpui/utils/vector/chroma/store.py | 207 ++ ddpui/utils/vector/chroma/types.py | 23 + ddpui/websockets/dashboard_chat_consumer.py | 156 +- 48 files changed, 4833 insertions(+), 3995 deletions(-) create mode 100644 ddpui/core/dashboard_chat/embeddings.py create mode 100644 ddpui/core/dashboard_chat/graph/__init__.py create mode 100644 ddpui/core/dashboard_chat/graph/bindings.py create mode 100644 ddpui/core/dashboard_chat/graph/conversation.py create mode 100644 ddpui/core/dashboard_chat/graph/definition.py create mode 100644 ddpui/core/dashboard_chat/graph/message_stack.py create mode 100644 ddpui/core/dashboard_chat/graph/nodes.py create mode 100644 
ddpui/core/dashboard_chat/graph/orchestrator.py create mode 100644 ddpui/core/dashboard_chat/graph/presentation.py create mode 100644 ddpui/core/dashboard_chat/graph/retrieval.py create mode 100644 ddpui/core/dashboard_chat/graph/session_snapshot.py create mode 100644 ddpui/core/dashboard_chat/graph/source_identifiers.py create mode 100644 ddpui/core/dashboard_chat/graph/sql_execution.py create mode 100644 ddpui/core/dashboard_chat/graph/sql_parsing.py create mode 100644 ddpui/core/dashboard_chat/graph/state.py create mode 100644 ddpui/core/dashboard_chat/graph/tool_handlers.py create mode 100644 ddpui/core/dashboard_chat/graph/tool_loop.py create mode 100644 ddpui/core/dashboard_chat/graph/tool_specifications.py create mode 100644 ddpui/core/dashboard_chat/llm_answer_formatting.py create mode 100644 ddpui/core/dashboard_chat/openai_llm_client.py delete mode 100644 ddpui/core/dashboard_chat/runtime.py create mode 100644 ddpui/core/dashboard_chat/vector_building.py rename ddpui/core/dashboard_chat/{ingestion.py => vector_document_builder.py} (76%) rename ddpui/tests/core/dashboard_chat/{test_ingestion.py => test_vector_building.py} (90%) create mode 100644 ddpui/utils/openai_client.py create mode 100644 ddpui/utils/vector/__init__.py create mode 100644 ddpui/utils/vector/chroma/__init__.py create mode 100644 ddpui/utils/vector/chroma/client.py create mode 100644 ddpui/utils/vector/chroma/store.py create mode 100644 ddpui/utils/vector/chroma/types.py diff --git a/Docker/docker-compose.dev.yml b/Docker/docker-compose.dev.yml index 26ff0f5ba..02ad35055 100644 --- a/Docker/docker-compose.dev.yml +++ b/Docker/docker-compose.dev.yml @@ -29,19 +29,6 @@ services: networks: - dalgo-network - chroma: - image: chromadb/chroma:0.6.3 - ports: - - "8003:8000" - environment: - - IS_PERSISTENT=TRUE - - ALLOW_RESET=TRUE - - ANONYMIZED_TELEMETRY=FALSE - volumes: - - chroma_data:/chroma/chroma - networks: - - dalgo-network - backend: image: dalgo_backend:0.1 restart: always @@ -50,8 
+37,6 @@ services: condition: service_healthy redis_server: condition: service_started - chroma: - condition: service_started initdb: condition: service_completed_successfully ports: @@ -109,12 +94,6 @@ services: - DEMO_SUPERSET_PASSWORD=${DEMO_SUPERSET_PASSWORD} - FIRST_USER_PASSWORD=${FIRST_USER_PASSWORD} - FIRST_USER_ROLE=${FIRST_USER_ROLE} - - OPENAI_API_KEY=${OPENAI_API_KEY} - - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma - - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 - - AI_DASHBOARD_CHAT_CHROMA_SSL=False - - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} - - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} command: backend networks: - dalgo-network @@ -128,8 +107,6 @@ services: condition: service_started redis_server: condition: service_started - chroma: - condition: service_started initdb: condition: service_completed_successfully networks: @@ -144,12 +121,6 @@ services: - DBPASSWORD=${DBPASSWORD} - DBADMINUSER=${DBADMINUSER} - DBADMINPASSWORD=${DBADMINPASSWORD} - - OPENAI_API_KEY=${OPENAI_API_KEY} - - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma - - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 - - AI_DASHBOARD_CHAT_CHROMA_SSL=False - - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} - - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} celery_beat: image: dalgo_backend:0.1 command: beat @@ -158,8 +129,6 @@ services: condition: service_started redis_server: condition: service_started - chroma: - condition: service_started initdb: condition: service_completed_successfully @@ -192,7 +161,6 @@ services: volumes: redis_data: - chroma_data: celerybeat_volume: networks: diff --git a/Docker/docker-compose.yml b/Docker/docker-compose.yml index 28a7f1481..e4eca1d6d 100644 --- a/Docker/docker-compose.yml +++ b/Docker/docker-compose.yml @@ -9,19 +9,6 @@ services: networks: - dalgo-network - chroma: - image: chromadb/chroma:0.6.3 - ports: - - 
"8003:8000" - environment: - - IS_PERSISTENT=TRUE - - ALLOW_RESET=TRUE - - ANONYMIZED_TELEMETRY=FALSE - volumes: - - chroma_data:/chroma/chroma - networks: - - dalgo-network - backend: image: dalgo_backend:latest command: backend @@ -30,20 +17,10 @@ services: - "8002:8002" env_file: - .env.docker - depends_on: - - redis_server - - chroma volumes: - ${CLIENTS_DBT_MOUNT}:/data/clients_dbt - ${DEV_SECRETS_MOUNT}:/data/secrets - ${LOGS_MOUNT}:/usr/src/backend/ddpui/logs - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY} - - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma - - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 - - AI_DASHBOARD_CHAT_CHROMA_SSL=False - - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} - - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} networks: - dalgo-network celery_beat: @@ -65,26 +42,17 @@ services: depends_on: - backend - redis_server - - chroma env_file: - .env.docker volumes: - ${CLIENTS_DBT_MOUNT}:/data/clients_dbt - ${DEV_SECRETS_MOUNT}:/data/secrets - ${LOGS_MOUNT}:/usr/src/backend/ddpui/logs - environment: - - OPENAI_API_KEY=${OPENAI_API_KEY} - - AI_DASHBOARD_CHAT_CHROMA_HOST=chroma - - AI_DASHBOARD_CHAT_CHROMA_PORT=8000 - - AI_DASHBOARD_CHAT_CHROMA_SSL=False - - AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX=${AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX} - - AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL=${AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL} networks: - dalgo-network volumes: redis_data: - chroma_data: celerybeat_volume: networks: diff --git a/README.md b/README.md index b24912795..19d3a504d 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,25 @@ PREFECT_PROXY_API_URL= ### Step 6: Create secrets directory - Set `DEV_SECRETS_DIR` in `.env` unless you want to use Amazon's Secrets Manager -### Step 7: Install DBT +### Step 7: Install Chroma for dashboard chat + +Dashboard chat retrieval uses a local Chroma server in development. 
One simple way to run it is: + +```bash +docker run --rm \ + -p 8003:8000 \ + -v "$PWD/.local/chroma-data:/data" \ + chromadb/chroma:0.5.23 +``` + +Then point the backend env to it: + +```bash +AI_DASHBOARD_CHAT_CHROMA_HOST=localhost +AI_DASHBOARD_CHAT_CHROMA_PORT=8003 +``` + +### Step 8: Install DBT The platform now supports multiple DBT versions using `uv` and `pyproject.toml` for better dependency management. @@ -178,11 +196,11 @@ $DBT_VENV/ Organizations use either `venv` or `venv-1.9.8` in their `dbt_venv` database field. -### Step 8: Add SIGNUPCODE and FRONTEND_URL +### Step 9: Add SIGNUPCODE and FRONTEND_URL - The `SIGNUPCODE` in `.env` is for signing up using the frontend. If you are running the frontend, set its URL in `FRONTEND_URL` -### Step 9: Start Backend +### Step 10: Start Backend ``` DJANGOSECRET= @@ -199,11 +217,11 @@ DJANGOSECRET= - Start the server `uvicorn ddpui.asgi:application --port ` -### Step 10: Create first org and user +### Step 11: Create first org and user - Run `python manage.py createorganduser --role super-admin` - The above command creates a user with super admin role. If we don't provide any role, the default role is of account manager. 
-### Step11: Running celery +### Step 12: Running celery We use two separate Celery workers for better task isolation: diff --git a/ddpui/api/org_preferences_api.py b/ddpui/api/org_preferences_api.py index 9f4f65215..c6488c56e 100644 --- a/ddpui/api/org_preferences_api.py +++ b/ddpui/api/org_preferences_api.py @@ -33,35 +33,13 @@ orgpreference_router = Router() - -def _get_or_create_org_preferences(org): - org_preferences, _ = OrgPreferences.objects.get_or_create(org=org) - return org_preferences - - -def _get_or_create_org_ai_context(org): - context, _ = OrgAIContext.objects.get_or_create(org=org) - return context - - -def _is_dashboard_chat_feature_enabled(org) -> bool: - return get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) - - -def _ensure_dashboard_chat_feature_enabled(org) -> None: - """Hide dashboard chat management APIs unless the feature flag is enabled.""" - if not _is_dashboard_chat_feature_enabled(org): - raise HttpError(404, "Chat with dashboards is not enabled for this organization") - - -def _is_dbt_configured(org) -> bool: - return org.dbt is not None - - def _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context): org_dbt = org.dbt return OrgAIDashboardChatSettingsResponse( - feature_flag_enabled=_is_dashboard_chat_feature_enabled(org), + feature_flag_enabled=get_all_feature_flags_for_org(org).get( + "AI_DASHBOARD_CHAT", + False, + ), ai_data_sharing_enabled=bool(org_preferences.ai_data_sharing_enabled), ai_data_sharing_consented_by=( org_preferences.ai_data_sharing_consented_by.user.email @@ -74,7 +52,7 @@ def _serialize_ai_dashboard_chat_settings(org, org_preferences, org_context): if org_context.updated_by else None, org_context_updated_at=org_context.updated_at, - dbt_configured=_is_dbt_configured(org), + dbt_configured=org_dbt is not None, docs_generated_at=org_dbt.docs_generated_at if org_dbt else None, vector_last_ingested_at=org_dbt.vector_last_ingested_at if org_dbt else None, ) @@ -82,9 +60,12 @@ def 
_serialize_ai_dashboard_chat_settings(org, org_preferences, org_context): def _serialize_ai_dashboard_chat_status(org, org_preferences): org_dbt = org.dbt - feature_flag_enabled = _is_dashboard_chat_feature_enabled(org) + feature_flag_enabled = get_all_feature_flags_for_org(org).get( + "AI_DASHBOARD_CHAT", + False, + ) ai_data_sharing_enabled = bool(org_preferences.ai_data_sharing_enabled) - dbt_configured = _is_dbt_configured(org) + dbt_configured = org_dbt is not None vector_last_ingested_at = org_dbt.vector_last_ingested_at if org_dbt else None return OrgAIDashboardChatStatusResponse( @@ -219,9 +200,10 @@ def get_ai_dashboard_chat_settings(request): orguser: OrgUser = request.orguser org = orguser.org - _ensure_dashboard_chat_feature_enabled(org) - org_preferences = _get_or_create_org_preferences(org) - org_context = _get_or_create_org_ai_context(org) + if not get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False): + raise HttpError(404, "Chat with dashboards is not enabled for this organization") + org_preferences, _ = OrgPreferences.objects.get_or_create(org=org) + org_context, _ = OrgAIContext.objects.get_or_create(org=org) return { "success": True, @@ -237,9 +219,10 @@ def update_ai_dashboard_chat_settings(request, payload: UpdateOrgAIDashboardChat orguser: OrgUser = request.orguser org = orguser.org - _ensure_dashboard_chat_feature_enabled(org) - org_preferences = _get_or_create_org_preferences(org) - org_context = _get_or_create_org_ai_context(org) + if not get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False): + raise HttpError(404, "Chat with dashboards is not enabled for this organization") + org_preferences, _ = OrgPreferences.objects.get_or_create(org=org) + org_context, _ = OrgAIContext.objects.get_or_create(org=org) target_ai_data_sharing_enabled = ( payload.ai_data_sharing_enabled if payload.ai_data_sharing_enabled is not None @@ -279,7 +262,7 @@ def get_ai_dashboard_chat_status(request): orguser: OrgUser = request.orguser 
org = orguser.org - org_preferences = _get_or_create_org_preferences(org) + org_preferences, _ = OrgPreferences.objects.get_or_create(org=org) return { "success": True, diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index a16ca8c9e..4f14b0c5b 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -75,6 +75,8 @@ ) from ddpui.core.orgdbt_manager import DbtProjectManager, DbtCommandError from ddpui.core.git_manager import GitManager, GitManagerError +from ddpui.core.dashboard_chat.vector_building import DashboardChatVectorBuildService +from ddpui.core.dashboard_chat.graph.orchestrator import get_dashboard_chat_runtime from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpairbyte import airbyte_service, airbytehelpers from ddpui.ddpprefect.prefect_service import ( @@ -1308,8 +1310,6 @@ def schedule_dashboard_chat_context_builds(): @app.task(bind=True) def build_dashboard_chat_context_for_org(self, org_id: int): """Build dashboard chat retrieval context for one org if the org is eligible.""" - from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService - org = ( Org.objects.select_related("dbt", "preferences") .filter(id=org_id, dbt__isnull=False) @@ -1342,7 +1342,7 @@ def build_dashboard_chat_context_for_org(self, org_id: int): return {"status": "skipped_locked", "org_id": org_id} try: - result = DashboardChatIngestionService().ingest_org(org) + result = DashboardChatVectorBuildService().build_org_vector_context(org) return { "status": "completed", "org_id": org_id, @@ -1448,7 +1448,6 @@ def run_dashboard_chat_turn(session_id: str, user_message_id: int): def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: """Run one dashboard chat turn synchronously and persist the assistant reply.""" - from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession session = ( @@ -1479,7 +1478,7 
@@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: "assistant_message": existing_assistant_message, } - response = DashboardChatRuntime().run( + response = get_dashboard_chat_runtime().run( org=session.org, dashboard_id=session.dashboard.id, user_query=user_message.content, diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index 6fe63fdb4..43054efe2 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -1,7 +1,7 @@ """Configuration helpers for dashboard chat infrastructure.""" +from collections.abc import Sequence from dataclasses import dataclass -from enum import Enum import os from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType @@ -24,35 +24,55 @@ def _parse_csv_env(value: str | None) -> tuple[str, ...] | None: return parsed_values or None +def _default_enabled_source_types() -> tuple[DashboardChatSourceType, ...]: + """Return the default enabled source types for dashboard chat retrieval.""" + return tuple(DashboardChatSourceType) + + +def _parse_enabled_source_types_env( + value: str | None, +) -> tuple[DashboardChatSourceType, ...] | None: + """Parse the enabled source-types env var into enum values.""" + parsed_values = _parse_csv_env(value) + if parsed_values is None: + return None + + enabled_source_types: list[DashboardChatSourceType] = [] + for raw_source_type in parsed_values: + try: + enabled_source_types.append(DashboardChatSourceType(raw_source_type)) + except ValueError: + continue + return tuple(enabled_source_types) or None + + @dataclass(frozen=True) class DashboardChatSourceConfig: """Environment-backed enablement for retrieval source types.""" - enabled_source_types: tuple[str, ...] = tuple( - source_type.value for source_type in DashboardChatSourceType - ) + enabled_source_types: tuple[DashboardChatSourceType, ...] 
= _default_enabled_source_types() @classmethod def from_env(cls) -> "DashboardChatSourceConfig": """Build source-type config from environment variables.""" - env_value = _parse_csv_env(os.getenv("AI_DASHBOARD_CHAT_ENABLED_SOURCE_TYPES")) + env_value = _parse_enabled_source_types_env( + os.getenv("AI_DASHBOARD_CHAT_ENABLED_SOURCE_TYPES") + ) return cls( - enabled_source_types=env_value - or tuple(source_type.value for source_type in DashboardChatSourceType) + enabled_source_types=env_value or _default_enabled_source_types() ) - def is_enabled(self, source_type: DashboardChatSourceType | str) -> bool: + def is_enabled(self, source_type: DashboardChatSourceType) -> bool: """Return whether the given source type should participate in runtime work.""" - source_type_value = source_type.value if isinstance(source_type, Enum) else source_type - return source_type_value in self.enabled_source_types + return source_type in self.enabled_source_types def filter_enabled( self, - source_types: list[DashboardChatSourceType | str] | tuple[DashboardChatSourceType | str, ...], - ) -> list[str]: + source_types: Sequence[DashboardChatSourceType], + ) -> list[DashboardChatSourceType]: """Keep only the configured source types from a requested set.""" return [ - source_type.value if isinstance(source_type, Enum) else source_type + source_type for source_type in source_types if self.is_enabled(source_type) ] diff --git a/ddpui/core/dashboard_chat/embeddings.py b/ddpui/core/dashboard_chat/embeddings.py new file mode 100644 index 000000000..407eacade --- /dev/null +++ b/ddpui/core/dashboard_chat/embeddings.py @@ -0,0 +1,86 @@ +"""Embedding providers used by dashboard chat retrieval.""" + +import os +from typing import Any, Protocol + +from openai import OpenAI + +from ddpui.utils.openai_client import get_shared_openai_client + + +class DashboardChatEmbeddingProvider(Protocol): + """Embedding provider interface used by the vector store wrapper.""" + + def embed_documents(self, texts: 
list[str]) -> list[list[float]]: + """Embed a batch of texts.""" + + def embed_query(self, text: str) -> list[float]: + """Embed a single query.""" + + def reset_usage(self) -> None: + """Reset per-turn embedding usage before a new runtime invocation.""" + + +class OpenAIEmbeddingProvider: + """OpenAI embeddings adapter for dashboard chat retrieval.""" + + def __init__( + self, + api_key: str | None = None, + model: str = "text-embedding-3-small", + client: OpenAI | None = None, + ): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.model = model + self.usage_events: list[dict[str, Any]] = [] + if client is None: + if not self.api_key: + raise ValueError("OPENAI_API_KEY must be set for dashboard chat embeddings") + client = get_shared_openai_client(self.api_key, max_retries=2) + self.client = client + + def reset_usage(self) -> None: + """Reset aggregated embedding usage before one new chat turn.""" + self.usage_events = [] + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """Embed a batch of documents using OpenAI.""" + if not texts: + return [] + response = self.client.embeddings.create(model=self.model, input=texts) + self._record_usage("embed_documents", response, len(texts)) + return [item.embedding for item in response.data] + + def embed_query(self, text: str) -> list[float]: + """Embed a single query using the document embedding path.""" + return self.embed_documents([text])[0] + + def usage_summary(self) -> dict[str, Any]: + """Return aggregated embedding usage for the current turn.""" + totals = { + "prompt_tokens": 0, + "total_tokens": 0, + } + for event in self.usage_events: + totals["prompt_tokens"] += event.get("prompt_tokens", 0) + totals["total_tokens"] += event.get("total_tokens", 0) + return { + "model": self.model, + "calls": list(self.usage_events), + "totals": totals, + } + + def _record_usage(self, operation: str, response: Any, input_count: int) -> None: + """Capture embedding usage from one OpenAI 
embeddings response.""" + usage = getattr(response, "usage", None) + if usage is None: + return + self.usage_events.append( + { + "operation": operation, + "model": self.model, + "input_count": input_count, + "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + ) diff --git a/ddpui/core/dashboard_chat/events.py b/ddpui/core/dashboard_chat/events.py index 636f4a98c..655776c88 100644 --- a/ddpui/core/dashboard_chat/events.py +++ b/ddpui/core/dashboard_chat/events.py @@ -1,7 +1,5 @@ """Websocket event helpers for dashboard chat.""" -import json - from asgiref.sync import async_to_sync from channels.layers import get_channel_layer from django.utils import timezone @@ -48,7 +46,7 @@ def publish_dashboard_chat_event(session_id: str, event: dict) -> None: dashboard_chat_group_name(session_id), { "type": "dashboard_chat_event", - "event": json.dumps(event), + "event": event, }, ) except Exception: diff --git a/ddpui/core/dashboard_chat/graph/__init__.py b/ddpui/core/dashboard_chat/graph/__init__.py new file mode 100644 index 000000000..7897f8b2e --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/__init__.py @@ -0,0 +1,5 @@ +"""LangGraph orchestration modules for dashboard chat.""" + +from .orchestrator import DashboardChatRuntime, get_dashboard_chat_runtime + +__all__ = ["DashboardChatRuntime", "get_dashboard_chat_runtime"] diff --git a/ddpui/core/dashboard_chat/graph/bindings.py b/ddpui/core/dashboard_chat/graph/bindings.py new file mode 100644 index 000000000..c25380270 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/bindings.py @@ -0,0 +1,241 @@ +"""Method wiring for the dashboard chat runtime class.""" + +from . import conversation as conversation_methods +from . import message_stack as message_methods +from . import nodes as node_methods +from . import presentation as presentation_methods +from . import retrieval as retrieval_methods +from . import session_snapshot as snapshot_methods +from . 
import source_identifiers as source_identifier_methods +from . import sql_execution as sql_execution_methods +from . import sql_parsing as sql_parsing_methods +from . import tool_handlers as tool_handler_methods +from . import tool_loop as tool_loop_methods + + +def bind_dashboard_chat_runtime_methods(runtime_cls) -> None: + """Attach graph helper modules onto the runtime class.""" + runtime_cls._node_load_context = node_methods._node_load_context + runtime_cls._node_route_intent = node_methods._node_route_intent + runtime_cls._node_handle_small_talk = node_methods._node_handle_small_talk + runtime_cls._node_handle_irrelevant = node_methods._node_handle_irrelevant + runtime_cls._node_handle_needs_clarification = node_methods._node_handle_needs_clarification + runtime_cls._node_handle_query_with_sql = node_methods._node_handle_query_with_sql + runtime_cls._node_handle_query_without_sql = node_methods._node_handle_query_without_sql + runtime_cls._node_handle_follow_up_sql = node_methods._node_handle_follow_up_sql + runtime_cls._node_handle_follow_up_context = node_methods._node_handle_follow_up_context + runtime_cls._run_intent_tool_loop = node_methods._run_intent_tool_loop + runtime_cls._node_finalize_response = node_methods._node_finalize_response + runtime_cls._route_after_intent = staticmethod(node_methods._route_after_intent) + + runtime_cls._build_new_query_messages = message_methods._build_new_query_messages + runtime_cls._build_follow_up_messages = message_methods._build_follow_up_messages + runtime_cls._normalize_conversation_history = staticmethod( + message_methods._normalize_conversation_history + ) + + runtime_cls._execute_tool_loop = tool_loop_methods._execute_tool_loop + runtime_cls._execute_tool_call = tool_loop_methods._execute_tool_call + runtime_cls._build_tool_loop_result = tool_loop_methods._build_tool_loop_result + + runtime_cls._handle_retrieve_docs_tool = tool_handler_methods._handle_retrieve_docs_tool + 
runtime_cls._handle_get_schema_snippets_tool = ( + tool_handler_methods._handle_get_schema_snippets_tool + ) + runtime_cls._handle_search_dbt_models_tool = ( + tool_handler_methods._handle_search_dbt_models_tool + ) + runtime_cls._handle_get_dbt_model_info_tool = ( + tool_handler_methods._handle_get_dbt_model_info_tool + ) + runtime_cls._handle_get_distinct_values_tool = ( + tool_handler_methods._handle_get_distinct_values_tool + ) + runtime_cls._handle_list_tables_by_keyword_tool = ( + tool_handler_methods._handle_list_tables_by_keyword_tool + ) + runtime_cls._handle_check_table_row_count_tool = ( + tool_handler_methods._handle_check_table_row_count_tool + ) + runtime_cls._get_turn_warehouse_tools = tool_handler_methods._get_turn_warehouse_tools + runtime_cls._get_cached_schema_snippets = tool_handler_methods._get_cached_schema_snippets + runtime_cls._seed_distinct_cache_from_previous_sql = ( + tool_handler_methods._seed_distinct_cache_from_previous_sql + ) + runtime_cls._dbt_resources_by_unique_id = staticmethod( + tool_handler_methods._dbt_resources_by_unique_id + ) + runtime_cls._get_cached_query_embedding = tool_handler_methods._get_cached_query_embedding + + runtime_cls._extract_conversation_context = classmethod( + conversation_methods._extract_conversation_context + ) + runtime_cls._extract_chart_ids_from_payload = staticmethod( + conversation_methods._extract_chart_ids_from_payload + ) + runtime_cls._build_follow_up_context_prompt = classmethod( + conversation_methods._build_follow_up_context_prompt + ) + runtime_cls._detect_sql_modification_type = staticmethod( + conversation_methods._detect_sql_modification_type + ) + runtime_cls._extract_requested_follow_up_dimension = staticmethod( + conversation_methods._extract_requested_follow_up_dimension + ) + runtime_cls._extract_metrics_from_sql = staticmethod( + conversation_methods._extract_metrics_from_sql + ) + runtime_cls._extract_dimensions_from_sql = staticmethod( + 
conversation_methods._extract_dimensions_from_sql + ) + runtime_cls._extract_filters_from_sql = staticmethod( + conversation_methods._extract_filters_from_sql + ) + + runtime_cls._retrieve_vector_documents = retrieval_methods._retrieve_vector_documents + runtime_cls._filter_allowlisted_dbt_results = staticmethod( + retrieval_methods._filter_allowlisted_dbt_results + ) + runtime_cls._dedupe_retrieved_documents = staticmethod( + retrieval_methods._dedupe_retrieved_documents + ) + runtime_cls._build_citations = retrieval_methods._build_citations + runtime_cls._citation_title = staticmethod(retrieval_methods._citation_title) + runtime_cls._compact_snippet = staticmethod(retrieval_methods._compact_snippet) + runtime_cls._build_tool_document_payload = retrieval_methods._build_tool_document_payload + runtime_cls._build_chart_tool_metadata = classmethod( + retrieval_methods._build_chart_tool_metadata + ) + runtime_cls._prototype_doc_type = staticmethod(retrieval_methods._prototype_doc_type) + runtime_cls._chart_metric_columns = classmethod( + retrieval_methods._chart_metric_columns + ) + runtime_cls._chart_dimension_columns = classmethod( + retrieval_methods._chart_dimension_columns + ) + runtime_cls._chart_time_column = classmethod(retrieval_methods._chart_time_column) + runtime_cls._looks_like_time_dimension = staticmethod( + retrieval_methods._looks_like_time_dimension + ) + runtime_cls._chart_id_from_source_identifier = staticmethod( + source_identifier_methods.chart_id_from_source_identifier + ) + runtime_cls._unique_id_from_source_identifier = staticmethod( + source_identifier_methods.unique_id_from_source_identifier + ) + + runtime_cls._load_session_snapshot = snapshot_methods._load_session_snapshot + runtime_cls._build_session_snapshot = snapshot_methods._build_session_snapshot + runtime_cls._persist_session_schema_cache = snapshot_methods._persist_session_schema_cache + runtime_cls._persist_session_distinct_cache = snapshot_methods._persist_session_distinct_cache 
+ + runtime_cls._validate_sql_allowlist = sql_execution_methods._validate_sql_allowlist + runtime_cls._run_sql_with_distinct_guard = ( + sql_execution_methods._run_sql_with_distinct_guard + ) + runtime_cls._missing_columns_in_primary_table = ( + sql_execution_methods._missing_columns_in_primary_table + ) + runtime_cls._structured_sql_execution_error = ( + sql_execution_methods._structured_sql_execution_error + ) + runtime_cls._validate_follow_up_dimension_usage = ( + sql_execution_methods._validate_follow_up_dimension_usage + ) + runtime_cls._missing_distinct = sql_execution_methods._missing_distinct + runtime_cls._normalize_distinct_value = staticmethod( + sql_execution_methods._normalize_distinct_value + ) + runtime_cls._has_validated_distinct_value = classmethod( + sql_execution_methods._has_validated_distinct_value + ) + runtime_cls._is_text_type = staticmethod(sql_execution_methods._is_text_type) + runtime_cls._record_validated_distinct_values = ( + sql_execution_methods._record_validated_distinct_values + ) + runtime_cls._record_validated_filters_from_sql = ( + sql_execution_methods._record_validated_filters_from_sql + ) + + runtime_cls._primary_table_name = staticmethod(sql_parsing_methods._primary_table_name) + runtime_cls._table_references = classmethod(sql_parsing_methods._table_references) + runtime_cls._resolve_table_qualifier = classmethod( + sql_parsing_methods._resolve_table_qualifier + ) + runtime_cls._table_columns = staticmethod(sql_parsing_methods._table_columns) + runtime_cls._tables_with_column = classmethod(sql_parsing_methods._tables_with_column) + runtime_cls._resolve_identifier_table = classmethod( + sql_parsing_methods._resolve_identifier_table + ) + runtime_cls._referenced_sql_identifier_refs = classmethod( + sql_parsing_methods._referenced_sql_identifier_refs + ) + runtime_cls._select_aliases = staticmethod(sql_parsing_methods._select_aliases) + runtime_cls._extract_identifier_refs_from_sql_segment = staticmethod( + 
sql_parsing_methods._extract_identifier_refs_from_sql_segment + ) + runtime_cls._best_table_for_missing_columns = staticmethod( + sql_parsing_methods._best_table_for_missing_columns + ) + runtime_cls._extract_text_filter_values = staticmethod( + sql_parsing_methods._extract_text_filter_values + ) + runtime_cls._find_tables_with_column = staticmethod( + sql_parsing_methods._find_tables_with_column + ) + runtime_cls._structural_dimensions_from_sql = classmethod( + sql_parsing_methods._structural_dimensions_from_sql + ) + runtime_cls._normalize_dimension_name = staticmethod( + sql_parsing_methods._normalize_dimension_name + ) + + runtime_cls._serialize_tool_result = staticmethod( + presentation_methods._serialize_tool_result + ) + runtime_cls._summarize_tool_call = presentation_methods._summarize_tool_call + runtime_cls._max_turns_message = presentation_methods._max_turns_message + runtime_cls._compose_final_answer_text = presentation_methods._compose_final_answer_text + runtime_cls._determine_response_format = staticmethod( + presentation_methods._determine_response_format + ) + runtime_cls._sql_result_columns = staticmethod( + presentation_methods._sql_result_columns + ) + runtime_cls._build_usage_summary = presentation_methods._build_usage_summary + runtime_cls._compose_small_talk_response = ( + presentation_methods._compose_small_talk_response + ) + runtime_cls._build_fast_path_intent = staticmethod( + presentation_methods._build_fast_path_intent + ) + runtime_cls._build_fast_path_small_talk_response = staticmethod( + presentation_methods._build_fast_path_small_talk_response + ) + runtime_cls._clarification_fallback = staticmethod( + presentation_methods._clarification_fallback + ) + runtime_cls._fallback_answer_text = staticmethod( + presentation_methods._fallback_answer_text + ) + runtime_cls._single_row_summary = staticmethod( + presentation_methods._single_row_summary + ) + runtime_cls._humanize_column_name = staticmethod( + 
presentation_methods._humanize_column_name + ) + runtime_cls._normalize_sql_results_for_answer = classmethod( + presentation_methods._normalize_sql_results_for_answer + ) + runtime_cls._normalize_sql_value_for_answer = classmethod( + presentation_methods._normalize_sql_value_for_answer + ) + runtime_cls._format_numeric_answer_value = classmethod( + presentation_methods._format_numeric_answer_value + ) + runtime_cls._parse_numeric_string = staticmethod( + presentation_methods._parse_numeric_string + ) + runtime_cls._looks_like_rate_metric = staticmethod( + presentation_methods._looks_like_rate_metric + ) diff --git a/ddpui/core/dashboard_chat/graph/conversation.py b/ddpui/core/dashboard_chat/graph/conversation.py new file mode 100644 index 000000000..9fdbb6a5a --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/conversation.py @@ -0,0 +1,223 @@ +"""Conversation-history helpers for dashboard chat graph execution.""" + +from collections.abc import Sequence +import re +from typing import Any + +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatConversationContext, + DashboardChatConversationMessage, +) +from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard + +from .source_identifiers import chart_id_from_source_identifier + + +def _extract_conversation_context( + cls, + conversation_history: Sequence[DashboardChatConversationMessage], +) -> DashboardChatConversationContext: + """Extract reusable conversation context like the prototype conversation manager.""" + context = DashboardChatConversationContext() + recent_history = list(conversation_history)[-10:] + + for message in reversed(recent_history): + if message.role != "assistant": + continue + + payload = message.payload or {} + sql = payload.get("sql") + metadata = payload.get("metadata") or {} + citations = payload.get("citations") or [] + chart_ids = cls._extract_chart_ids_from_payload(payload) + + if chart_ids and context.last_sql_query and not context.last_chart_ids: + context 
= DashboardChatConversationContext( + last_sql_query=context.last_sql_query, + last_tables_used=context.last_tables_used, + last_chart_ids=chart_ids, + last_metrics=context.last_metrics, + last_dimensions=context.last_dimensions, + last_filters=context.last_filters, + last_response_type=context.last_response_type, + last_answer_text=context.last_answer_text, + last_intent=context.last_intent, + ) + break + + if sql and not context.last_sql_query: + tables = [ + str(table_name).lower() + for table_name in metadata.get("query_plan_tables") or [] + if table_name + ] + if not tables: + tables = [ + str(citation.get("table_name")).lower() + for citation in citations + if citation.get("table_name") + ] + if not tables: + tables = DashboardChatSqlGuard._extract_table_names(str(sql)) + context = DashboardChatConversationContext( + last_sql_query=str(sql), + last_tables_used=list(dict.fromkeys(tables)), + last_chart_ids=chart_ids, + last_metrics=cls._extract_metrics_from_sql(str(sql)), + last_dimensions=cls._extract_dimensions_from_sql(str(sql)), + last_filters=cls._extract_filters_from_sql(str(sql)), + last_response_type="sql_result", + last_answer_text=message.content, + last_intent=str(payload.get("intent") or ""), + ) + if chart_ids: + break + continue + + if payload and context.last_response_type is None: + context = DashboardChatConversationContext( + last_chart_ids=chart_ids, + last_response_type="metadata_answer", + last_answer_text=message.content, + last_intent=str(payload.get("intent") or ""), + ) + + return context + + +def _extract_chart_ids_from_payload(payload: dict[str, Any]) -> list[str]: + """Extract chart ids from persisted metadata/citations like the prototype chat history.""" + metadata = payload.get("metadata") or {} + chart_ids = [str(chart_id) for chart_id in metadata.get("chart_ids_used") or [] if chart_id] + if chart_ids: + return list(dict.fromkeys(chart_ids)) + + extracted_chart_ids: list[str] = [] + for citation in payload.get("citations") or 
[]: + source_identifier = str(citation.get("source_identifier") or "") + chart_id = chart_id_from_source_identifier(source_identifier) + if chart_id is not None: + extracted_chart_ids.append(str(chart_id)) + return list(dict.fromkeys(extracted_chart_ids)) + + +def _build_follow_up_context_prompt( + cls, + conversation_context: DashboardChatConversationContext, + user_query: str, +) -> str: + """Build the prototype follow-up context prompt.""" + return "\n".join( + [ + "PREVIOUS QUERY CONTEXT:", + f"Last SQL: {conversation_context.last_sql_query or 'None'}", + f"Tables used: {', '.join(conversation_context.last_tables_used) or 'None'}", + f"Metrics: {', '.join(conversation_context.last_metrics) or 'None'}", + f"Dimensions: {', '.join(conversation_context.last_dimensions) or 'None'}", + f"Filters: {', '.join(conversation_context.last_filters) or 'None'}", + "", + f"NEW INSTRUCTION: {user_query}", + "", + "TASK: Modify the previous query based on the new instruction. Reuse tables and context where possible.", + ] + ) + + +def _detect_sql_modification_type(user_query: str) -> str: + """Detect the same coarse follow-up modification categories as the prototype.""" + lowered_query = user_query.lower() + if any(keyword in lowered_query for keyword in ["by", "split by", "break down", "group by"]): + return "add_dimension" + if any(keyword in lowered_query for keyword in ["filter", "only", "exclude", "where"]): + return "add_filter" + if any( + keyword in lowered_query + for keyword in ["last", "this", "previous", "next", "monthly", "weekly", "quarterly"] + ): + return "modify_timeframe" + if any( + keyword in lowered_query + for keyword in ["total", "sum", "count", "average", "avg", "maximum", "minimum"] + ): + return "change_aggregation" + return "general_modification" + + +def _extract_requested_follow_up_dimension(text: str) -> str | None: + """Extract the requested follow-up dimension and normalize natural-language spaces.""" + normalized_text = text.strip().lower() + 
patterns = [ + r"split\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + r"break\s+down\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + r"group\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + r"\bby\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", + ] + for pattern in patterns: + match = re.search(pattern, normalized_text) + if not match: + continue + candidate = re.split( + r"\b(with|for|in|across|between)\b", + match.group(1), + maxsplit=1, + )[0] + candidate = re.sub(r"[^a-zA-Z0-9_\s]", " ", candidate) + normalized_candidate = "_".join(part for part in candidate.split() if part) + if normalized_candidate: + return normalized_candidate + return None + + +def _extract_metrics_from_sql(sql: str) -> list[str]: + """Extract aggregate expressions from the previous SQL for follow-up prompts.""" + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if not select_clause: + return [] + metrics: list[str] = [] + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + normalized_expression = expression.strip() + if normalized_expression and DashboardChatSqlGuard._contains_aggregate( + normalized_expression + ): + metrics.append(normalized_expression) + return metrics[:5] + + +def _extract_dimensions_from_sql(sql: str) -> list[str]: + """Extract GROUP BY dimensions from the previous SQL.""" + match = re.search( + r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not match: + return [] + return [ + dimension.strip().strip('`"') + for dimension in match.group(1).split(",") + if dimension.strip() + ][:5] + + +def _extract_filters_from_sql(sql: str) -> list[str]: + """Extract WHERE-clause filters from the previous SQL.""" + match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not match: + return [] + + where_clause = match.group(1).strip() + filters: list[str] = [] + for pattern in [ + r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'([^']+)'", + 
r"([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\([^)]+\)", + ]: + for filter_match in re.findall(pattern, where_clause, flags=re.IGNORECASE): + if isinstance(filter_match, tuple) and len(filter_match) == 2: + filters.append(f"{filter_match[0]} = {filter_match[1]}") + else: + filters.append(str(filter_match)) + return filters[:5] diff --git a/ddpui/core/dashboard_chat/graph/definition.py b/ddpui/core/dashboard_chat/graph/definition.py new file mode 100644 index 000000000..9c3eef697 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/definition.py @@ -0,0 +1,45 @@ +"""Graph definition helpers for dashboard chat orchestration.""" + +from langgraph.graph import END, START, StateGraph + +from .state import DashboardChatRuntimeState + + +def build_dashboard_chat_graph(runtime): + """Build the explicit prototype-aligned intent graph.""" + graph = StateGraph(DashboardChatRuntimeState) + graph.add_node("load_context", runtime._node_load_context) + graph.add_node("route_intent", runtime._node_route_intent) + graph.add_node("handle_small_talk", runtime._node_handle_small_talk) + graph.add_node("handle_irrelevant", runtime._node_handle_irrelevant) + graph.add_node("handle_needs_clarification", runtime._node_handle_needs_clarification) + graph.add_node("handle_query_with_sql", runtime._node_handle_query_with_sql) + graph.add_node("handle_query_without_sql", runtime._node_handle_query_without_sql) + graph.add_node("handle_follow_up_sql", runtime._node_handle_follow_up_sql) + graph.add_node("handle_follow_up_context", runtime._node_handle_follow_up_context) + graph.add_node("finalize", runtime._node_finalize_response) + + graph.add_edge(START, "load_context") + graph.add_edge("load_context", "route_intent") + graph.add_conditional_edges( + "route_intent", + runtime._route_after_intent, + { + "small_talk": "handle_small_talk", + "irrelevant": "handle_irrelevant", + "needs_clarification": "handle_needs_clarification", + "query_with_sql": "handle_query_with_sql", + "query_without_sql": 
"handle_query_without_sql", + "follow_up_sql": "handle_follow_up_sql", + "follow_up_context": "handle_follow_up_context", + }, + ) + graph.add_edge("handle_small_talk", "finalize") + graph.add_edge("handle_irrelevant", "finalize") + graph.add_edge("handle_needs_clarification", "finalize") + graph.add_edge("handle_query_with_sql", "finalize") + graph.add_edge("handle_query_without_sql", "finalize") + graph.add_edge("handle_follow_up_sql", "finalize") + graph.add_edge("handle_follow_up_context", "finalize") + graph.add_edge("finalize", END) + return graph.compile() diff --git a/ddpui/core/dashboard_chat/graph/message_stack.py b/ddpui/core/dashboard_chat/graph/message_stack.py new file mode 100644 index 000000000..232b73bc8 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/message_stack.py @@ -0,0 +1,71 @@ +"""Prompt-message stack helpers for dashboard chat graph execution.""" + +from collections.abc import Sequence +from typing import Any + +from ddpui.core.dashboard_chat.runtime_types import DashboardChatConversationMessage +from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey + +from .state import DashboardChatRuntimeState + + +def _build_new_query_messages( + self, + state: DashboardChatRuntimeState, +) -> list[dict[str, Any]]: + """Build the prototype new-query message stack.""" + system_prompt = self.llm_client.get_prompt( + DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM + ) + return [ + { + "role": "system", + "content": system_prompt, + }, + {"role": "user", "content": state["user_query"]}, + ] + + +def _build_follow_up_messages( + self, + state: DashboardChatRuntimeState, +) -> list[dict[str, Any]]: + """Build the prototype follow-up message stack.""" + modification_type = self._detect_sql_modification_type(state["user_query"]) + system_prompt = self.llm_client.get_prompt( + DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM + ) + return [ + { + "role": "system", + "content": system_prompt, + }, + { + "role": "system", + "content": 
self._build_follow_up_context_prompt( + state["conversation_context"], + state["user_query"], + ), + }, + {"role": "system", "content": f"MODIFICATION_TYPE: {modification_type}"}, + {"role": "user", "content": state["user_query"]}, + ] + + +def _normalize_conversation_history( + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] | None, +) -> list[DashboardChatConversationMessage]: + """Normalize stored history into the typed runtime message format.""" + normalized_messages: list[DashboardChatConversationMessage] = [] + for item in conversation_history or []: + if isinstance(item, DashboardChatConversationMessage): + normalized_messages.append(item) + continue + normalized_messages.append( + DashboardChatConversationMessage( + role=str(item.get("role") or "user"), + content=str(item.get("content") or ""), + payload=item.get("payload") or {}, + ) + ) + return normalized_messages diff --git a/ddpui/core/dashboard_chat/graph/nodes.py b/ddpui/core/dashboard_chat/graph/nodes.py new file mode 100644 index 000000000..04225c8f3 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/nodes.py @@ -0,0 +1,241 @@ +"""LangGraph node handlers for dashboard chat.""" + +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatCitation, + DashboardChatIntent, + DashboardChatResponse, +) + +from .state import DashboardChatRuntimeState + + +def _node_load_context(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Load or reuse the session-stable dashboard context snapshot.""" + snapshot = self._load_session_snapshot(state) + state["dashboard_export"] = snapshot["dashboard_export"] + state["dbt_index"] = snapshot["dbt_index"] + state["allowlist"] = snapshot["allowlist"] + state["session_schema_cache"] = snapshot["schema_cache"] + state["session_distinct_cache"] = snapshot["distinct_cache"] + return state + + +def _node_route_intent(self, state: 
DashboardChatRuntimeState) -> DashboardChatRuntimeState: + """Use the prototype router prompt for all non-trivial routing.""" + conversation_context = self._extract_conversation_context(state["conversation_history"]) + fast_path_intent = self._build_fast_path_intent(state["user_query"]) + if fast_path_intent is not None: + state["conversation_context"] = conversation_context + state["intent_decision"] = fast_path_intent + state["small_talk_response"] = self._build_fast_path_small_talk_response( + state["user_query"] + ) + return state + intent_decision = self.llm_client.classify_intent( + user_query=state["user_query"], + conversation_context=conversation_context, + ) + state["conversation_context"] = conversation_context + state["intent_decision"] = intent_decision + return state + + +def _node_handle_small_talk( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Handle simple social turns without any tool use.""" + state["response"] = DashboardChatResponse( + answer_text=state.get("small_talk_response") + or self._compose_small_talk_response(state["user_query"]), + intent=DashboardChatIntent.SMALL_TALK, + usage=self._build_usage_summary(), + ) + return state + + +def _node_handle_irrelevant( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Handle questions outside dashboard chat scope.""" + state["response"] = DashboardChatResponse( + answer_text=( + "I can only answer questions about this dashboard, its charts, and the data behind them." 
+ ), + intent=DashboardChatIntent.IRRELEVANT, + usage=self._build_usage_summary(), + ) + return state + + +def _node_handle_needs_clarification( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Ask for clarification when the router says the query is underspecified.""" + intent_decision = state["intent_decision"] + state["response"] = DashboardChatResponse( + answer_text=( + intent_decision.clarification_question + or self._clarification_fallback(intent_decision.missing_info) + ), + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + usage=self._build_usage_summary(), + ) + return state + + +def _node_handle_query_with_sql( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Run the prototype new-query tool loop for SQL-routed questions.""" + return self._run_intent_tool_loop(state, max_turns=15, follow_up=False) + + +def _node_handle_query_without_sql( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Run the prototype new-query tool loop for context-only questions.""" + return self._run_intent_tool_loop(state, max_turns=15, follow_up=False) + + +def _node_handle_follow_up_sql( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Run the prototype follow-up loop for SQL-modifying turns.""" + return self._run_intent_tool_loop(state, max_turns=6, follow_up=True) + + +def _node_handle_follow_up_context( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Run the prototype follow-up loop for explanatory follow-ups.""" + return self._run_intent_tool_loop(state, max_turns=6, follow_up=True) + + +def _run_intent_tool_loop( + self, + state: DashboardChatRuntimeState, + *, + max_turns: int, + follow_up: bool, +) -> DashboardChatRuntimeState: + """Execute one prototype-style tool loop and store the response on state.""" + allowlist = state["allowlist"] + + query_embedding = self._get_cached_query_embedding( + 
state["user_query"], + embedding_cache={}, + ) + + messages = ( + self._build_follow_up_messages(state) + if follow_up + else self._build_new_query_messages(state) + ) + execution_result = self._execute_tool_loop( + state=state, + messages=messages, + max_turns=max_turns, + initial_embedding_cache={state["user_query"]: query_embedding}, + ) + + state["retrieved_documents"] = execution_result["retrieved_documents"] + state["citations"] = self._build_citations( + retrieved_documents=execution_result["retrieved_documents"], + dashboard_export=state["dashboard_export"], + allowlist=allowlist, + ) + state["tool_calls"] = execution_result["tool_calls"] + state["sql"] = execution_result["sql"] + state["sql_validation"] = execution_result["sql_validation"] + state["sql_results"] = execution_result["sql_results"] + state["warnings"] = execution_result["warnings"] + response_format = self._determine_response_format( + user_query=state["user_query"], + sql_results=execution_result["sql_results"], + ) + state["response"] = DashboardChatResponse( + answer_text=self._compose_final_answer_text( + state, + execution_result, + response_format=response_format, + ), + intent=state["intent_decision"].intent, + citations=state["citations"], + warnings=execution_result["warnings"], + sql=execution_result["sql"], + sql_results=execution_result["sql_results"], + usage=self._build_usage_summary(), + tool_calls=execution_result["tool_calls"], + metadata={ + "response_format": response_format, + "table_columns": self._sql_result_columns(execution_result["sql_results"]), + }, + ) + return state + + +def _node_finalize_response( + self, + state: DashboardChatRuntimeState, +) -> DashboardChatRuntimeState: + """Attach warehouse citations and metadata to the finished response.""" + response = state["response"] + citations = list(response.citations) + sql_validation = state.get("sql_validation") + if ( + sql_validation is not None + and sql_validation.is_valid + and sql_validation.sanitized_sql is 
not None + ): + citations.extend( + DashboardChatCitation( + source_type="warehouse_table", + source_identifier=table_name, + title=f"Warehouse table: {table_name}", + snippet=f"SQL executed against {table_name}.", + table_name=table_name, + ) + for table_name in sql_validation.tables + if table_name + ) + + allowlist = state.get("allowlist") or DashboardChatAllowlist() + response_metadata = dict(response.metadata) + response_metadata.update( + { + "dashboard_id": state["dashboard_id"], + "retrieved_document_ids": [ + document.document_id for document in state.get("retrieved_documents") or [] + ], + "allowlisted_tables": sorted(allowlist.allowed_tables), + "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], + "intent_reason": state["intent_decision"].reason, + "missing_info": state["intent_decision"].missing_info, + "follow_up_type": state["intent_decision"].follow_up_context.follow_up_type, + } + ) + state["response"] = DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=list(dict.fromkeys(citations)), + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response_metadata, + ) + return state + + +def _route_after_intent(state: DashboardChatRuntimeState) -> str: + """Route to one explicit handler per prototype intent.""" + return state["intent_decision"].intent.value diff --git a/ddpui/core/dashboard_chat/graph/orchestrator.py b/ddpui/core/dashboard_chat/graph/orchestrator.py new file mode 100644 index 000000000..7651fcea2 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/orchestrator.py @@ -0,0 +1,88 @@ +"""Main dashboard chat LangGraph orchestrator.""" + +from collections.abc import Callable, Sequence +from functools import lru_cache +from typing import Any + +from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig +from 
ddpui.core.dashboard_chat.llm_client import DashboardChatLlmClient +from ddpui.core.dashboard_chat.openai_llm_client import OpenAIDashboardChatLlmClient +from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.warehouse_tools import DashboardChatWarehouseTools +from ddpui.models.org import Org + +from .bindings import bind_dashboard_chat_runtime_methods +from .definition import build_dashboard_chat_graph +from .state import DashboardChatRuntimeState, GREETING_PATTERN +from .tool_specifications import DASHBOARD_CHAT_TOOL_SPECIFICATIONS + + +class DashboardChatRuntime: + """Run dashboard chat turns with the prototype's explicit intent routing and tool loop.""" + + TOOL_SPECIFICATIONS = DASHBOARD_CHAT_TOOL_SPECIFICATIONS + + def __init__( + self, + vector_store: ChromaDashboardChatVectorStore | None = None, + llm_client: DashboardChatLlmClient | None = None, + warehouse_tools_factory: Callable[[Org], DashboardChatWarehouseTools] | None = None, + runtime_config: DashboardChatRuntimeConfig | None = None, + source_config: DashboardChatSourceConfig | None = None, + ): + self.runtime_config = runtime_config or DashboardChatRuntimeConfig.from_env() + self.source_config = source_config or DashboardChatSourceConfig.from_env() + self.vector_store = vector_store or ChromaDashboardChatVectorStore() + self.llm_client = llm_client or OpenAIDashboardChatLlmClient( + model=self.runtime_config.llm_model, + timeout_ms=self.runtime_config.llm_timeout_ms, + max_attempts=self.runtime_config.llm_max_attempts, + ) + self.warehouse_tools_factory = warehouse_tools_factory or ( + lambda org: DashboardChatWarehouseTools( + org=org, + max_rows=self.runtime_config.max_query_rows, + ) + ) + self.graph = build_dashboard_chat_graph(self) + + def run( + self, + org: Org, + dashboard_id: int, + user_query: str, + session_id: str | None = None, + vector_collection_name: str | None = None, + conversation_history: Sequence[dict[str, Any]] | None 
= None, + ): + """Run one dashboard chat turn.""" + if hasattr(self.llm_client, "reset_usage"): + self.llm_client.reset_usage() + if hasattr(self.vector_store, "reset_usage"): + self.vector_store.reset_usage() + initial_state: DashboardChatRuntimeState = { + "org": org, + "dashboard_id": dashboard_id, + "session_id": session_id, + "vector_collection_name": vector_collection_name, + "user_query": user_query, + "conversation_history": self._normalize_conversation_history(conversation_history), + "warnings": [], + "usage": {}, + } + final_state = self.graph.invoke(initial_state) + return final_state["response"] + +@lru_cache(maxsize=1) +def get_dashboard_chat_runtime() -> DashboardChatRuntime: + """Return the shared dashboard chat runtime used by live chat turns.""" + return DashboardChatRuntime() + +bind_dashboard_chat_runtime_methods(DashboardChatRuntime) + +__all__ = [ + "DashboardChatRuntime", + "DashboardChatRuntimeState", + "GREETING_PATTERN", + "get_dashboard_chat_runtime", +] diff --git a/ddpui/core/dashboard_chat/graph/presentation.py b/ddpui/core/dashboard_chat/graph/presentation.py new file mode 100644 index 000000000..0e2920d66 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/presentation.py @@ -0,0 +1,338 @@ +"""Answer composition and display-shaping helpers for dashboard chat.""" + +from collections.abc import Sequence +import logging +from typing import Any + +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatIntent, + DashboardChatIntentDecision, + DashboardChatRetrievedDocument, +) + +from .state import DashboardChatRuntimeState, GREETING_PATTERN + +logger = logging.getLogger(__name__) + + +def _serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: + """Trim large tool payloads before feeding them back into the model.""" + serialized = dict(result) + docs = serialized.get("docs") + if isinstance(docs, list) and len(docs) > 6: + serialized["docs"] = docs[:6] + rows = serialized.get("rows") + if isinstance(rows, list) 
and len(rows) > 5: + serialized["rows"] = rows[:5] + values = serialized.get("values") + if isinstance(values, list) and len(values) > 20: + serialized["values"] = values[:20] + return serialized + + +def _summarize_tool_call( + self, + *, + tool_name: str, + args: dict[str, Any], + result: dict[str, Any], +) -> dict[str, Any]: + """Persist a compact execution trace for one tool call.""" + entry: dict[str, Any] = {"name": tool_name, "args": args} + if tool_name == "retrieve_docs": + entry["count"] = result.get("count", 0) + entry["doc_ids"] = [doc.get("doc_id") for doc in result.get("docs", [])[:6]] + elif tool_name == "get_schema_snippets": + entry["tables"] = [table.get("table") for table in result.get("tables", [])] + elif tool_name == "search_dbt_models": + entry["count"] = result.get("count", 0) + entry["models"] = [ + model.get("table") or model.get("name") for model in result.get("models", []) + ] + elif tool_name == "get_dbt_model_info": + entry["model"] = result.get("model") + entry["column_count"] = len(result.get("columns") or []) + elif tool_name == "get_distinct_values": + entry["error"] = result.get("error") + entry["count"] = result.get("count", 0) + entry["values_sample"] = (result.get("values") or [])[:10] + elif tool_name == "list_tables_by_keyword": + entry["tables"] = [table.get("table") for table in result.get("tables", [])] + elif tool_name == "check_table_row_count": + entry["row_count"] = result.get("row_count") + elif tool_name == "run_sql_query": + entry["success"] = result.get("success", False) + entry["row_count"] = result.get("row_count", 0) + entry["sql_used"] = result.get("sql_used") + entry["error"] = result.get("error") + else: + entry["result"] = result + return entry + + +def _max_turns_message( + self, + user_query: str, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], +) -> str: + """Return a bounded fallback when the prototype tool loop exhausts its budget.""" + if retrieved_documents: + return ( + "I found 
relevant dashboard context, but I couldn't complete the analysis safely. " + "Please rephrase the question or ask about a metric shown on this dashboard." + ) + return ( + f"I couldn't find enough dashboard-backed context to answer: {user_query}. " + "Please rephrase or ask about a metric shown on this dashboard." + ) + + +def _compose_final_answer_text( + self, + state: DashboardChatRuntimeState, + execution_result: dict[str, Any], + *, + response_format: str, +) -> str: + """Compose one final markdown answer for all non-trivial routes.""" + normalized_sql_results = self._normalize_sql_results_for_answer( + execution_result.get("sql_results") + ) + draft_answer = (execution_result.get("answer_text") or "").strip() or None + if hasattr(self.llm_client, "compose_final_answer"): + try: + answer_text = self.llm_client.compose_final_answer( + user_query=state["user_query"], + intent=state["intent_decision"].intent, + response_format=response_format, + draft_answer=draft_answer, + retrieved_documents=list(execution_result.get("retrieved_documents") or []), + sql=execution_result.get("sql"), + sql_results=normalized_sql_results, + warnings=list(execution_result.get("warnings") or []), + ) + if answer_text: + return answer_text + except Exception: + logger.exception("Dashboard chat final answer composition failed") + return self._fallback_answer_text( + execution_result.get("retrieved_documents") or [], + normalized_sql_results, + response_format=response_format, + draft_answer=draft_answer, + ) + + +def _determine_response_format( + *, + user_query: str, + sql_results: list[dict[str, Any]] | None, +) -> str: + """Return how the frontend should present the final answer.""" + if not sql_results: + return "text" + first_row = sql_results[0] if sql_results else {} + column_count = len(first_row.keys()) if isinstance(first_row, dict) else 0 + normalized_query = user_query.lower() + tableish_keywords = [ + "breakdown", + "split by", + "list", + "table", + "tabular", + "rank", 
+ "ranking", + "top ", + "bottom ", + "wise", + ] + if "table" in normalized_query and column_count > 0: + return "table" + if len(sql_results) > 1 and column_count > 1: + return "text_with_table" + if any(keyword in normalized_query for keyword in tableish_keywords) and column_count > 1: + return "text_with_table" + return "text" + + +def _sql_result_columns(sql_results: list[dict[str, Any]] | None) -> list[str]: + """Return table columns for frontend rendering metadata.""" + if not sql_results: + return [] + first_row = sql_results[0] + if not isinstance(first_row, dict): + return [] + return list(first_row.keys()) + + +def _build_usage_summary(self) -> dict[str, Any]: + """Collect per-turn usage from the llm client and embedding provider when supported.""" + usage: dict[str, Any] = {} + if hasattr(self.llm_client, "usage_summary"): + llm_usage = self.llm_client.usage_summary() + if llm_usage: + usage["llm"] = llm_usage + if hasattr(self.vector_store, "usage_summary"): + embedding_usage = self.vector_store.usage_summary() + if embedding_usage: + usage["embeddings"] = embedding_usage + return usage + + +def _compose_small_talk_response(self, user_query: str) -> str: + """Generate the prototype small-talk response or fall back to a fixed helper.""" + if hasattr(self.llm_client, "compose_small_talk"): + try: + return self.llm_client.compose_small_talk(user_query) + except Exception: + logger.exception("Dashboard chat small-talk generation failed") + return "Hi! I can help with your program data and metrics. What would you like to know?" 
def _build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | None:
    """Short-circuit trivially social turns (greetings/thanks) so no LLM call is needed.

    Returns a SMALL_TALK decision when the stripped query matches the greeting
    pattern, otherwise None so normal intent routing proceeds.
    """
    stripped_query = user_query.strip()
    if GREETING_PATTERN.match(stripped_query):
        return DashboardChatIntentDecision(
            intent=DashboardChatIntent.SMALL_TALK,
            confidence=1.0,
            reason="Obvious greeting or thanks",
        )
    return None


def _build_fast_path_small_talk_response(user_query: str) -> str:
    """Return a canned, deterministic reply for fast-path greetings and thanks."""
    lowered = user_query.strip().lower()
    # Ordered: a "thank you" reply wins over time-of-day greetings.
    canned_replies = (
        ("thank", "You're welcome. Ask me anything about this dashboard or its data."),
        ("good morning", "Good morning. Ask me anything about this dashboard or the data behind it."),
        ("good afternoon", "Good afternoon. Ask me anything about this dashboard or the data behind it."),
        ("good evening", "Good evening. Ask me anything about this dashboard or the data behind it."),
    )
    for marker, reply in canned_replies:
        if marker in lowered:
            return reply
    return "Hi. Ask me anything about this dashboard or the data behind it."


def _clarification_fallback(missing_info: Sequence[str]) -> str:
    """Turn router-reported missing fields into a targeted clarification question.

    Mirrors the prototype's specific clarification nudges when the router
    omits its own question; falls back to a generic prompt when no known
    field names are present.
    """
    missing = {item.lower() for item in missing_info}
    needed_details: list[str] = []
    if "metric" in missing:
        needed_details.append("which metric")
    if missing & {"time_range", "time period"}:
        needed_details.append("what time period")
    if "dimension" in missing:
        needed_details.append("which breakdown or dimension")
    if needed_details:
        return "Could you clarify " + ", ".join(needed_details) + "?"
    return "Could you be more specific about the metric, program, or time period you want?"
+ + +def _fallback_answer_text( + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + sql_results: list[dict[str, Any]] | None, + *, + response_format: str = "text", + draft_answer: str | None = None, +) -> str: + """Fallback response when the model returns no final text.""" + if draft_answer: + return draft_answer + if sql_results is not None: + if not sql_results: + return "I didn't find any matching rows for that question." + if response_format in {"text_with_table", "table"}: + return f"I found {len(sql_results)} matching rows. See the table below for the breakdown." + if len(sql_results) == 1: + return _single_row_summary(sql_results[0]) + return f"I found {len(sql_results)} matching rows." + if retrieved_documents: + return _compact_snippet(retrieved_documents[0].content) + return "I couldn't find enough context to answer that." + + +def _single_row_summary(row: dict[str, Any]) -> str: + """Return a readable fallback when one structured row is available.""" + parts = [ + f"{_humanize_column_name(column)}: {value}" + for column, value in row.items() + ] + return "; ".join(parts) + + +def _humanize_column_name(column_name: str) -> str: + """Convert snake_case warehouse columns into human labels.""" + return str(column_name).replace("_", " ").strip().title() + + +def _normalize_sql_results_for_answer( + cls, + sql_results: list[dict[str, Any]] | None, +) -> list[dict[str, Any]] | None: + """Normalize SQL results into llm-friendly values for final answer writing.""" + if sql_results is None: + return None + normalized_rows: list[dict[str, Any]] = [] + for row in sql_results: + normalized_row: dict[str, Any] = {} + for column_name, value in row.items(): + normalized_row[column_name] = cls._normalize_sql_value_for_answer( + column_name, + value, + ) + normalized_rows.append(normalized_row) + return normalized_rows + + +def _normalize_sql_value_for_answer(cls, column_name: str, value: Any) -> Any: + """Format warehouse values into user-friendly forms 
for answer composition.""" + if value is None: + return None + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return cls._format_numeric_answer_value(column_name, value) + text_value = str(value) + numeric_value = cls._parse_numeric_string(text_value) + if numeric_value is None: + return text_value + return cls._format_numeric_answer_value(column_name, numeric_value) + + +def _format_numeric_answer_value(cls, column_name: str, value: float | int) -> str | int | float: + """Format numeric values for answer composition.""" + if cls._looks_like_rate_metric(column_name) and 0 <= float(value) <= 1: + percentage_value = f"{float(value) * 100:.1f}".rstrip("0").rstrip(".") + return f"{percentage_value}%" + rounded_value = round(float(value), 2) + if float(rounded_value).is_integer(): + return int(rounded_value) + return f"{rounded_value:.2f}".rstrip("0").rstrip(".") + + +def _parse_numeric_string(value: str) -> float | None: + """Parse decimal-like strings emitted by DjangoJSONEncoder.""" + normalized_value = value.strip() + if not normalized_value: + return None + import re + + if not re.fullmatch(r"-?\d+(?:\.\d+)?(?:E-?\d+)?", normalized_value, flags=re.IGNORECASE): + return None + try: + return float(normalized_value) + except ValueError: + return None + + +def _looks_like_rate_metric(column_name: str) -> bool: + """Return whether a metric name likely represents a percentage/rate.""" + normalized_column = column_name.lower() + return any( + token in normalized_column + for token in ["rate", "ratio", "percentage", "percent", "share", "pct"] + ) + + +from .retrieval import _compact_snippet diff --git a/ddpui/core/dashboard_chat/graph/retrieval.py b/ddpui/core/dashboard_chat/graph/retrieval.py new file mode 100644 index 000000000..8ac453911 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/retrieval.py @@ -0,0 +1,308 @@ +"""Retrieval and citation helpers for dashboard chat graph execution.""" + +from collections.abc import Sequence 
+from typing import Any + +from ddpui.core.dashboard_chat.allowlist import ( + DashboardChatAllowlist, + build_dashboard_chat_table_name, +) +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatCitation, + DashboardChatRetrievedDocument, +) +from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType + +from .source_identifiers import ( + chart_id_from_source_identifier, + unique_id_from_source_identifier, +) + + +def _retrieve_vector_documents( + self, + *, + org, + collection_name: str | None, + query_text: str, + source_types, + dashboard_id: int | None = None, + query_embedding: list[float] | None = None, +) -> list[DashboardChatRetrievedDocument]: + """Query chroma and normalize the results.""" + if not source_types: + return [] + + results = self.vector_store.query( + org.id, + query_text=query_text, + n_results=self.runtime_config.retrieval_limit, + source_types=source_types, + dashboard_id=dashboard_id, + query_embedding=query_embedding, + collection_name=collection_name, + ) + return [ + DashboardChatRetrievedDocument( + document_id=result.document_id, + source_type=str(result.metadata.get("source_type") or ""), + source_identifier=str(result.metadata.get("source_identifier") or ""), + content=result.content, + dashboard_id=result.metadata.get("dashboard_id"), + distance=result.distance, + ) + for result in results + ] + + +def _filter_allowlisted_dbt_results( + results: Sequence[DashboardChatRetrievedDocument], + allowlist: DashboardChatAllowlist, +) -> list[DashboardChatRetrievedDocument]: + """Keep only dbt docs that belong to the dashboard lineage.""" + filtered_results: list[DashboardChatRetrievedDocument] = [] + for result in results: + unique_id = _unique_id_from_source_identifier(result.source_identifier) + if allowlist.is_unique_id_allowed(unique_id): + filtered_results.append(result) + return filtered_results + + +def _dedupe_retrieved_documents( + results: Sequence[DashboardChatRetrievedDocument], +) -> 
list[DashboardChatRetrievedDocument]: + """Deduplicate retrieved documents while preserving better-ranked items.""" + scored_results: list[tuple[float, DashboardChatRetrievedDocument]] = [] + for result in results: + scored_results.append((result.distance if result.distance is not None else 999.0, result)) + + merged_results: list[DashboardChatRetrievedDocument] = [] + seen_document_ids: set[str] = set() + for _, result in sorted(scored_results, key=lambda item: item[0]): + if result.document_id in seen_document_ids: + continue + merged_results.append(result) + seen_document_ids.add(result.document_id) + return merged_results + + +def _build_citations( + self, + *, + retrieved_documents: Sequence[DashboardChatRetrievedDocument], + dashboard_export: dict[str, Any], + allowlist: DashboardChatAllowlist, +) -> list[DashboardChatCitation]: + """Build citations from the retrieved tool-loop documents.""" + dashboard_title = dashboard_export["dashboard"].get("title") or "Current dashboard" + chart_lookup = { + chart.get("id"): chart.get("title") or f"Chart {chart.get('id')}" + for chart in dashboard_export.get("charts") or [] + } + citations: list[DashboardChatCitation] = [] + for document in retrieved_documents[:6]: + table_name = None + if document.source_type in { + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, + }: + unique_id = unique_id_from_source_identifier(document.source_identifier) + table_name = allowlist.unique_id_to_table.get(unique_id) if unique_id else None + citations.append( + DashboardChatCitation( + source_type=document.source_type, + source_identifier=document.source_identifier, + title=self._citation_title( + document=document, + dashboard_title=dashboard_title, + chart_lookup=chart_lookup, + table_name=table_name, + ), + snippet=_compact_snippet(document.content), + dashboard_id=document.dashboard_id, + table_name=table_name, + ) + ) + return citations + + +def _citation_title( + *, + document: 
DashboardChatRetrievedDocument,
+    dashboard_title: str,
+    chart_lookup: dict[int, str],
+    table_name: str | None,
+) -> str:
+    """Map one retrieved document to a human-readable citation title, dispatching on its source type."""
+    if document.source_type == DashboardChatSourceType.ORG_CONTEXT.value:
+        return "Organization context"
+    if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value:
+        return f"Dashboard context: {dashboard_title}"
+    if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value:
+        chart_id = chart_id_from_source_identifier(document.source_identifier)  # None when the identifier names no chart
+        if chart_id is not None and chart_id in chart_lookup:
+            return f"Chart: {chart_lookup[chart_id]}"
+        return f"Dashboard export: {dashboard_title}"
+    if document.source_type == DashboardChatSourceType.DBT_MANIFEST.value:
+        return f"dbt manifest: {table_name or document.source_identifier}"
+    if document.source_type == DashboardChatSourceType.DBT_CATALOG.value:
+        return f"dbt catalog: {table_name or document.source_identifier}"
+    return document.source_identifier  # unknown source type: fall back to the raw identifier
+
+
+def _compact_snippet(content: str, max_length: int = 220) -> str:
+    """Collapse all runs of whitespace to single spaces and cap the result at ``max_length`` characters."""
+    normalized = " ".join(content.split())
+    if len(normalized) <= max_length:
+        return normalized
+    return normalized[: max_length - 3].rstrip() + "..."  # reserve 3 chars for the ellipsis suffix
+ + +def _build_tool_document_payload( + self, + document: DashboardChatRetrievedDocument, + allowlist: DashboardChatAllowlist, + dashboard_export: dict[str, Any], +) -> dict[str, Any]: + """Convert a runtime retrieval result into the prototype tool payload shape.""" + metadata: dict[str, Any] = { + "type": self._prototype_doc_type(document.source_type), + "source_type": document.source_type, + "source_identifier": document.source_identifier, + } + chart_id = chart_id_from_source_identifier(document.source_identifier) + if chart_id is not None: + metadata["chart_id"] = chart_id + metadata["dashboard_id"] = document.dashboard_id + chart_metadata = self._build_chart_tool_metadata(chart_id, dashboard_export) + if chart_metadata: + metadata.update(chart_metadata) + unique_id = unique_id_from_source_identifier(document.source_identifier) + if unique_id: + metadata["dbt_unique_id"] = unique_id + metadata["table_name"] = allowlist.unique_id_to_table.get(unique_id) + return { + "doc_id": document.document_id, + "content": document.content, + "metadata": metadata, + "similarity_score": document.distance, + } + + +def _build_chart_tool_metadata( + cls, + chart_id: int, + dashboard_export: dict[str, Any], +) -> dict[str, Any]: + """Return structured chart metadata that nudges the tool loop toward exact chart fields.""" + chart = next( + ( + candidate + for candidate in (dashboard_export.get("charts") or []) + if candidate.get("id") == chart_id + ), + None, + ) + if chart is None: + return {} + + preferred_table = build_dashboard_chat_table_name( + chart.get("schema_name"), + chart.get("table_name"), + ) + metric_columns = cls._chart_metric_columns(chart) + dimension_columns = cls._chart_dimension_columns(chart) + time_column = cls._chart_time_column(chart, dimension_columns) + payload: dict[str, Any] = { + "chart_title": str(chart.get("title") or ""), + "chart_type": str(chart.get("chart_type") or ""), + } + if preferred_table: + payload["preferred_table"] = preferred_table + 
if metric_columns: + payload["metric_columns"] = metric_columns + if dimension_columns: + payload["dimension_columns"] = dimension_columns + if time_column: + payload["time_column"] = time_column + return payload + + +def _prototype_doc_type(source_type: str) -> str: + """Map Dalgo source types into the prototype doc-type vocabulary.""" + if source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: + return "chart" + if source_type in { + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, + }: + return "dbt_model" + return "context" + + +def _chart_metric_columns(cls, chart: dict[str, Any]) -> list[str]: + """Extract the most likely metric columns from one chart export payload.""" + extra_config = chart.get("extra_config") or {} + metrics: list[str] = [] + for metric in extra_config.get("metrics") or []: + if isinstance(metric, str) and metric.strip(): + metrics.append(metric.strip()) + continue + if isinstance(metric, dict): + for key in ["column", "name", "field", "metric", "metric_column"]: + value = metric.get(key) + if isinstance(value, str) and value.strip(): + metrics.append(value.strip()) + break + for key in [ + "metric_col", + "metric_column", + "measure_col", + "measure_column", + "value_column", + "y_axis_column", + ]: + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + metrics.append(value.strip()) + return list(dict.fromkeys(metrics)) + + +def _chart_dimension_columns(cls, chart: dict[str, Any]) -> list[str]: + """Extract dimension-like fields from one chart export payload.""" + extra_config = chart.get("extra_config") or {} + dimensions: list[str] = [] + for key in ["dimension_col", "extra_dimension", "group_by", "category_column", "x_axis_column"]: + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + dimensions.append(value.strip()) + for value in extra_config.get("dimensions") or []: + if isinstance(value, str) and value.strip(): + 
dimensions.append(value.strip()) + return list(dict.fromkeys(dimensions)) + + +def _chart_time_column( + cls, + chart: dict[str, Any], + dimension_columns: Sequence[str], +) -> str | None: + """Extract or infer the chart's time dimension when one is present.""" + extra_config = chart.get("extra_config") or {} + for key in ["time_column", "time_dimension", "date_column"]: + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + for dimension in dimension_columns: + if cls._looks_like_time_dimension(dimension): + return dimension + return None + + +def _looks_like_time_dimension(column_name: str) -> bool: + """Return whether a dimension name probably represents time bucketing.""" + normalized_column = column_name.lower() + return any( + token in normalized_column + for token in ["date", "day", "week", "month", "quarter", "year", "time"] + ) diff --git a/ddpui/core/dashboard_chat/graph/session_snapshot.py b/ddpui/core/dashboard_chat/graph/session_snapshot.py new file mode 100644 index 000000000..43c18c809 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/session_snapshot.py @@ -0,0 +1,116 @@ +"""Session-snapshot helpers for dashboard chat graph execution.""" + +from typing import Any + +from django.core.cache import cache + +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlistBuilder +from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.session_cache import ( + DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, + build_dashboard_chat_session_snapshot_cache_key, + deserialize_allowlist, + deserialize_distinct_cache, + deserialize_schema_snippets, + serialize_allowlist, + serialize_distinct_cache, + serialize_schema_snippets, +) +from ddpui.services.dashboard_service import DashboardService + +from .state import DashboardChatRuntimeState + + +def _load_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: + """Return the current 
session's frozen dashboard context snapshot.""" + session_id = state.get("session_id") + if not session_id: + return self._build_session_snapshot(state) + + cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) + cached_snapshot = cache.get(cache_key) + if cached_snapshot is not None: + return { + "dashboard_export": dict(cached_snapshot["dashboard_export"]), + "dbt_index": dict(cached_snapshot.get("dbt_index") or {"resources_by_unique_id": {}}), + "allowlist": deserialize_allowlist(cached_snapshot.get("allowlist")), + "schema_cache": deserialize_schema_snippets(cached_snapshot.get("schema_cache")), + "distinct_cache": deserialize_distinct_cache(cached_snapshot.get("distinct_cache")), + } + + snapshot = self._build_session_snapshot(state) + cache.set( + cache_key, + { + "dashboard_export": snapshot["dashboard_export"], + "dbt_index": snapshot["dbt_index"], + "allowlist": serialize_allowlist(snapshot["allowlist"]), + "schema_cache": serialize_schema_snippets(snapshot["schema_cache"]), + "distinct_cache": serialize_distinct_cache(snapshot["distinct_cache"]), + }, + DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, + ) + return snapshot + + +def _build_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: + """Build one session-stable snapshot of dashboard-specific runtime context.""" + dashboard_export = DashboardService.export_dashboard_context( + state["dashboard_id"], + state["org"], + ) + manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(state["org"].dbt) + allowlist = DashboardChatAllowlistBuilder.build( + dashboard_export, + manifest_json=manifest_json, + ) + return { + "dashboard_export": dashboard_export, + "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index( + manifest_json, + allowlist, + ), + "allowlist": allowlist, + "schema_cache": {}, + "distinct_cache": set(), + } + + +def _persist_session_schema_cache( + self, + state: DashboardChatRuntimeState, + schema_cache: dict[str, 
DashboardChatSchemaSnippet], +) -> None: + """Persist lazily loaded schema snippets back into the session snapshot cache.""" + session_id = state.get("session_id") + if not session_id: + state["session_schema_cache"] = dict(schema_cache) + return + + cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) + cached_snapshot = cache.get(cache_key) + if cached_snapshot is None: + return + cached_snapshot["schema_cache"] = serialize_schema_snippets(schema_cache) + cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) + state["session_schema_cache"] = dict(schema_cache) + + +def _persist_session_distinct_cache( + self, + state: DashboardChatRuntimeState, + distinct_cache: set[tuple[str, str, str]], +) -> None: + """Persist validated distinct values back into the session snapshot cache.""" + session_id = state.get("session_id") + if not session_id: + state["session_distinct_cache"] = set(distinct_cache) + return + + cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) + cached_snapshot = cache.get(cache_key) + if cached_snapshot is None: + return + cached_snapshot["distinct_cache"] = serialize_distinct_cache(distinct_cache) + cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) + state["session_distinct_cache"] = set(distinct_cache) diff --git a/ddpui/core/dashboard_chat/graph/source_identifiers.py b/ddpui/core/dashboard_chat/graph/source_identifiers.py new file mode 100644 index 000000000..89030a70a --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/source_identifiers.py @@ -0,0 +1,22 @@ +"""Helpers for parsing dashboard-chat vector source identifiers.""" + + +def chart_id_from_source_identifier(source_identifier: str) -> int | None: + """Extract chart ids from dashboard export source identifiers.""" + parts = source_identifier.split(":") + if len(parts) >= 4 and parts[-2] == "chart": + try: + return int(parts[-1]) + except ValueError: + return None + return None + + +def 
unique_id_from_source_identifier(source_identifier: str) -> str | None: + """Extract dbt unique ids from manifest/catalog source identifiers.""" + if ":" not in source_identifier: + return None + prefix, unique_id = source_identifier.split(":", 1) + if prefix not in {"manifest", "catalog"}: + return None + return unique_id diff --git a/ddpui/core/dashboard_chat/graph/sql_execution.py b/ddpui/core/dashboard_chat/graph/sql_execution.py new file mode 100644 index 000000000..421d4ae84 --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/sql_execution.py @@ -0,0 +1,519 @@ +"""SQL execution and guardrail helpers for dashboard chat graph execution.""" + +from collections.abc import Sequence +import json +import re +from typing import Any + +from django.core.serializers.json import DjangoJSONEncoder + +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.runtime_types import DashboardChatIntent +from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard + +from .state import DashboardChatRuntimeState + + +def _validate_sql_allowlist( + self, + sql: str, + allowlist: DashboardChatAllowlist, +) -> dict[str, Any]: + """Validate that all referenced tables are in the dashboard allowlist.""" + referenced_tables = DashboardChatSqlGuard._extract_table_names(sql) + invalid_tables = [ + table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name) + ] + if invalid_tables: + return { + "valid": False, + "invalid_tables": invalid_tables, + "message": ( + "SQL references tables not available in the current dashboard: " + + ", ".join(invalid_tables) + + ". Use list_tables_by_keyword to find allowed tables." 
+ ), + } + return {"valid": True, "invalid_tables": [], "message": ""} + + +def _run_sql_with_distinct_guard( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Validate SQL like the prototype and let the tool loop self-correct on failures.""" + sql = str(args.get("sql") or "").strip() + if not sql: + return {"error": "sql_missing", "message": "SQL is required"} + + allowlist_validation = self._validate_sql_allowlist(sql, state["allowlist"]) + if not allowlist_validation["valid"]: + return { + "error": "table_not_allowed", + "invalid_tables": allowlist_validation["invalid_tables"], + "message": allowlist_validation["message"], + } + + follow_up_dimension_validation = self._validate_follow_up_dimension_usage( + sql=sql, + state=state, + execution_context=execution_context, + ) + if follow_up_dimension_validation is not None: + return follow_up_dimension_validation + missing_distinct = self._missing_distinct(sql, state, execution_context) + if missing_distinct: + return { + "error": "must_fetch_distinct_values", + "missing": missing_distinct, + "message": ( + "Call get_distinct_values for these columns, then regenerate the SQL using one of the returned values." 
+ ), + } + + validation = DashboardChatSqlGuard( + allowlist=state["allowlist"], + max_rows=self.runtime_config.max_query_rows, + ).validate(sql) + execution_context["last_sql_validation"] = validation + if not validation.is_valid or not validation.sanitized_sql: + return { + "error": "sql_validation_failed", + "issues": validation.errors, + "warnings": validation.warnings, + } + + missing_columns = self._missing_columns_in_primary_table( + sql=validation.sanitized_sql, + state=state, + execution_context=execution_context, + ) + if missing_columns is not None: + return missing_columns + + execution_context["last_sql"] = validation.sanitized_sql + try: + rows = self._get_turn_warehouse_tools( + execution_context, + state["org"], + ).execute_sql( + validation.sanitized_sql + ) + except Exception as error: + structured_error = self._structured_sql_execution_error( + sql=validation.sanitized_sql, + error=error, + state=state, + execution_context=execution_context, + ) + if structured_error is not None: + return structured_error + return { + "success": False, + "error": str(error), + "sql_used": validation.sanitized_sql, + } + + serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder)) + execution_context["last_sql_results"] = serialized_rows + self._record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=validation.sanitized_sql, + ) + return { + "success": True, + "row_count": len(serialized_rows), + "error": None, + "sql_used": validation.sanitized_sql, + "columns": list(serialized_rows[0].keys()) if serialized_rows else [], + "rows": serialized_rows, + } + + +def _missing_columns_in_primary_table( + self, + *, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any] | None: + """Return a corrective tool error when SQL references columns absent from the referenced query tables.""" + table_references = self._table_references(sql) + referenced_tables = [ + 
reference["table_name"] + for reference in table_references + if reference.get("table_name") + ] + if not referenced_tables: + return None + + schema_cache = self._get_cached_schema_snippets( + state, + execution_context, + tables=referenced_tables, + ) + all_schema_cache = self._get_cached_schema_snippets(state, execution_context) + missing_columns_by_table: dict[str, set[str]] = {} + candidate_tables_by_column: dict[str, list[str]] = {} + tables_in_query = list(dict.fromkeys(referenced_tables)) + + for qualifier, column_name in self._referenced_sql_identifier_refs(sql): + resolved_table = self._resolve_identifier_table( + qualifier=qualifier, + column_name=column_name, + table_references=table_references, + schema_cache=schema_cache, + ) + if resolved_table is not None: + continue + + if qualifier is not None: + target_table = ( + self._resolve_table_qualifier(qualifier, table_references) + or self._primary_table_name(sql) + or tables_in_query[0] + ) + else: + matching_tables = self._tables_with_column( + column_name, + tables_in_query, + schema_cache, + ) + if len(matching_tables) > 1: + continue + target_table = self._primary_table_name(sql) or tables_in_query[0] + + missing_columns_by_table.setdefault(target_table, set()).add(column_name) + candidate_tables_by_column[column_name] = self._find_tables_with_column( + column_name, + all_schema_cache, + ) + + missing_columns = sorted( + { + column_name + for columns in missing_columns_by_table.values() + for column_name in columns + } + ) + if not missing_columns: + return None + + primary_table = self._primary_table_name(sql) or tables_in_query[0] + target_table = ( + next(iter(missing_columns_by_table)) + if len(missing_columns_by_table) == 1 + else primary_table + ) + best_table = self._best_table_for_missing_columns( + missing_columns, + all_schema_cache, + ) + message = ( + f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. 
" + "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." + ) + if best_table: + message += f" Best candidate table: {best_table}." + result = { + "error": "column_not_in_table", + "table": target_table, + "missing_columns": missing_columns, + "candidate_tables": candidate_tables_by_column, + "best_table": best_table, + "message": message, + } + if len(missing_columns) == 1: + column_name = missing_columns[0] + result["column"] = column_name + result["candidates"] = candidate_tables_by_column.get(column_name, []) + return result + + +def _structured_sql_execution_error( + self, + *, + sql: str, + error: Exception, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any] | None: + """Convert warehouse execution errors into prototype-style corrective feedback when possible.""" + error_text = str(error) + missing_column_match = re.search( + r'column "(?:[\w]+\.)?([^"]+)" does not exist', + error_text, + flags=re.IGNORECASE, + ) + if missing_column_match: + missing_column = missing_column_match.group(1).lower() + schema_cache = self._get_cached_schema_snippets(state, execution_context) + candidate_tables = self._find_tables_with_column(missing_column, schema_cache) + return { + "error": "column_not_in_table", + "table": self._primary_table_name(sql), + "column": missing_column, + "missing_columns": [missing_column], + "candidates": candidate_tables, + "candidate_tables": {missing_column: candidate_tables}, + "best_table": candidate_tables[0] if candidate_tables else None, + "message": ( + f"Column {missing_column} is not available on the current table. " + "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns." 
+ ), + "sql_used": sql, + } + return None + + +def _validate_follow_up_dimension_usage( + self, + *, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any] | None: + """Keep add-dimension follow-ups from succeeding without actually changing query granularity.""" + intent_decision = state["intent_decision"] + if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL: + return None + if intent_decision.follow_up_context.follow_up_type != "add_dimension": + return None + + requested_dimension = self._extract_requested_follow_up_dimension( + intent_decision.follow_up_context.modification_instruction or state["user_query"] + ) + if not requested_dimension: + return None + + previous_sql = state["conversation_context"].last_sql_query or "" + current_dimensions = self._structural_dimensions_from_sql(sql) + previous_dimensions = self._structural_dimensions_from_sql(previous_sql) + normalized_requested_dimension = self._normalize_dimension_name(requested_dimension) + if ( + normalized_requested_dimension in current_dimensions + and normalized_requested_dimension not in previous_dimensions + ): + return None + + candidate_tables = self._find_tables_with_column( + requested_dimension, + self._get_cached_schema_snippets(state, execution_context), + ) + return { + "error": "requested_dimension_missing", + "requested_dimension": requested_dimension, + "previous_dimensions": sorted(previous_dimensions), + "current_dimensions": sorted(current_dimensions), + "candidate_tables": candidate_tables, + "message": ( + f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. " + "Use the requested dimension exactly, or pick a table that contains it." 
+ ), + } + + +def _missing_distinct( + self, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> list[dict[str, Any]]: + """Detect text filters that require a prior distinct-values call.""" + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return [] + + table_references = self._table_references(sql) + query_tables = [ + reference["table_name"] + for reference in table_references + if reference.get("table_name") + ] + if not query_tables: + return [] + primary_table = self._primary_table_name(sql) or query_tables[0] + + full_schema_cache = self._get_cached_schema_snippets( + state, + execution_context, + tables=query_tables, + ) + all_schema_cache = self._get_cached_schema_snippets(state, execution_context) + + column_types = { + table_name: { + str(column.get("name") or "").lower(): str( + column.get("data_type") or column.get("type") or "" + ).lower() + for column in getattr(snippet, "columns", []) + } + for table_name, snippet in full_schema_cache.items() + } + missing: list[dict[str, Any]] = [] + for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = self._resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_references=table_references, + schema_cache=full_schema_cache, + ) + if resolved_table is None and qualifier is None: + matching_tables = self._tables_with_column( + normalized_column, + query_tables, + full_schema_cache, + ) + if len(matching_tables) > 1: + continue + if resolved_table is None: + candidate_tables = self._find_tables_with_column( + normalized_column, + all_schema_cache, + ) + if qualifier is None and candidate_tables: + continue + missing.append( + { + "table": primary_table, + "column": column_name, + "error": "column_not_in_table", + "candidates": candidate_tables, 
+ } + ) + continue + data_type = column_types.get(resolved_table, {}).get(normalized_column, "") + if not data_type: + continue + if not self._is_text_type(data_type): + continue + if ( + not self._has_validated_distinct_value( + execution_context["distinct_cache"], + table_name=resolved_table, + column_name=normalized_column, + value=value, + ) + ): + missing.append( + {"table": resolved_table, "column": column_name, "value": value} + ) + return missing + + +def _normalize_distinct_value(value: Any) -> str: + """Normalize one distinct value for exact cache lookups.""" + return str(value).strip().lower() + + +def _has_validated_distinct_value( + cls, + distinct_cache: set[tuple[Any, ...]], + *, + table_name: str, + column_name: str, + value: Any, +) -> bool: + """Return whether this exact text filter value was already validated in-session.""" + normalized_value = cls._normalize_distinct_value(value) + normalized_column = column_name.lower() + normalized_table = table_name.lower() + return ( + (normalized_table, normalized_column, normalized_value) in distinct_cache + or ("*", normalized_column, normalized_value) in distinct_cache + or (normalized_table, normalized_column) in distinct_cache + or ("*", normalized_column) in distinct_cache + ) + + +def _is_text_type(data_type: str) -> bool: + """Treat common string-like warehouse types as requiring distinct-value lookup.""" + return any(text_token in data_type for text_token in ["char", "text", "string", "varchar"]) + + +def _record_validated_distinct_values( + self, + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + table_name: str, + column_name: str, + values: Sequence[Any], +) -> None: + """Persist exact validated filter values for the current session.""" + normalized_table = table_name.lower() + normalized_column = column_name.lower() + distinct_cache = execution_context["distinct_cache"] + for value in values: + normalized_value = self._normalize_distinct_value(value) + 
distinct_cache.add((normalized_table, normalized_column, normalized_value)) + distinct_cache.add(("*", normalized_column, normalized_value)) + self._persist_session_distinct_cache(state, distinct_cache) + + +def _record_validated_filters_from_sql( + self, + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + sql: str, +) -> None: + """Seed exact validated filter values from a successful SQL statement.""" + table_references = self._table_references(sql) + if not table_references: + return + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return + + query_tables = [ + reference["table_name"] + for reference in table_references + if reference.get("table_name") + ] + schema_cache = dict(execution_context.get("schema_cache") or {}) + values_by_target: dict[tuple[str, str], list[str]] = {} + for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = self._resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_references=table_references, + schema_cache=schema_cache, + ) + if resolved_table is None and qualifier is None: + if schema_cache: + matching_tables = self._tables_with_column( + normalized_column, + query_tables, + schema_cache, + ) + if len(matching_tables) == 1: + resolved_table = matching_tables[0] + elif len(query_tables) == 1: + resolved_table = query_tables[0] + values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) + + if not values_by_target: + return + + for (table_name, column_name), values in values_by_target.items(): + self._record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name=table_name, + column_name=column_name, + values=values, + ) diff --git a/ddpui/core/dashboard_chat/graph/sql_parsing.py 
b/ddpui/core/dashboard_chat/graph/sql_parsing.py new file mode 100644 index 000000000..ae86300da --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/sql_parsing.py @@ -0,0 +1,340 @@ +"""SQL parsing helpers for dashboard chat validation.""" + +from collections.abc import Sequence +import re +from typing import Any + +from ddpui.core.dashboard_chat.allowlist import normalize_dashboard_chat_table_name +from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard + + +def _primary_table_name(sql: str) -> str | None: + """Return the primary FROM table for single-query correction logic.""" + table_match = re.search(r"\bFROM\s+([`\"]?)([\w\.]+)\1", sql, re.IGNORECASE) + if not table_match: + return None + return normalize_dashboard_chat_table_name(table_match.group(2)) + + +def _table_references(cls, sql: str) -> list[dict[str, str | None]]: + """Return normalized FROM/JOIN table references and aliases from one SQL statement.""" + references: list[dict[str, str | None]] = [] + for match in re.finditer( + r"\b(?:FROM|JOIN)\s+([`\"]?)([\w\.]+)\1(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", + sql, + flags=re.IGNORECASE, + ): + table_name = normalize_dashboard_chat_table_name(match.group(2)) + if not table_name: + continue + alias = str(match.group(3) or "").lower() or None + references.append( + { + "table_name": table_name, + "alias": alias, + "short_name": table_name.split(".")[-1], + } + ) + return references + + +def _resolve_table_qualifier( + cls, + qualifier: str, + table_references: Sequence[dict[str, str | None]], +) -> str | None: + """Resolve a qualifier like `f` or `analytics_table` to one query table.""" + normalized_qualifier = qualifier.lower().strip().strip('`"') + matches = [ + str(reference["table_name"]) + for reference in table_references + if normalized_qualifier + in { + str(reference.get("alias") or ""), + str(reference.get("short_name") or ""), + 
str(reference.get("table_name") or ""), + } + ] + deduped_matches = list(dict.fromkeys(match for match in matches if match)) + if len(deduped_matches) == 1: + return deduped_matches[0] + return None + + +def _table_columns(snippet: DashboardChatSchemaSnippet | Any) -> set[str]: + """Return the normalized column names available on one schema snippet.""" + return { + str(column.get("name") or "").lower() + for column in getattr(snippet, "columns", []) or [] + } + + +def _tables_with_column( + cls, + column_name: str, + table_names: Sequence[str], + schema_cache: dict[str, Any], +) -> list[str]: + """Return the query tables that contain one column.""" + normalized_column_name = column_name.lower() + return [ + table_name + for table_name in table_names + if normalized_column_name in cls._table_columns(schema_cache.get(table_name)) + ] + + +def _resolve_identifier_table( + cls, + *, + qualifier: str | None, + column_name: str, + table_references: Sequence[dict[str, str | None]], + schema_cache: dict[str, Any], +) -> str | None: + """Resolve one referenced column to a concrete query table when it is unambiguous.""" + if qualifier is not None: + resolved_table = cls._resolve_table_qualifier(qualifier, table_references) + if not resolved_table: + return None + if column_name.lower() in cls._table_columns(schema_cache.get(resolved_table)): + return resolved_table + return None + + query_tables = [ + str(reference["table_name"]) + for reference in table_references + if reference.get("table_name") + ] + matching_tables = cls._tables_with_column(column_name, query_tables, schema_cache) + if len(matching_tables) == 1: + return matching_tables[0] + return None + + +def _referenced_sql_identifier_refs(cls, sql: str) -> list[tuple[str | None, str]]: + """Extract likely physical identifier references from the outer SQL.""" + table_aliases = { + alias.lower() + for alias in re.findall( + r"\b(?:FROM|JOIN)\s+[`\"]?[\w\.]+[`\"]?(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", + sql, + 
flags=re.IGNORECASE, + ) + if alias + } + select_aliases = cls._select_aliases(sql) + referenced_identifiers: list[tuple[str | None, str]] = [] + + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if select_clause: + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + referenced_identifiers.extend( + cls._extract_identifier_refs_from_sql_segment(expression, table_aliases) + ) + + for pattern in [ + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", + r"\bORDER\s+BY\s+(.+?)(?:\bLIMIT\b|$)", + ]: + match = re.search(pattern, sql, flags=re.IGNORECASE | re.DOTALL) + if match: + referenced_identifiers.extend( + cls._extract_identifier_refs_from_sql_segment( + match.group(1), + table_aliases, + ignored_identifiers=select_aliases, + ) + ) + + return list(dict.fromkeys(referenced_identifiers)) + + +def _select_aliases(sql: str) -> set[str]: + """Return aliases introduced by the outer SELECT clause.""" + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if not select_clause: + return set() + + aliases: set[str] = set() + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + alias_match = re.search( + r"\bAS\s+([A-Za-z_][A-Za-z0-9_]*)\s*$", + expression, + flags=re.IGNORECASE, + ) + if alias_match: + aliases.add(alias_match.group(1).lower()) + return aliases + + +def _extract_identifier_refs_from_sql_segment( + segment: str, + table_aliases: set[str], + ignored_identifiers: set[str] | None = None, +) -> list[tuple[str | None, str]]: + """Pull qualified and unqualified column-like identifiers out of one SQL segment.""" + normalized_segment = re.sub(r"'[^']*'", " ", segment) + normalized_segment = re.sub( + r"\bAS\s+[A-Za-z_][A-Za-z0-9_]*", + " ", + normalized_segment, + flags=re.IGNORECASE, + ) + ignored_tokens = { + "SELECT", + "FROM", + "WHERE", + "GROUP", + "BY", + "ORDER", + "LIMIT", + "COUNT", + "SUM", + 
"AVG", + "MIN", + "MAX", + "DISTINCT", + "AND", + "OR", + "AS", + "IN", + "CASE", + "WHEN", + "THEN", + "ELSE", + "END", + "TRUE", + "FALSE", + "NULL", + "NOT", + "ASC", + "DESC", + "ON", + "JOIN", + } + ignored_identifiers = {identifier.lower() for identifier in (ignored_identifiers or set())} + identifiers: list[tuple[str | None, str]] = [] + for match in re.finditer( + r"(?:(?P[A-Za-z_][A-Za-z0-9_]*)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)", + normalized_segment, + ): + qualifier = match.group("qualifier") + identifier = match.group("identifier") + if not identifier: + continue + if identifier.upper() in ignored_tokens: + continue + if identifier.lower() in table_aliases or identifier.lower() in ignored_identifiers: + continue + trailing_segment = normalized_segment[match.end() :].lstrip() + if qualifier is None and trailing_segment.startswith("("): + continue + identifiers.append((qualifier.lower() if qualifier else None, identifier.lower())) + return identifiers + + +def _best_table_for_missing_columns( + missing_columns: Sequence[str], + schema_cache: dict[str, Any], +) -> str | None: + """Return the first allowlisted table that covers all missing columns.""" + wanted_columns = {column_name.lower() for column_name in missing_columns} + for table_name, snippet in schema_cache.items(): + available_columns = { + str(column.get("name") or "").lower() for column in snippet.columns + } + if wanted_columns.issubset(available_columns): + return table_name + return None + + +def _extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str, str]]: + """Extract quoted text filter values from one WHERE clause.""" + extracted_values: list[tuple[str | None, str, str]] = [] + for qualifier, column_name, value in re.findall( + r"(?:([a-zA-Z_][a-zA-Z0-9_]*)\.)?([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'([^']+)'", + where_clause, + flags=re.IGNORECASE, + ): + extracted_values.append((qualifier.lower() if qualifier else None, column_name, value)) + + for match in re.finditer( + 
r"(?:([a-zA-Z_][a-zA-Z0-9_]*)\.)?([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\(([^)]*)\)", + where_clause, + flags=re.IGNORECASE, + ): + qualifier = match.group(1) + column_name = match.group(2) + for value in re.findall(r"'([^']+)'", match.group(3)): + extracted_values.append( + (qualifier.lower() if qualifier else None, column_name, value) + ) + return extracted_values + + +def _find_tables_with_column( + column_name: str, + schema_cache: dict[str, Any], + limit: int = 10, +) -> list[str]: + """Find allowlisted tables that contain one column.""" + matches: list[str] = [] + normalized_column_name = column_name.lower() + for table_name, snippet in schema_cache.items(): + if any( + normalized_column_name == str(column.get("name") or "").lower() + for column in snippet.columns + ): + matches.append(table_name) + if len(matches) >= limit: + break + return matches + + +def _structural_dimensions_from_sql(cls, sql: str) -> set[str]: + """Return normalized non-aggregate dimensions used by one SQL statement.""" + if not sql: + return set() + + dimensions: set[str] = set() + for dimension in cls._extract_dimensions_from_sql(sql): + identifier_refs = cls._extract_identifier_refs_from_sql_segment( + dimension, + table_aliases=set(), + ) + if identifier_refs: + dimensions.update( + cls._normalize_dimension_name(column_name) + for _, column_name in identifier_refs + ) + continue + dimensions.add(cls._normalize_dimension_name(dimension)) + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if not select_clause: + return {dimension for dimension in dimensions if dimension} + + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + normalized_expression = expression.strip() + if not normalized_expression or DashboardChatSqlGuard._contains_aggregate( + normalized_expression + ): + continue + for _, column_name in cls._extract_identifier_refs_from_sql_segment( + normalized_expression, + table_aliases=set(), + 
ignored_identifiers=cls._select_aliases(sql), + ): + dimensions.add(cls._normalize_dimension_name(column_name)) + return {dimension for dimension in dimensions if dimension} + + +def _normalize_dimension_name(value: str) -> str: + """Normalize dimension names from SQL expressions and natural-language follow-ups.""" + normalized_value = value.strip().strip('`"').lower() + normalized_value = normalized_value.split(".")[-1] + normalized_value = re.sub(r"[^a-z0-9_]+", "_", normalized_value) + normalized_value = re.sub(r"_+", "_", normalized_value).strip("_") + return normalized_value diff --git a/ddpui/core/dashboard_chat/graph/state.py b/ddpui/core/dashboard_chat/graph/state.py new file mode 100644 index 000000000..a507ddabb --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/state.py @@ -0,0 +1,50 @@ +"""Shared graph state and constants for dashboard chat orchestration.""" + +from typing import Any, TypedDict +import re + +from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatCitation, + DashboardChatConversationContext, + DashboardChatConversationMessage, + DashboardChatIntentDecision, + DashboardChatResponse, + DashboardChatRetrievedDocument, + DashboardChatSchemaSnippet, + DashboardChatSqlValidationResult, +) +from ddpui.models.org import Org + +GREETING_PATTERN = re.compile( + r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you)\b[\s!.?]*$", + re.IGNORECASE, +) + + +class DashboardChatRuntimeState(TypedDict, total=False): + """LangGraph state for one dashboard chat turn.""" + + org: Org + dashboard_id: int + session_id: str | None + vector_collection_name: str | None + user_query: str + conversation_history: list[DashboardChatConversationMessage] + conversation_context: DashboardChatConversationContext + small_talk_response: str | None + dashboard_export: dict[str, Any] + dbt_index: dict[str, Any] + allowlist: DashboardChatAllowlist + 
session_schema_cache: dict[str, DashboardChatSchemaSnippet] + session_distinct_cache: set[tuple[str, str, str]] + intent_decision: DashboardChatIntentDecision + retrieved_documents: list[DashboardChatRetrievedDocument] + citations: list[DashboardChatCitation] + tool_calls: list[dict[str, Any]] + sql: str | None + sql_validation: DashboardChatSqlValidationResult | None + sql_results: list[dict[str, Any]] | None + warnings: list[str] + usage: dict[str, Any] + response: DashboardChatResponse diff --git a/ddpui/core/dashboard_chat/graph/tool_handlers.py b/ddpui/core/dashboard_chat/graph/tool_handlers.py new file mode 100644 index 000000000..64bb854ad --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/tool_handlers.py @@ -0,0 +1,484 @@ +"""Tool handlers and turn-scoped tool helpers for dashboard chat.""" + +from collections.abc import Sequence +import logging +from typing import Any + +from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.warehouse_tools import DashboardChatWarehouseTools +from ddpui.models.org import Org + +from .state import DashboardChatRuntimeState + +logger = logging.getLogger(__name__) + + +def _handle_retrieve_docs_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Retrieve current-dashboard, org, and dbt context using the prototype tool contract.""" + query = str(args.get("query") or state["user_query"]).strip() + limit = max(1, min(int(args.get("limit", 8)), 20)) + requested_types = [ + str(doc_type) + for doc_type in (args.get("types") or ["chart", "dataset", "context", "dbt_model"]) + ] + retrieved_documents = [] + + if "chart" in requested_types: + retrieved_documents.extend( + self._retrieve_vector_documents( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + 
source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.DASHBOARD_EXPORT] + ), + dashboard_id=state["dashboard_id"], + query_embedding=self._get_cached_query_embedding( + query, + execution_context["embedding_cache"], + ), + ) + ) + if "context" in requested_types: + retrieved_documents.extend( + self._retrieve_vector_documents( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.DASHBOARD_CONTEXT] + ), + dashboard_id=state["dashboard_id"], + query_embedding=self._get_cached_query_embedding( + query, + execution_context["embedding_cache"], + ), + ) + ) + retrieved_documents.extend( + self._retrieve_vector_documents( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [DashboardChatSourceType.ORG_CONTEXT] + ), + query_embedding=self._get_cached_query_embedding( + query, + execution_context["embedding_cache"], + ), + ) + ) + if "dataset" in requested_types or "dbt_model" in requested_types: + dbt_results = self._retrieve_vector_documents( + org=state["org"], + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=self.source_config.filter_enabled( + [ + DashboardChatSourceType.DBT_MANIFEST, + DashboardChatSourceType.DBT_CATALOG, + ] + ), + query_embedding=self._get_cached_query_embedding( + query, + execution_context["embedding_cache"], + ), + ) + retrieved_documents.extend( + self._filter_allowlisted_dbt_results(dbt_results, state["allowlist"]) + ) + + merged_results = self._dedupe_retrieved_documents(retrieved_documents)[:limit] + for document in merged_results: + if document.document_id in execution_context["retrieved_document_ids"]: + continue + execution_context["retrieved_document_ids"].add(document.document_id) + execution_context["retrieved_documents"].append(document) + + docs = [ + 
self._build_tool_document_payload( + document, + state["allowlist"], + state["dashboard_export"], + ) + for document in merged_results + ] + return {"docs": docs, "count": len(docs)} + + +def _handle_get_schema_snippets_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Return schema snippets for allowlisted tables only.""" + requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []] + allowed_tables = [ + table_name + for table_name in requested_tables + if state["allowlist"].is_allowed(table_name) + ] + filtered_tables = sorted(set(requested_tables) - set(allowed_tables)) + schema_cache = self._get_cached_schema_snippets( + state, + execution_context, + tables=allowed_tables, + ) + tables_payload = [ + {"table": table_name, "columns": snippet.columns} + for table_name, snippet in schema_cache.items() + if table_name in allowed_tables + ] + response: dict[str, Any] = {"tables": tables_payload} + if filtered_tables: + response["filtered_tables"] = filtered_tables + response["filter_note"] = ( + f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard." 
+ ) + return response + + +def _handle_search_dbt_models_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Search allowlisted dbt nodes by name, description, and column metadata.""" + query = str(args.get("query") or "").strip().lower() + limit = max(1, min(int(args.get("limit", 8)), 20)) + if not query: + return {"models": [], "count": 0} + + results: list[dict[str, Any]] = [] + for node in self._dbt_resources_by_unique_id(state).values(): + table_name = node.get("table") + haystacks = [ + str(node.get("name") or ""), + str(node.get("description") or ""), + str(table_name or ""), + ] + for column in node.get("columns") or []: + haystacks.append(str(column.get("name") or "")) + haystacks.append(str(column.get("description") or "")) + if query not in " ".join(haystacks).lower(): + continue + results.append( + { + "name": str(node.get("name") or ""), + "schema": str(node.get("schema") or ""), + "database": str(node.get("database") or ""), + "description": str(node.get("description") or ""), + "columns": [ + str(column.get("name") or "") + for column in (node.get("columns") or []) + ][:20], + "table": table_name, + } + ) + if len(results) >= limit: + break + + return {"models": results, "count": len(results)} + + +def _handle_get_dbt_model_info_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Return one dbt model's description, columns, and lineage.""" + model_name = str(args.get("model_name") or "").strip().lower() + if not model_name: + return {"error": "model_name is required"} + + matched_unique_id: str | None = None + matched_node: dict[str, Any] | None = None + for unique_id, node in self._dbt_resources_by_unique_id(state).items(): + table_name = node.get("table") + candidates = { + str(node.get("name") or "").lower(), + str(table_name or "").lower(), + } + if model_name not in 
candidates: + continue + matched_unique_id = unique_id + matched_node = node + break + + if matched_unique_id is None or matched_node is None: + return {"error": f"Model not found: {model_name}"} + + return { + "model": str(matched_node.get("name") or ""), + "schema": str(matched_node.get("schema") or ""), + "database": str(matched_node.get("database") or ""), + "description": str(matched_node.get("description") or ""), + "columns": list(matched_node.get("columns") or [])[:50], + "upstream": list(matched_node.get("upstream") or []), + "downstream": list(matched_node.get("downstream") or []), + } + + +def _handle_get_distinct_values_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Return distinct values and persist validated filter values for the session.""" + table_name = str(args.get("table") or "").lower() + column_name = str(args.get("column") or "") + limit = max(1, min(int(args.get("limit", 50)), 200)) + if not state["allowlist"].is_allowed(table_name): + return { + "error": "table_not_allowed", + "table": table_name, + "message": ( + f"Table {table_name} is not accessible in the current dashboard context." + ), + } + + schema_cache = self._get_cached_schema_snippets(state, execution_context) + snippet = schema_cache.get(table_name) + normalized_column_name = column_name.lower() + if snippet is not None and normalized_column_name not in { + str(column.get("name") or "").lower() for column in snippet.columns + }: + candidates = self._find_tables_with_column(normalized_column_name, schema_cache) + return { + "error": "column_not_in_table", + "table": table_name, + "column": column_name, + "candidates": candidates, + "message": ( + f"Column {column_name} is not available on {table_name}. " + "Use a table that contains it, inspect that schema, and retry the lookup." 
+ ), + } + + values = self._get_turn_warehouse_tools( + execution_context, + state["org"], + ).get_distinct_values( + table_name=table_name, + column_name=column_name, + limit=limit, + ) + self._record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name=table_name, + column_name=column_name, + values=values, + ) + return { + "table": table_name, + "column": column_name, + "values": values, + "count": len(values), + } + + +def _handle_list_tables_by_keyword_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Search allowlisted tables by table name or column name.""" + keyword = str(args.get("keyword") or "").strip().lower() + limit = max(1, min(int(args.get("limit", 15)), 50)) + if not keyword: + return {"tables": []} + + allowlist_tables_source = state["allowlist"].prioritized_tables() or sorted( + state["allowlist"].allowed_tables + ) + allowlisted_tables = list( + dict.fromkeys(table_name.lower() for table_name in allowlist_tables_source) + ) + direct_match_tables = [ + table_name + for table_name in allowlisted_tables + if keyword in table_name or keyword in table_name.rsplit(".", 1)[-1] + ] + + schema_cache: dict[str, Any] = {} + lookup_tables = direct_match_tables or allowlisted_tables + if lookup_tables: + try: + schema_cache = self._get_cached_schema_snippets( + state, + execution_context, + tables=lookup_tables, + ) + except Exception as error: + logger.warning("Dashboard chat keyword table lookup fell back to names only: %s", error) + execution_context["warnings"].append(str(error)) + + matches: list[dict[str, Any]] = [] + seen_tables: set[str] = set() + + for table_name in direct_match_tables: + column_names = [ + str(column.get("name") or "") + for column in getattr(schema_cache.get(table_name), "columns", []) + ] + matches.append({"table": table_name, "columns": column_names[:40]}) + seen_tables.add(table_name) + if 
len(matches) >= limit: + break + + for table_name, snippet in schema_cache.items(): + if table_name in seen_tables: + continue + column_names = [str(column.get("name") or "") for column in snippet.columns] + if not any(keyword in column_name.lower() for column_name in column_names): + continue + matches.append({"table": table_name, "columns": column_names[:40]}) + if len(matches) >= limit: + break + + if matches: + return { + "tables": matches, + "hint": ( + f"Found {len(matches)} allowlisted tables. Check schema before assuming table structure." + ), + } + return { + "tables": [], + "hint": ( + f"No allowlisted tables matched '{keyword}'. Try a broader keyword or retrieve chart docs first." + ), + } + + +def _handle_check_table_row_count_tool( + self, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Count rows in one allowlisted table.""" + table_name = str(args.get("table") or "").lower() + if not state["allowlist"].is_allowed(table_name): + return { + "error": "table_not_allowed", + "table": table_name, + "message": ( + f"Table {table_name} is not accessible in the current dashboard context." 
+ ), + } + + sql = f"SELECT COUNT(*) AS row_count FROM {table_name} LIMIT 1" + validation = DashboardChatSqlGuard( + allowlist=state["allowlist"], + max_rows=1, + ).validate(sql) + if not validation.is_valid or not validation.sanitized_sql: + return {"error": "sql_validation_failed", "issues": validation.errors} + + rows = self._get_turn_warehouse_tools( + execution_context, + state["org"], + ).execute_sql( + validation.sanitized_sql + ) + row_count = 0 + if rows: + row_count = int(rows[0].get("row_count") or 0) + return {"table": table_name, "row_count": row_count, "has_data": row_count > 0} + + +def _get_turn_warehouse_tools( + self, + execution_context: dict[str, Any], + org: Org, +) -> DashboardChatWarehouseTools: + """Build the warehouse tool helper lazily for the turn.""" + warehouse_tools = execution_context.get("warehouse_tools") + if warehouse_tools is None: + warehouse_tools = self.warehouse_tools_factory(org) + execution_context["warehouse_tools"] = warehouse_tools + return warehouse_tools + + +def _get_cached_schema_snippets( + self, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + tables: Sequence[str] | None = None, +) -> dict[str, Any]: + """Load and cache schema snippets for allowlisted tables.""" + requested_tables = [ + table_name.lower() + for table_name in ( + tables if tables is not None else state["allowlist"].prioritized_tables() + ) + if state["allowlist"].is_allowed(table_name) + ] + cache = execution_context["schema_cache"] + missing_tables = [table_name for table_name in requested_tables if table_name not in cache] + if missing_tables: + snippets = self._get_turn_warehouse_tools( + execution_context, + state["org"], + ).get_schema_snippets(missing_tables) + for table_name, snippet in snippets.items(): + cache[table_name.lower()] = snippet + if snippets: + self._persist_session_schema_cache(state, cache) + if tables is None: + return cache + return { + table_name: cache[table_name] + for table_name in 
requested_tables + if table_name in cache + } + + +def _seed_distinct_cache_from_previous_sql( + self, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> None: + """Treat text filters from the previous successful SQL as already validated for follow-ups.""" + previous_sql = state["conversation_context"].last_sql_query + if not previous_sql: + return + + self._record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=previous_sql, + ) + + +def _dbt_resources_by_unique_id( + state: DashboardChatRuntimeState, +) -> dict[str, dict[str, Any]]: + """Return the allowlisted dbt index built at session start.""" + dbt_index = state.get("dbt_index") or {} + return dict(dbt_index.get("resources_by_unique_id") or {}) + + +def _get_cached_query_embedding( + self, + query_text: str, + embedding_cache: dict[str, list[float]], +) -> list[float]: + """Cache embeddings per query string during one turn.""" + if query_text not in embedding_cache: + embedding_cache[query_text] = self.vector_store.embed_query(query_text) + return embedding_cache[query_text] diff --git a/ddpui/core/dashboard_chat/graph/tool_loop.py b/ddpui/core/dashboard_chat/graph/tool_loop.py new file mode 100644 index 000000000..c7bd619cd --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/tool_loop.py @@ -0,0 +1,189 @@ +"""Prototype-style tool-loop execution helpers for dashboard chat.""" + +import json +import logging +from typing import Any + +from django.core.serializers.json import DjangoJSONEncoder + +from ddpui.core.dashboard_chat.warehouse_tools import DashboardChatWarehouseToolsError + +from .state import DashboardChatRuntimeState + +logger = logging.getLogger(__name__) + + +def _execute_tool_loop( + self, + *, + state: DashboardChatRuntimeState, + messages: list[dict[str, Any]], + max_turns: int, + initial_embedding_cache: dict[str, list[float]] | None = None, +) -> dict[str, Any]: + """Execute the prototype's iterative tool loop.""" + 
execution_context: dict[str, Any] = { + "distinct_cache": set(state.get("session_distinct_cache") or set()), + "embedding_cache": dict(initial_embedding_cache or {}), + "schema_cache": dict(state.get("session_schema_cache") or {}), + "retrieved_documents": [], + "retrieved_document_ids": set(), + "tool_calls": [], + "warnings": list(state.get("warnings", [])), + "warehouse_tools": None, + "last_sql": None, + "last_sql_results": None, + "last_sql_validation": None, + } + self._seed_distinct_cache_from_previous_sql(state, execution_context) + intent_decision = state["intent_decision"] + + for turn_index in range(max_turns): + tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" + ai_message = self.llm_client.run_tool_loop_turn( + messages=messages, + tools=self.TOOL_SPECIFICATIONS, + tool_choice=tool_choice, + operation=f"tool_loop_{intent_decision.intent.value}", + ) + tool_calls = ai_message.get("tool_calls") or [] + assistant_record: dict[str, Any] = { + "role": "assistant", + "content": ai_message.get("content", "") or "", + } + if tool_calls: + assistant_record["tool_calls"] = [ + { + "id": tool_call.get("id"), + "type": "function", + "function": { + "name": tool_call.get("name"), + "arguments": ( + tool_call.get("args") + if isinstance(tool_call.get("args"), str) + else json.dumps(tool_call.get("args") or {}) + ), + }, + } + for tool_call in tool_calls + ] + messages.append(assistant_record) + + if not tool_calls: + return self._build_tool_loop_result( + answer_text=( + (ai_message.get("content") or "").strip() + or self._fallback_answer_text( + execution_context["retrieved_documents"], + execution_context["last_sql_results"], + ) + ), + execution_context=execution_context, + max_turns_reached=False, + ) + + for tool_call in tool_calls: + raw_args = tool_call.get("args") or {} + args = raw_args + if isinstance(raw_args, str): + try: + args = json.loads(raw_args) + except json.JSONDecodeError: + args = {} + result = 
self._execute_tool_call( + tool_name=str(tool_call.get("name") or ""), + args=args, + state=state, + execution_context=execution_context, + ) + execution_context["tool_calls"].append( + self._summarize_tool_call( + tool_name=str(tool_call.get("name") or ""), + args=args, + result=result, + ) + ) + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.get("id"), + "content": json.dumps( + self._serialize_tool_result(result), + cls=DjangoJSONEncoder, + ), + } + ) + if str(tool_call.get("name") or "") == "run_sql_query" and result.get("success"): + return self._build_tool_loop_result( + answer_text="", + execution_context=execution_context, + max_turns_reached=False, + ) + + return self._build_tool_loop_result( + answer_text=self._max_turns_message( + state["user_query"], + execution_context["retrieved_documents"], + ), + execution_context=execution_context, + max_turns_reached=True, + ) + + +def _execute_tool_call( + self, + *, + tool_name: str, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Execute one prototype tool against the Dalgo runtime primitives.""" + try: + if tool_name == "retrieve_docs": + return self._handle_retrieve_docs_tool(args, state, execution_context) + if tool_name == "get_schema_snippets": + return self._handle_get_schema_snippets_tool(args, state, execution_context) + if tool_name == "search_dbt_models": + return self._handle_search_dbt_models_tool(args, state, execution_context) + if tool_name == "get_dbt_model_info": + return self._handle_get_dbt_model_info_tool(args, state, execution_context) + if tool_name == "get_distinct_values": + return self._handle_get_distinct_values_tool(args, state, execution_context) + if tool_name == "run_sql_query": + return self._run_sql_with_distinct_guard(args, state, execution_context) + if tool_name == "list_tables_by_keyword": + return self._handle_list_tables_by_keyword_tool(args, state, execution_context) + if 
tool_name == "check_table_row_count": + return self._handle_check_table_row_count_tool(args, state, execution_context) + return {"error": f"Unknown tool: {tool_name}"} + except DashboardChatWarehouseToolsError as error: + logger.warning("Dashboard chat tool %s failed: %s", tool_name, error) + execution_context["warnings"].append(str(error)) + return {"error": str(error)} + except Exception as error: + logger.exception("Dashboard chat tool %s failed", tool_name) + execution_context["warnings"].append(str(error)) + return {"error": str(error)} + + +def _build_tool_loop_result( + self, + *, + answer_text: str, + execution_context: dict[str, Any], + max_turns_reached: bool, +) -> dict[str, Any]: + """Normalize tool-loop state into one runtime response payload.""" + if max_turns_reached: + execution_context["tool_calls"].append({"name": "max_turns_reached"}) + warnings = list(dict.fromkeys(execution_context["warnings"])) + return { + "answer_text": answer_text.strip(), + "retrieved_documents": execution_context["retrieved_documents"], + "tool_calls": execution_context["tool_calls"], + "sql": execution_context["last_sql"], + "sql_validation": execution_context["last_sql_validation"], + "sql_results": execution_context["last_sql_results"], + "warnings": warnings, + } diff --git a/ddpui/core/dashboard_chat/graph/tool_specifications.py b/ddpui/core/dashboard_chat/graph/tool_specifications.py new file mode 100644 index 000000000..eb1c8264b --- /dev/null +++ b/ddpui/core/dashboard_chat/graph/tool_specifications.py @@ -0,0 +1,148 @@ +"""OpenAI tool specifications used by the dashboard chat graph.""" + +DASHBOARD_CHAT_TOOL_SPECIFICATIONS = [ + { + "type": "function", + "function": { + "name": "retrieve_docs", + "description": "Search for relevant charts, datasets, dbt models, or context sections.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"}, + "types": { + "type": "array", + "items": { + "type": "string", 
+ "enum": ["chart", "dataset", "context", "dbt_model"], + }, + "description": "Document types to search", + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 20, "default": 8}, + }, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_schema_snippets", + "description": "Get column information for database tables.", + "parameters": { + "type": "object", + "properties": { + "tables": { + "type": "array", + "items": {"type": "string"}, + "description": "Fully-qualified table names (schema.table)", + } + }, + "required": ["tables"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "search_dbt_models", + "description": "Search dbt models by keyword to find relevant data models.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query for model names/descriptions", + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 20, "default": 8}, + }, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_dbt_model_info", + "description": "Get detailed information about a specific dbt model.", + "parameters": { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "description": "Model name or schema.table", + } + }, + "required": ["model_name"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_distinct_values", + "description": "Get distinct values for a column (required before filtering on text columns).", + "parameters": { + "type": "object", + "properties": { + "table": { + "type": "string", + "description": "Fully-qualified table name", + }, + "column": {"type": "string", "description": "Column name"}, + "limit": {"type": "integer", "minimum": 1, "maximum": 200, "default": 50}, + }, + "required": ["table", "column"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "run_sql_query", + "description": "Execute a read-only SQL query 
on the database.", + "parameters": { + "type": "object", + "properties": { + "sql": {"type": "string", "description": "SELECT query to execute"} + }, + "required": ["sql"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "list_tables_by_keyword", + "description": "Find tables whose name or columns match a keyword (no hard-coding).", + "parameters": { + "type": "object", + "properties": { + "keyword": { + "type": "string", + "description": "Keyword such as donor, funding, student", + }, + "limit": {"type": "integer", "minimum": 1, "maximum": 50, "default": 15}, + }, + "required": ["keyword"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "check_table_row_count", + "description": "Get the total number of rows in a table to check if it has data.", + "parameters": { + "type": "object", + "properties": { + "table": { + "type": "string", + "description": "Fully-qualified table name (schema.table)", + } + }, + "required": ["table"], + }, + }, + }, +] diff --git a/ddpui/core/dashboard_chat/llm_answer_formatting.py b/ddpui/core/dashboard_chat/llm_answer_formatting.py new file mode 100644 index 000000000..af41d0d2d --- /dev/null +++ b/ddpui/core/dashboard_chat/llm_answer_formatting.py @@ -0,0 +1,88 @@ +"""Answer-formatting helpers for dashboard chat LLM responses.""" + +import json +from typing import Any + +from ddpui.core.dashboard_chat.runtime_types import DashboardChatRetrievedDocument + +TABLE_SUMMARY_JSON_INSTRUCTIONS = """ +For table-like responses, return valid JSON only with this shape: +{ + "title": "short heading or null", + "summary": "1-2 sentence narrative summary", + "key_points": ["short point", "short point"] +} + +Rules: +- Do not include markdown tables. +- Do not include pipe characters or ASCII table formatting. +- Do not repeat every row from the result set. +- The UI will render the structured table separately from sql_results. +- Keep key_points to at most 3 concise bullets. 
+""".strip() + + +def build_final_answer_context_payload( + *, + user_query: str, + intent: str, + response_format: str, + draft_answer: str | None, + retrieved_documents: list[DashboardChatRetrievedDocument], + sql: str | None, + sql_results: list[dict[str, Any]] | None, + warnings: list[str], +) -> dict[str, Any]: + """Build the prompt payload used for final answer composition.""" + return { + "user_query": user_query, + "intent": intent, + "response_format": response_format, + "draft_answer": draft_answer or None, + "warnings": warnings[:5], + "sql": sql, + "sql_results": (sql_results or [])[:8], + "row_count": len(sql_results or []), + "retrieved_context": [ + { + "source_type": document.source_type, + "source_identifier": document.source_identifier, + "content": compact_answer_snippet(document.content), + } + for document in retrieved_documents[:6] + ], + } + + +def compact_answer_snippet(content: str, max_length: int = 320) -> str: + """Trim retrieved context before feeding it into the final answer prompt.""" + normalized_content = " ".join(content.split()) + if len(normalized_content) <= max_length: + return normalized_content + return normalized_content[: max_length - 1].rstrip() + "…" + + +def format_table_summary_markdown(result: dict[str, Any]) -> str: + """Render a structured table summary into short markdown without any table body.""" + title = str(result.get("title") or "").strip() + summary = str(result.get("summary") or "").strip() + raw_key_points = result.get("key_points") or [] + key_points = [ + str(point).strip() + for point in raw_key_points + if isinstance(point, str) and point.strip() + ][:3] + + sections: list[str] = [] + if title: + sections.append(f"### {title}") + if summary: + sections.append(summary) + if key_points: + sections.append("\n".join(f"- {point}" for point in key_points)) + return "\n\n".join(section for section in sections if section).strip() + + +def serialize_final_answer_context_payload(payload: dict[str, Any]) -> str: + 
"""Serialize final answer prompt context deterministically.""" + return json.dumps(payload, ensure_ascii=False) diff --git a/ddpui/core/dashboard_chat/llm_client.py b/ddpui/core/dashboard_chat/llm_client.py index 52e4acb64..d0e4b0494 100644 --- a/ddpui/core/dashboard_chat/llm_client.py +++ b/ddpui/core/dashboard_chat/llm_client.py @@ -1,23 +1,15 @@ -"""Direct OpenAI client wrapper for dashboard chat runtime.""" +"""LLM interface contract for dashboard chat.""" -import json -import logging -import os -from time import sleep from typing import Any, Protocol -from ddpui.core.dashboard_chat.prompt_store import DashboardChatPromptStore from ddpui.core.dashboard_chat.runtime_types import ( DashboardChatConversationContext, - DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, DashboardChatRetrievedDocument, ) from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey -logger = logging.getLogger("ddpui") - class DashboardChatLlmClient(Protocol): """LLM contract used by the dashboard chat LangGraph runtime.""" @@ -61,332 +53,3 @@ def compose_final_answer( warnings: list[str], ) -> str: """Compose the final user-facing markdown answer.""" - - -class OpenAIDashboardChatLlmClient: - """Direct OpenAI SDK adapter with JSON-mode helpers.""" - - TECHNICAL_DIFFICULTIES_MESSAGE = ( - "I'm experiencing technical difficulties. Please try again." - ) - TABLE_SUMMARY_JSON_INSTRUCTIONS = """ -For table-like responses, return valid JSON only with this shape: -{ - "title": "short heading or null", - "summary": "1-2 sentence narrative summary", - "key_points": ["short point", "short point"] -} - -Rules: -- Do not include markdown tables. -- Do not include pipe characters or ASCII table formatting. -- Do not repeat every row from the result set. -- The UI will render the structured table separately from sql_results. -- Keep key_points to at most 3 concise bullets. 
-""".strip() - - def __init__( - self, - api_key: str | None = None, - model: str = "gpt-4o-mini", - timeout_ms: int = 12000, - max_attempts: int = 1, - client: Any = None, - prompt_store: DashboardChatPromptStore | None = None, - ): - self.api_key = api_key or os.getenv("OPENAI_API_KEY") - self.model = model - self.timeout_ms = timeout_ms - self.max_attempts = max(1, max_attempts) - self.prompt_store = prompt_store or DashboardChatPromptStore() - self.usage_events: list[dict[str, Any]] = [] - if client is None: - if not self.api_key: - raise ValueError("OPENAI_API_KEY must be set for dashboard chat runtime") - from openai import OpenAI - - client = OpenAI( - api_key=self.api_key, - timeout=timeout_ms / 1000, - max_retries=0, - ) - self.client = client - - def reset_usage(self) -> None: - """Reset aggregated OpenAI usage before one new chat turn.""" - self.usage_events = [] - - def classify_intent( - self, - user_query: str, - conversation_context: DashboardChatConversationContext, - ) -> DashboardChatIntentDecision: - """Classify intent with prototype-style conversation awareness.""" - system_prompt = self.prompt_store.get( - DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION - ) - if conversation_context.last_sql_query or conversation_context.last_chart_ids: - system_prompt += ( - "\n\nCONVERSATION CONTEXT:\n" - f"- Previous SQL: {conversation_context.last_sql_query or 'None'}\n" - f"- Previous tables: {', '.join(conversation_context.last_tables_used) or 'None'}\n" - f"- Previous charts: {', '.join(conversation_context.last_chart_ids) or 'None'}\n" - f"- Last response type: {conversation_context.last_response_type or 'None'}\n\n" - "Use this context to detect follow-up queries that want to modify or expand on previous results." 
- ) - try: - result = self._complete_json( - operation="intent_classification", - system_prompt=system_prompt, - user_prompt=f"Classify this query: {user_query}", - ) - except Exception: - logger.exception("Dashboard chat intent classification failed") - return DashboardChatIntentDecision( - intent=DashboardChatIntent.NEEDS_CLARIFICATION, - confidence=0.0, - reason="Intent classification failed", - clarification_question=self.TECHNICAL_DIFFICULTIES_MESSAGE, - ) - intent_value = result.get("intent", DashboardChatIntent.QUERY_WITHOUT_SQL.value) - try: - intent = DashboardChatIntent(intent_value) - except ValueError: - intent = DashboardChatIntent.QUERY_WITHOUT_SQL - follow_up_result = result.get("follow_up_context") or {} - follow_up_context = DashboardChatFollowUpContext( - is_follow_up=bool(follow_up_result.get("is_follow_up")), - follow_up_type=follow_up_result.get("follow_up_type"), - reusable_elements=follow_up_result.get("reusable_elements") or {}, - modification_instruction=follow_up_result.get("modification_instruction"), - ) - return DashboardChatIntentDecision( - intent=intent, - confidence=float(result.get("confidence") or 0.0), - reason=str(result.get("reason") or "LLM classification"), - missing_info=[str(item) for item in result.get("missing_info", []) if item], - force_tool_usage=bool( - result.get( - "force_tool_usage", - intent - in { - DashboardChatIntent.QUERY_WITH_SQL, - DashboardChatIntent.FOLLOW_UP_SQL, - }, - ) - ), - clarification_question=result.get("clarification_question"), - follow_up_context=follow_up_context, - ) - - def compose_small_talk(self, user_query: str) -> str: - """Generate a brief friendly response using the prototype capabilities prompt.""" - response = self._create_chat_completion( - messages=[ - { - "role": "system", - "content": self.prompt_store.get( - DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES - ), - }, - {"role": "user", "content": user_query}, - ], - temperature=0.5, - max_tokens=80, - ) - 
self._record_usage("small_talk", response) - answer = response.choices[0].message.content or "" - return answer.strip() - - def compose_final_answer( - self, - *, - user_query: str, - intent: DashboardChatIntent, - response_format: str, - draft_answer: str | None, - retrieved_documents: list[DashboardChatRetrievedDocument], - sql: str | None, - sql_results: list[dict[str, Any]] | None, - warnings: list[str], - ) -> str: - """Compose the final user-facing markdown answer from tool-loop outputs.""" - context_payload = { - "user_query": user_query, - "intent": intent.value, - "response_format": response_format, - "draft_answer": draft_answer or None, - "warnings": warnings[:5], - "sql": sql, - "sql_results": (sql_results or [])[:8], - "row_count": len(sql_results or []), - "retrieved_context": [ - { - "source_type": document.source_type, - "source_identifier": document.source_identifier, - "content": self._compact_snippet(document.content), - } - for document in retrieved_documents[:6] - ], - } - if response_format in {"text_with_table", "table"}: - result = self._complete_json( - operation="final_answer_table_summary", - system_prompt=( - self.prompt_store.get( - DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION - ) - + "\n\n" - + self.TABLE_SUMMARY_JSON_INSTRUCTIONS - ), - user_prompt=json.dumps(context_payload, ensure_ascii=False), - ) - return self._format_table_summary_markdown(result) - - response = self._create_chat_completion( - messages=[ - { - "role": "system", - "content": self.prompt_store.get( - DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION - ), - }, - { - "role": "user", - "content": json.dumps(context_payload, ensure_ascii=False), - }, - ], - temperature=0.1, - max_tokens=400, - ) - self._record_usage("final_answer_composition", response) - answer = response.choices[0].message.content or "" - return answer.strip() - - def get_prompt(self, prompt_key: DashboardChatPromptTemplateKey | str) -> str: - """Return one stored dashboard chat 
prompt.""" - return self.prompt_store.get(prompt_key) - - def run_tool_loop_turn( - self, - *, - messages: list[dict[str, Any]], - tools: list[dict[str, Any]], - tool_choice: str, - operation: str, - ) -> dict[str, Any]: - """Run one raw OpenAI tool-calling turn and normalize the response.""" - try: - response = self._create_chat_completion( - messages=messages, - tools=tools, - tool_choice=tool_choice, - temperature=0, - ) - except Exception: - return {"content": self.TECHNICAL_DIFFICULTIES_MESSAGE, "tool_calls": []} - self._record_usage(operation, response) - message = response.choices[0].message - tool_calls: list[dict[str, Any]] = [] - if message.tool_calls: - for tool_call in message.tool_calls: - tool_calls.append( - { - "id": tool_call.id, - "name": tool_call.function.name, - "args": tool_call.function.arguments, - } - ) - return {"content": message.content or "", "tool_calls": tool_calls} - - def _complete_json(self, operation: str, system_prompt: str, user_prompt: str) -> dict[str, Any]: - """Run a JSON-mode chat completion and parse the result.""" - response = self._create_chat_completion( - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ], - temperature=0, - response_format={"type": "json_object"}, - ) - self._record_usage(operation, response) - content = response.choices[0].message.content or "{}" - return json.loads(content) - - def usage_summary(self) -> dict[str, Any]: - """Return aggregated OpenAI chat-completion usage for the current turn.""" - totals = { - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0, - } - for event in self.usage_events: - totals["prompt_tokens"] += event.get("prompt_tokens", 0) - totals["completion_tokens"] += event.get("completion_tokens", 0) - totals["total_tokens"] += event.get("total_tokens", 0) - return { - "model": self.model, - "calls": list(self.usage_events), - "totals": totals, - } - - def _record_usage(self, operation: str, response: Any) 
-> None: - """Capture usage data from one OpenAI response when available.""" - usage = getattr(response, "usage", None) - if usage is None: - return - self.usage_events.append( - { - "operation": operation, - "model": self.model, - "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, - "completion_tokens": getattr(usage, "completion_tokens", 0) or 0, - "total_tokens": getattr(usage, "total_tokens", 0) or 0, - } - ) - - def _create_chat_completion(self, **kwargs: Any) -> Any: - """Run one OpenAI chat completion with a small interactive retry envelope.""" - last_error: Exception | None = None - for attempt in range(self.max_attempts): - try: - return self.client.chat.completions.create( - model=self.model, - **kwargs, - ) - except Exception as error: - last_error = error - if attempt == self.max_attempts - 1: - break - sleep(min(2**attempt, 2)) - assert last_error is not None - raise last_error - - @staticmethod - def _compact_snippet(content: str, max_length: int = 320) -> str: - """Trim retrieved context before feeding it into the final answer prompt.""" - normalized_content = " ".join(content.split()) - if len(normalized_content) <= max_length: - return normalized_content - return normalized_content[: max_length - 1].rstrip() + "…" - - @staticmethod - def _format_table_summary_markdown(result: dict[str, Any]) -> str: - """Render a structured table summary into short markdown without any table body.""" - title = str(result.get("title") or "").strip() - summary = str(result.get("summary") or "").strip() - raw_key_points = result.get("key_points") or [] - key_points = [ - str(point).strip() - for point in raw_key_points - if isinstance(point, str) and point.strip() - ][:3] - - sections: list[str] = [] - if title: - sections.append(f"### {title}") - if summary: - sections.append(summary) - if key_points: - sections.append("\n".join(f"- {point}" for point in key_points)) - return "\n\n".join(section for section in sections if section).strip() diff --git 
a/ddpui/core/dashboard_chat/openai_llm_client.py b/ddpui/core/dashboard_chat/openai_llm_client.py new file mode 100644 index 000000000..23639512f --- /dev/null +++ b/ddpui/core/dashboard_chat/openai_llm_client.py @@ -0,0 +1,303 @@ +"""OpenAI-backed dashboard chat LLM adapter.""" + +import json +import logging +import os +from time import sleep +from typing import Any + +from openai import OpenAI + +from ddpui.core.dashboard_chat.llm_answer_formatting import ( + TABLE_SUMMARY_JSON_INSTRUCTIONS, + build_final_answer_context_payload, + format_table_summary_markdown, + serialize_final_answer_context_payload, +) +from ddpui.core.dashboard_chat.prompt_store import DashboardChatPromptStore +from ddpui.core.dashboard_chat.runtime_types import ( + DashboardChatConversationContext, + DashboardChatFollowUpContext, + DashboardChatIntent, + DashboardChatIntentDecision, + DashboardChatRetrievedDocument, +) +from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey +from ddpui.utils.openai_client import get_shared_openai_client + +logger = logging.getLogger("ddpui") + + +class OpenAIDashboardChatLlmClient: + """Direct OpenAI SDK adapter with JSON-mode helpers.""" + + TECHNICAL_DIFFICULTIES_MESSAGE = ( + "I'm experiencing technical difficulties. Please try again." 
+ ) + + def __init__( + self, + api_key: str | None = None, + model: str = "gpt-4o-mini", + timeout_ms: int = 12000, + max_attempts: int = 1, + client: OpenAI | None = None, + prompt_store: DashboardChatPromptStore | None = None, + ): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.model = model + self.timeout_ms = timeout_ms + self.max_attempts = max(1, max_attempts) + self.prompt_store = prompt_store or DashboardChatPromptStore() + self.usage_events: list[dict[str, Any]] = [] + if client is None: + if not self.api_key: + raise ValueError("OPENAI_API_KEY must be set for dashboard chat runtime") + client = get_shared_openai_client( + self.api_key, + timeout_seconds=timeout_ms / 1000, + max_retries=0, + ) + self.client = client + + def reset_usage(self) -> None: + """Reset aggregated OpenAI usage before one new chat turn.""" + self.usage_events = [] + + def classify_intent( + self, + user_query: str, + conversation_context: DashboardChatConversationContext, + ) -> DashboardChatIntentDecision: + """Classify intent with prototype-style conversation awareness.""" + system_prompt = self.prompt_store.get( + DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION + ) + if conversation_context.last_sql_query or conversation_context.last_chart_ids: + system_prompt += ( + "\n\nCONVERSATION CONTEXT:\n" + f"- Previous SQL: {conversation_context.last_sql_query or 'None'}\n" + f"- Previous tables: {', '.join(conversation_context.last_tables_used) or 'None'}\n" + f"- Previous charts: {', '.join(conversation_context.last_chart_ids) or 'None'}\n" + f"- Last response type: {conversation_context.last_response_type or 'None'}\n\n" + "Use this context to detect follow-up queries that want to modify or expand on previous results." 
+ ) + try: + result = self._complete_json( + operation="intent_classification", + system_prompt=system_prompt, + user_prompt=f"Classify this query: {user_query}", + ) + except Exception: + logger.exception("Dashboard chat intent classification failed") + return DashboardChatIntentDecision( + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + confidence=0.0, + reason="Intent classification failed", + clarification_question=self.TECHNICAL_DIFFICULTIES_MESSAGE, + ) + intent_value = result.get("intent", DashboardChatIntent.QUERY_WITHOUT_SQL.value) + try: + intent = DashboardChatIntent(intent_value) + except ValueError: + intent = DashboardChatIntent.QUERY_WITHOUT_SQL + follow_up_result = result.get("follow_up_context") or {} + follow_up_context = DashboardChatFollowUpContext( + is_follow_up=bool(follow_up_result.get("is_follow_up")), + follow_up_type=follow_up_result.get("follow_up_type"), + reusable_elements=follow_up_result.get("reusable_elements") or {}, + modification_instruction=follow_up_result.get("modification_instruction"), + ) + return DashboardChatIntentDecision( + intent=intent, + confidence=float(result.get("confidence") or 0.0), + reason=str(result.get("reason") or "LLM classification"), + missing_info=[str(item) for item in result.get("missing_info", []) if item], + force_tool_usage=bool( + result.get( + "force_tool_usage", + intent + in { + DashboardChatIntent.QUERY_WITH_SQL, + DashboardChatIntent.FOLLOW_UP_SQL, + }, + ) + ), + clarification_question=result.get("clarification_question"), + follow_up_context=follow_up_context, + ) + + def compose_small_talk(self, user_query: str) -> str: + """Generate a brief friendly response using the prototype capabilities prompt.""" + response = self._create_chat_completion( + messages=[ + { + "role": "system", + "content": self.prompt_store.get( + DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES + ), + }, + {"role": "user", "content": user_query}, + ], + temperature=0.5, + max_tokens=80, + ) + 
self._record_usage("small_talk", response) + answer = response.choices[0].message.content or "" + return answer.strip() + + def compose_final_answer( + self, + *, + user_query: str, + intent: DashboardChatIntent, + response_format: str, + draft_answer: str | None, + retrieved_documents: list[DashboardChatRetrievedDocument], + sql: str | None, + sql_results: list[dict[str, Any]] | None, + warnings: list[str], + ) -> str: + """Compose the final user-facing markdown answer from tool-loop outputs.""" + context_payload = build_final_answer_context_payload( + user_query=user_query, + intent=intent.value, + response_format=response_format, + draft_answer=draft_answer, + retrieved_documents=retrieved_documents, + sql=sql, + sql_results=sql_results, + warnings=warnings, + ) + if response_format in {"text_with_table", "table"}: + result = self._complete_json( + operation="final_answer_table_summary", + system_prompt=( + self.prompt_store.get( + DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION + ) + + "\n\n" + + TABLE_SUMMARY_JSON_INSTRUCTIONS + ), + user_prompt=serialize_final_answer_context_payload(context_payload), + ) + return format_table_summary_markdown(result) + + response = self._create_chat_completion( + messages=[ + { + "role": "system", + "content": self.prompt_store.get( + DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION + ), + }, + { + "role": "user", + "content": serialize_final_answer_context_payload(context_payload), + }, + ], + temperature=0.1, + max_tokens=400, + ) + self._record_usage("final_answer_composition", response) + answer = response.choices[0].message.content or "" + return answer.strip() + + def get_prompt(self, prompt_key: DashboardChatPromptTemplateKey | str) -> str: + """Return one stored dashboard chat prompt.""" + return self.prompt_store.get(prompt_key) + + def run_tool_loop_turn( + self, + *, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]], + tool_choice: str, + operation: str, + ) -> dict[str, Any]: + """Run 
one raw OpenAI tool-calling turn and normalize the response.""" + try: + response = self._create_chat_completion( + messages=messages, + tools=tools, + tool_choice=tool_choice, + temperature=0, + ) + except Exception: + return {"content": self.TECHNICAL_DIFFICULTIES_MESSAGE, "tool_calls": []} + self._record_usage(operation, response) + message = response.choices[0].message + tool_calls: list[dict[str, Any]] = [] + if message.tool_calls: + for tool_call in message.tool_calls: + tool_calls.append( + { + "id": tool_call.id, + "name": tool_call.function.name, + "args": tool_call.function.arguments, + } + ) + return {"content": message.content or "", "tool_calls": tool_calls} + + def _complete_json(self, operation: str, system_prompt: str, user_prompt: str) -> dict[str, Any]: + """Run a JSON-mode chat completion and parse the result.""" + response = self._create_chat_completion( + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0, + response_format={"type": "json_object"}, + ) + self._record_usage(operation, response) + content = response.choices[0].message.content or "{}" + return json.loads(content) + + def usage_summary(self) -> dict[str, Any]: + """Return aggregated OpenAI chat-completion usage for the current turn.""" + totals = { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + } + for event in self.usage_events: + totals["prompt_tokens"] += event.get("prompt_tokens", 0) + totals["completion_tokens"] += event.get("completion_tokens", 0) + totals["total_tokens"] += event.get("total_tokens", 0) + return { + "model": self.model, + "calls": list(self.usage_events), + "totals": totals, + } + + def _record_usage(self, operation: str, response: Any) -> None: + """Capture usage data from one OpenAI response when available.""" + usage = getattr(response, "usage", None) + if usage is None: + return + self.usage_events.append( + { + "operation": operation, + "model": self.model, + 
"prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "completion_tokens": getattr(usage, "completion_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + ) + + def _create_chat_completion(self, **kwargs: Any) -> Any: + """Run one OpenAI chat completion with a small interactive retry envelope.""" + last_error: Exception | None = None + for attempt in range(self.max_attempts): + try: + return self.client.chat.completions.create( + model=self.model, + **kwargs, + ) + except Exception as error: + last_error = error + if attempt == self.max_attempts - 1: + break + sleep(min(2**attempt, 2)) + assert last_error is not None + raise last_error diff --git a/ddpui/core/dashboard_chat/runtime.py b/ddpui/core/dashboard_chat/runtime.py deleted file mode 100644 index fb783a097..000000000 --- a/ddpui/core/dashboard_chat/runtime.py +++ /dev/null @@ -1,3010 +0,0 @@ -"""Prototype-faithful LangGraph runtime for dashboard chat orchestration.""" - -from collections.abc import Callable, Sequence -import json -import logging -import re -from typing import Any, TypedDict - -from django.core.cache import cache -from django.core.serializers.json import DjangoJSONEncoder -from django.db import close_old_connections, connections -from langgraph.graph import END, START, StateGraph - -from ddpui.core.dashboard_chat.allowlist import ( - DashboardChatAllowlist, - DashboardChatAllowlistBuilder, - build_dashboard_chat_table_name, - normalize_dashboard_chat_table_name, -) -from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig -from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.llm_client import ( - DashboardChatLlmClient, - OpenAIDashboardChatLlmClient, -) -from ddpui.core.dashboard_chat.session_cache import ( - DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, - build_dashboard_chat_session_snapshot_cache_key, - deserialize_allowlist, - deserialize_distinct_cache, - deserialize_schema_snippets, - 
serialize_allowlist, - serialize_distinct_cache, - serialize_schema_snippets, -) -from ddpui.core.dashboard_chat.runtime_types import ( - DashboardChatCitation, - DashboardChatConversationContext, - DashboardChatConversationMessage, - DashboardChatIntent, - DashboardChatIntentDecision, - DashboardChatResponse, - DashboardChatRetrievedDocument, - DashboardChatSchemaSnippet, - DashboardChatSqlValidationResult, -) -from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard -from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore -from ddpui.core.dashboard_chat.warehouse_tools import ( - DashboardChatWarehouseTools, - DashboardChatWarehouseToolsError, -) -from ddpui.models.dashboard_chat import ( - DashboardChatPromptTemplateKey, -) -from ddpui.models.org import Org -from ddpui.services.dashboard_service import DashboardService - -logger = logging.getLogger(__name__) -GREETING_PATTERN = re.compile( - r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you)\b[\s!.?]*$", - re.IGNORECASE, -) - - -class DashboardChatRuntimeState(TypedDict, total=False): - """LangGraph state for one dashboard chat turn.""" - - org: Org - dashboard_id: int - session_id: str | None - vector_collection_name: str | None - user_query: str - conversation_history: list[DashboardChatConversationMessage] - conversation_context: DashboardChatConversationContext - small_talk_response: str | None - dashboard_export: dict[str, Any] - dbt_index: dict[str, Any] - allowlist: DashboardChatAllowlist - session_schema_cache: dict[str, DashboardChatSchemaSnippet] - session_distinct_cache: set[tuple[str, str, str]] - intent_decision: DashboardChatIntentDecision - retrieved_documents: list[DashboardChatRetrievedDocument] - citations: list[DashboardChatCitation] - tool_calls: list[dict[str, Any]] - sql: str | None - sql_validation: DashboardChatSqlValidationResult | 
None - sql_results: list[dict[str, Any]] | None - warnings: list[str] - usage: dict[str, Any] - response: DashboardChatResponse - - -class DashboardChatRuntime: - """Run dashboard chat turns with the prototype's explicit intent routing and tool loop.""" - - TOOL_SPECIFICATIONS = [ - { - "type": "function", - "function": { - "name": "retrieve_docs", - "description": "Search for relevant charts, datasets, dbt models, or context sections.", - "parameters": { - "type": "object", - "properties": { - "query": {"type": "string", "description": "Search query"}, - "types": { - "type": "array", - "items": { - "type": "string", - "enum": ["chart", "dataset", "context", "dbt_model"], - }, - "description": "Document types to search", - }, - "limit": {"type": "integer", "minimum": 1, "maximum": 20, "default": 8}, - }, - "required": ["query"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "get_schema_snippets", - "description": "Get column information for database tables.", - "parameters": { - "type": "object", - "properties": { - "tables": { - "type": "array", - "items": {"type": "string"}, - "description": "Fully-qualified table names (schema.table)", - } - }, - "required": ["tables"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "search_dbt_models", - "description": "Search dbt models by keyword to find relevant data models.", - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search query for model names/descriptions", - }, - "limit": {"type": "integer", "minimum": 1, "maximum": 20, "default": 8}, - }, - "required": ["query"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "get_dbt_model_info", - "description": "Get detailed information about a specific dbt model.", - "parameters": { - "type": "object", - "properties": { - "model_name": { - "type": "string", - "description": "Model name or schema.table", - } - }, - "required": ["model_name"], - }, - }, - }, - 
{ - "type": "function", - "function": { - "name": "get_distinct_values", - "description": "Get distinct values for a column (required before filtering on text columns).", - "parameters": { - "type": "object", - "properties": { - "table": { - "type": "string", - "description": "Fully-qualified table name", - }, - "column": {"type": "string", "description": "Column name"}, - "limit": {"type": "integer", "minimum": 1, "maximum": 200, "default": 50}, - }, - "required": ["table", "column"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "run_sql_query", - "description": "Execute a read-only SQL query on the database.", - "parameters": { - "type": "object", - "properties": { - "sql": {"type": "string", "description": "SELECT query to execute"} - }, - "required": ["sql"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "list_tables_by_keyword", - "description": "Find tables whose name or columns match a keyword (no hard-coding).", - "parameters": { - "type": "object", - "properties": { - "keyword": { - "type": "string", - "description": "Keyword such as donor, funding, student", - }, - "limit": {"type": "integer", "minimum": 1, "maximum": 50, "default": 15}, - }, - "required": ["keyword"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "check_table_row_count", - "description": "Get the total number of rows in a table to check if it has data.", - "parameters": { - "type": "object", - "properties": { - "table": { - "type": "string", - "description": "Fully-qualified table name (schema.table)", - } - }, - "required": ["table"], - }, - }, - }, - ] - - def __init__( - self, - vector_store: ChromaDashboardChatVectorStore | None = None, - llm_client: DashboardChatLlmClient | None = None, - warehouse_tools_factory: Callable[[Org], DashboardChatWarehouseTools] | None = None, - runtime_config: DashboardChatRuntimeConfig | None = None, - source_config: DashboardChatSourceConfig | None = None, - ): - self.runtime_config = 
runtime_config or DashboardChatRuntimeConfig.from_env() - self.source_config = source_config or DashboardChatSourceConfig.from_env() - self.vector_store = vector_store or ChromaDashboardChatVectorStore() - self.llm_client = llm_client or OpenAIDashboardChatLlmClient( - model=self.runtime_config.llm_model, - timeout_ms=self.runtime_config.llm_timeout_ms, - max_attempts=self.runtime_config.llm_max_attempts, - ) - self.warehouse_tools_factory = warehouse_tools_factory or ( - lambda org: DashboardChatWarehouseTools( - org=org, - max_rows=self.runtime_config.max_query_rows, - ) - ) - self.graph = self._build_graph() - - def run( - self, - org: Org, - dashboard_id: int, - user_query: str, - session_id: str | None = None, - vector_collection_name: str | None = None, - conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] - | None = None, - ) -> DashboardChatResponse: - """Run one dashboard chat turn.""" - if hasattr(self.llm_client, "reset_usage"): - self.llm_client.reset_usage() - if hasattr(self.vector_store, "reset_usage"): - self.vector_store.reset_usage() - initial_state: DashboardChatRuntimeState = { - "org": org, - "dashboard_id": dashboard_id, - "session_id": session_id, - "vector_collection_name": vector_collection_name, - "user_query": user_query, - "conversation_history": self._normalize_conversation_history(conversation_history), - "warnings": [], - "usage": {}, - } - final_state = self.graph.invoke(initial_state) - return final_state["response"] - - def _build_graph(self): - """Build the explicit prototype-aligned intent graph.""" - graph = StateGraph(DashboardChatRuntimeState) - graph.add_node("load_context", self._wrap_node(self._node_load_context)) - graph.add_node("route_intent", self._wrap_node(self._node_route_intent)) - graph.add_node("handle_small_talk", self._wrap_node(self._node_handle_small_talk)) - graph.add_node("handle_irrelevant", self._wrap_node(self._node_handle_irrelevant)) - graph.add_node( - 
"handle_needs_clarification", - self._wrap_node(self._node_handle_needs_clarification), - ) - graph.add_node("handle_query_with_sql", self._wrap_node(self._node_handle_query_with_sql)) - graph.add_node( - "handle_query_without_sql", - self._wrap_node(self._node_handle_query_without_sql), - ) - graph.add_node("handle_follow_up_sql", self._wrap_node(self._node_handle_follow_up_sql)) - graph.add_node( - "handle_follow_up_context", - self._wrap_node(self._node_handle_follow_up_context), - ) - graph.add_node("finalize", self._wrap_node(self._node_finalize_response)) - - graph.add_edge(START, "load_context") - graph.add_edge("load_context", "route_intent") - graph.add_conditional_edges( - "route_intent", - self._route_after_intent, - { - DashboardChatIntent.SMALL_TALK.value: "handle_small_talk", - DashboardChatIntent.IRRELEVANT.value: "handle_irrelevant", - DashboardChatIntent.NEEDS_CLARIFICATION.value: "handle_needs_clarification", - DashboardChatIntent.QUERY_WITH_SQL.value: "handle_query_with_sql", - DashboardChatIntent.QUERY_WITHOUT_SQL.value: "handle_query_without_sql", - DashboardChatIntent.FOLLOW_UP_SQL.value: "handle_follow_up_sql", - DashboardChatIntent.FOLLOW_UP_CONTEXT.value: "handle_follow_up_context", - }, - ) - graph.add_edge("handle_small_talk", "finalize") - graph.add_edge("handle_irrelevant", "finalize") - graph.add_edge("handle_needs_clarification", "finalize") - graph.add_edge("handle_query_with_sql", "finalize") - graph.add_edge("handle_query_without_sql", "finalize") - graph.add_edge("handle_follow_up_sql", "finalize") - graph.add_edge("handle_follow_up_context", "finalize") - graph.add_edge("finalize", END) - return graph.compile() - - @staticmethod - def _wrap_node(handler: Callable[[DashboardChatRuntimeState], DashboardChatRuntimeState]): - """Run each LangGraph node with thread-local Django DB cleanup.""" - - def wrapped(state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - close_old_connections() - try: - return handler(state) - 
finally: - connections.close_all() - - return wrapped - - def _node_load_context(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Load or reuse the session-stable dashboard context snapshot.""" - snapshot = self._load_session_snapshot(state) - state["dashboard_export"] = snapshot["dashboard_export"] - state["dbt_index"] = snapshot["dbt_index"] - state["allowlist"] = snapshot["allowlist"] - state["session_schema_cache"] = snapshot["schema_cache"] - state["session_distinct_cache"] = snapshot["distinct_cache"] - return state - - def _node_route_intent(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Use the prototype router prompt for all non-trivial routing.""" - conversation_context = self._extract_conversation_context(state["conversation_history"]) - fast_path_intent = self._build_fast_path_intent(state["user_query"]) - if fast_path_intent is not None: - state["conversation_context"] = conversation_context - state["intent_decision"] = fast_path_intent - state["small_talk_response"] = self._build_fast_path_small_talk_response( - state["user_query"] - ) - return state - intent_decision = self.llm_client.classify_intent( - user_query=state["user_query"], - conversation_context=conversation_context, - ) - state["conversation_context"] = conversation_context - state["intent_decision"] = intent_decision - return state - - def _node_handle_small_talk( - self, - state: DashboardChatRuntimeState, - ) -> DashboardChatRuntimeState: - """Handle simple social turns without any tool use.""" - state["response"] = DashboardChatResponse( - answer_text=state.get("small_talk_response") - or self._compose_small_talk_response(state["user_query"]), - intent=DashboardChatIntent.SMALL_TALK, - usage=self._build_usage_summary(), - ) - return state - - def _node_handle_irrelevant( - self, - state: DashboardChatRuntimeState, - ) -> DashboardChatRuntimeState: - """Handle questions outside dashboard chat scope.""" - state["response"] = 
    def _node_handle_needs_clarification(
        self,
        state: DashboardChatRuntimeState,
    ) -> DashboardChatRuntimeState:
        """Ask for clarification when the router says the query is underspecified."""
        intent_decision = state["intent_decision"]
        state["response"] = DashboardChatResponse(
            # Prefer the router's own question; otherwise synthesize one from
            # the list of missing pieces of information.
            answer_text=(
                intent_decision.clarification_question
                or self._clarification_fallback(intent_decision.missing_info)
            ),
            intent=DashboardChatIntent.NEEDS_CLARIFICATION,
            usage=self._build_usage_summary(),
        )
        return state

    def _node_handle_query_with_sql(
        self,
        state: DashboardChatRuntimeState,
    ) -> DashboardChatRuntimeState:
        """Run the prototype new-query tool loop for SQL-routed questions."""
        return self._run_prototype_intent(state, max_turns=15, follow_up=False)

    def _node_handle_query_without_sql(
        self,
        state: DashboardChatRuntimeState,
    ) -> DashboardChatRuntimeState:
        """Run the prototype new-query tool loop for context-only questions."""
        return self._run_prototype_intent(state, max_turns=15, follow_up=False)

    def _node_handle_follow_up_sql(
        self,
        state: DashboardChatRuntimeState,
    ) -> DashboardChatRuntimeState:
        """Run the prototype follow-up loop for SQL-modifying turns."""
        # Follow-ups get a tighter turn budget than fresh queries.
        return self._run_prototype_intent(state, max_turns=6, follow_up=True)

    def _node_handle_follow_up_context(
        self,
        state: DashboardChatRuntimeState,
    ) -> DashboardChatRuntimeState:
        """Run the prototype follow-up loop for explanatory follow-ups."""
        return self._run_prototype_intent(state, max_turns=6, follow_up=True)

    def _run_prototype_intent(
        self,
        state: DashboardChatRuntimeState,
        *,
        max_turns: int,
        follow_up: bool,
    ) -> DashboardChatRuntimeState:
        """Execute one prototype-style tool loop and store the response on state.

        Builds the message stack (new-query vs follow-up flavor), runs the tool
        loop, then copies its results onto graph state and composes the final
        ``DashboardChatResponse``.
        """
        allowlist = state["allowlist"]

        # Pre-embed the user query so the loop starts with a warm embedding cache.
        query_embedding = self._embed_query(
            state["user_query"],
            embedding_cache={},
        )

        messages = (
            self._build_follow_up_messages(state)
            if follow_up
            else self._build_new_query_messages(state)
        )
        execution_result = self._execute_tool_loop(
            state=state,
            messages=messages,
            max_turns=max_turns,
            initial_embedding_cache={state["user_query"]: query_embedding},
        )

        state["retrieved_documents"] = execution_result["retrieved_documents"]
        state["citations"] = self._build_citations(
            retrieved_documents=execution_result["retrieved_documents"],
            dashboard_export=state["dashboard_export"],
            allowlist=allowlist,
        )
        state["tool_calls"] = execution_result["tool_calls"]
        state["sql"] = execution_result["sql"]
        state["sql_validation"] = execution_result["sql_validation"]
        state["sql_results"] = execution_result["sql_results"]
        state["warnings"] = execution_result["warnings"]
        # Response format (e.g. prose vs table) is derived from query + results.
        response_format = self._determine_response_format(
            user_query=state["user_query"],
            sql_results=execution_result["sql_results"],
        )
        state["response"] = DashboardChatResponse(
            answer_text=self._compose_final_answer_text(
                state,
                execution_result,
                response_format=response_format,
            ),
            intent=state["intent_decision"].intent,
            citations=state["citations"],
            warnings=execution_result["warnings"],
            sql=execution_result["sql"],
            sql_results=execution_result["sql_results"],
            usage=self._build_usage_summary(),
            tool_calls=execution_result["tool_calls"],
            metadata={
                "response_format": response_format,
                "table_columns": self._sql_result_columns(execution_result["sql_results"]),
            },
        )
        return state
the response on state.""" - allowlist = state["allowlist"] - - query_embedding = self._embed_query( - state["user_query"], - embedding_cache={}, - ) - - messages = ( - self._build_follow_up_messages(state) - if follow_up - else self._build_new_query_messages(state) - ) - execution_result = self._execute_tool_loop( - state=state, - messages=messages, - max_turns=max_turns, - initial_embedding_cache={state["user_query"]: query_embedding}, - ) - - state["retrieved_documents"] = execution_result["retrieved_documents"] - state["citations"] = self._build_citations( - retrieved_documents=execution_result["retrieved_documents"], - dashboard_export=state["dashboard_export"], - allowlist=allowlist, - ) - state["tool_calls"] = execution_result["tool_calls"] - state["sql"] = execution_result["sql"] - state["sql_validation"] = execution_result["sql_validation"] - state["sql_results"] = execution_result["sql_results"] - state["warnings"] = execution_result["warnings"] - response_format = self._determine_response_format( - user_query=state["user_query"], - sql_results=execution_result["sql_results"], - ) - state["response"] = DashboardChatResponse( - answer_text=self._compose_final_answer_text( - state, - execution_result, - response_format=response_format, - ), - intent=state["intent_decision"].intent, - citations=state["citations"], - warnings=execution_result["warnings"], - sql=execution_result["sql"], - sql_results=execution_result["sql_results"], - usage=self._build_usage_summary(), - tool_calls=execution_result["tool_calls"], - metadata={ - "response_format": response_format, - "table_columns": self._sql_result_columns(execution_result["sql_results"]), - }, - ) - return state - - def _build_new_query_messages( - self, - state: DashboardChatRuntimeState, - ) -> list[dict[str, Any]]: - """Build the prototype new-query message stack.""" - system_prompt = self.llm_client.get_prompt( - DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM - ) - return [ - { - "role": "system", - 
"content": system_prompt, - }, - {"role": "user", "content": state["user_query"]}, - ] - - def _build_follow_up_messages( - self, - state: DashboardChatRuntimeState, - ) -> list[dict[str, Any]]: - """Build the prototype follow-up message stack.""" - modification_type = self._detect_sql_modification_type(state["user_query"]) - system_prompt = self.llm_client.get_prompt( - DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM - ) - return [ - { - "role": "system", - "content": system_prompt, - }, - { - "role": "system", - "content": self._build_follow_up_context_prompt( - state["conversation_context"], - state["user_query"], - ), - }, - {"role": "system", "content": f"MODIFICATION_TYPE: {modification_type}"}, - {"role": "user", "content": state["user_query"]}, - ] - - def _execute_tool_loop( - self, - *, - state: DashboardChatRuntimeState, - messages: list[dict[str, Any]], - max_turns: int, - initial_embedding_cache: dict[str, list[float]] | None = None, - ) -> dict[str, Any]: - """Execute the prototype's iterative tool loop.""" - execution_context: dict[str, Any] = { - "distinct_cache": set(state.get("session_distinct_cache") or set()), - "embedding_cache": dict(initial_embedding_cache or {}), - "schema_cache": dict(state.get("session_schema_cache") or {}), - "retrieved_documents": [], - "retrieved_document_ids": set(), - "tool_calls": [], - "warnings": list(state.get("warnings", [])), - "warehouse_tools": None, - "last_sql": None, - "last_sql_results": None, - "last_sql_validation": None, - } - self._seed_distinct_cache_from_previous_sql(state, execution_context) - intent_decision = state["intent_decision"] - - for turn_index in range(max_turns): - tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" - ai_message = self.llm_client.run_tool_loop_turn( - messages=messages, - tools=self.TOOL_SPECIFICATIONS, - tool_choice=tool_choice, - operation=f"tool_loop_{intent_decision.intent.value}", - ) - tool_calls = 
ai_message.get("tool_calls") or [] - assistant_record: dict[str, Any] = { - "role": "assistant", - "content": ai_message.get("content", "") or "", - } - if tool_calls: - assistant_record["tool_calls"] = [ - { - "id": tool_call.get("id"), - "type": "function", - "function": { - "name": tool_call.get("name"), - "arguments": ( - tool_call.get("args") - if isinstance(tool_call.get("args"), str) - else json.dumps(tool_call.get("args") or {}) - ), - }, - } - for tool_call in tool_calls - ] - messages.append(assistant_record) - - if not tool_calls: - return self._build_execution_result( - answer_text=( - (ai_message.get("content") or "").strip() - or self._fallback_answer_text( - execution_context["retrieved_documents"], - execution_context["last_sql_results"], - ) - ), - execution_context=execution_context, - max_turns_reached=False, - ) - - for tool_call in tool_calls: - raw_args = tool_call.get("args") or {} - args = raw_args - if isinstance(raw_args, str): - try: - args = json.loads(raw_args) - except json.JSONDecodeError: - args = {} - result = self._execute_tool( - tool_name=str(tool_call.get("name") or ""), - args=args, - state=state, - execution_context=execution_context, - ) - execution_context["tool_calls"].append( - self._summarize_tool_call( - tool_name=str(tool_call.get("name") or ""), - args=args, - result=result, - ) - ) - messages.append( - { - "role": "tool", - "tool_call_id": tool_call.get("id"), - "content": json.dumps( - self._serialize_tool_result(result), - cls=DjangoJSONEncoder, - ), - } - ) - if str(tool_call.get("name") or "") == "run_sql_query" and result.get("success"): - return self._build_execution_result( - answer_text="", - execution_context=execution_context, - max_turns_reached=False, - ) - - return self._build_execution_result( - answer_text=self._max_turns_message( - state["user_query"], - execution_context["retrieved_documents"], - ), - execution_context=execution_context, - max_turns_reached=True, - ) - - def _execute_tool( - self, - 
    def _tool_retrieve_docs(
        self,
        args: dict[str, Any],
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Retrieve current-dashboard, org, and dbt context using the prototype tool contract."""
        query = str(args.get("query") or state["user_query"]).strip()
        limit = max(1, min(int(args.get("limit", 8)), 20))
        requested_types = [
            str(doc_type)
            for doc_type in (args.get("types") or ["chart", "dataset", "context", "dbt_model"])
        ]
        retrieved_documents: list[DashboardChatRetrievedDocument] = []

        if "chart" in requested_types:
            retrieved_documents.extend(
                self._query_vector_store(
                    org=state["org"],
                    collection_name=state.get("vector_collection_name"),
                    query_text=query,
                    source_types=self.source_config.filter_enabled(
                        [DashboardChatSourceType.DASHBOARD_EXPORT.value]
                    ),
                    dashboard_id=state["dashboard_id"],
                    query_embedding=self._embed_query(query, execution_context["embedding_cache"]),
                )
            )
        if "context" in requested_types:
            retrieved_documents.extend(
                self._query_vector_store(
                    org=state["org"],
                    collection_name=state.get("vector_collection_name"),
                    query_text=query,
                    source_types=self.source_config.filter_enabled(
                        [DashboardChatSourceType.DASHBOARD_CONTEXT.value]
                    ),
                    dashboard_id=state["dashboard_id"],
                    query_embedding=self._embed_query(query, execution_context["embedding_cache"]),
                )
            )
            # Org-wide context rides along with dashboard context; note it has
            # no dashboard_id filter. NOTE(review): placement inside this branch
            # reconstructed from a mangled diff — confirm against the original.
            retrieved_documents.extend(
                self._query_vector_store(
                    org=state["org"],
                    collection_name=state.get("vector_collection_name"),
                    query_text=query,
                    source_types=self.source_config.filter_enabled(
                        [DashboardChatSourceType.ORG_CONTEXT.value]
                    ),
                    query_embedding=self._embed_query(query, execution_context["embedding_cache"]),
                )
            )
        if "dataset" in requested_types or "dbt_model" in requested_types:
            dbt_results = self._query_vector_store(
                org=state["org"],
                collection_name=state.get("vector_collection_name"),
                query_text=query,
                source_types=self.source_config.filter_enabled(
                    [
                        DashboardChatSourceType.DBT_MANIFEST.value,
                        DashboardChatSourceType.DBT_CATALOG.value,
                    ]
                ),
                query_embedding=self._embed_query(query, execution_context["embedding_cache"]),
            )
            # dbt hits are additionally restricted to allowlisted tables.
            retrieved_documents.extend(
                self._filter_allowlisted_dbt_results(dbt_results, state["allowlist"])
            )

        merged_results = self._dedupe_retrieved_documents(retrieved_documents)[:limit]
        # Accumulate only documents not already seen earlier in this loop.
        for document in merged_results:
            if document.document_id in execution_context["retrieved_document_ids"]:
                continue
            execution_context["retrieved_document_ids"].add(document.document_id)
            execution_context["retrieved_documents"].append(document)

        docs = [
            self._tool_document_payload(
                document,
                state["allowlist"],
                state["dashboard_export"],
            )
            for document in merged_results
        ]
        return {"docs": docs, "count": len(docs)}
execution_context["retrieved_document_ids"]: - continue - execution_context["retrieved_document_ids"].add(document.document_id) - execution_context["retrieved_documents"].append(document) - - docs = [ - self._tool_document_payload( - document, - state["allowlist"], - state["dashboard_export"], - ) - for document in merged_results - ] - return {"docs": docs, "count": len(docs)} - - def _tool_get_schema_snippets( - self, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - ) -> dict[str, Any]: - """Return schema snippets for allowlisted tables only.""" - requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []] - allowed_tables = [ - table_name - for table_name in requested_tables - if state["allowlist"].is_allowed(table_name) - ] - filtered_tables = sorted(set(requested_tables) - set(allowed_tables)) - schema_cache = self._schema_cache( - state, - execution_context, - tables=allowed_tables, - ) - tables_payload = [ - {"table": table_name, "columns": snippet.columns} - for table_name, snippet in schema_cache.items() - if table_name in allowed_tables - ] - response: dict[str, Any] = {"tables": tables_payload} - if filtered_tables: - response["filtered_tables"] = filtered_tables - response["filter_note"] = ( - f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard." 
    def _tool_search_dbt_models(
        self,
        args: dict[str, Any],
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Search allowlisted dbt nodes by name, description, and column metadata."""
        query = str(args.get("query") or "").strip().lower()
        limit = max(1, min(int(args.get("limit", 8)), 20))
        if not query:
            return {"models": [], "count": 0}

        results: list[dict[str, Any]] = []
        for node in self._dbt_resources_by_unique_id(state).values():
            table_name = node.get("table")
            # Searchable text: node name, description, table, and per-column
            # names/descriptions, matched as one joined lowercase haystack.
            haystacks = [
                str(node.get("name") or ""),
                str(node.get("description") or ""),
                str(table_name or ""),
            ]
            for column in node.get("columns") or []:
                haystacks.append(str(column.get("name") or ""))
                haystacks.append(str(column.get("description") or ""))
            if query not in " ".join(haystacks).lower():
                continue
            results.append(
                {
                    "name": str(node.get("name") or ""),
                    "schema": str(node.get("schema") or ""),
                    "database": str(node.get("database") or ""),
                    "description": str(node.get("description") or ""),
                    # Column list is capped to keep the tool payload small.
                    "columns": [
                        str(column.get("name") or "")
                        for column in (node.get("columns") or [])
                    ][:20],
                    "table": table_name,
                }
            )
            if len(results) >= limit:
                break

        return {"models": results, "count": len(results)}

    def _tool_get_dbt_model_info(
        self,
        args: dict[str, Any],
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Return one dbt model's description, columns, and lineage."""
        model_name = str(args.get("model_name") or "").strip().lower()
        if not model_name:
            return {"error": "model_name is required"}

        # Match on either the dbt node name or its warehouse table name.
        matched_unique_id: str | None = None
        matched_node: dict[str, Any] | None = None
        for unique_id, node in self._dbt_resources_by_unique_id(state).items():
            table_name = node.get("table")
            candidates = {
                str(node.get("name") or "").lower(),
                str(table_name or "").lower(),
            }
            if model_name not in candidates:
                continue
            matched_unique_id = unique_id
            matched_node = node
            break

        if matched_unique_id is None or matched_node is None:
            return {"error": f"Model not found: {model_name}"}

        return {
            "model": str(matched_node.get("name") or ""),
            "schema": str(matched_node.get("schema") or ""),
            "database": str(matched_node.get("database") or ""),
            "description": str(matched_node.get("description") or ""),
            "columns": list(matched_node.get("columns") or [])[:50],
            "upstream": list(matched_node.get("upstream") or []),
            "downstream": list(matched_node.get("downstream") or []),
        }
    def _tool_get_distinct_values(
        self,
        args: dict[str, Any],
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Return distinct values and persist validated filter values for the session."""
        table_name = str(args.get("table") or "").lower()
        column_name = str(args.get("column") or "")
        limit = max(1, min(int(args.get("limit", 50)), 200))
        if not state["allowlist"].is_allowed(table_name):
            return {
                "error": "table_not_allowed",
                "table": table_name,
                "message": (
                    f"Table {table_name} is not accessible in the current dashboard context."
                ),
            }

        # If we already have a schema snippet for the table, reject lookups for
        # columns it does not contain and suggest tables that do.
        schema_cache = self._schema_cache(state, execution_context)
        snippet = schema_cache.get(table_name)
        normalized_column_name = column_name.lower()
        if snippet is not None and normalized_column_name not in {
            str(column.get("name") or "").lower() for column in snippet.columns
        }:
            candidates = self._find_tables_with_column(normalized_column_name, schema_cache)
            return {
                "error": "column_not_in_table",
                "table": table_name,
                "column": column_name,
                "candidates": candidates,
                "message": (
                    f"Column {column_name} is not available on {table_name}. "
                    "Use a table that contains it, inspect that schema, and retry the lookup."
                ),
            }

        values = self._warehouse_tools(execution_context, state["org"]).get_distinct_values(
            table_name=table_name,
            column_name=column_name,
            limit=limit,
        )
        # Remember the fetched values so later SQL can be checked against them.
        self._record_validated_distinct_values(
            state=state,
            execution_context=execution_context,
            table_name=table_name,
            column_name=column_name,
            values=values,
        )
        return {
            "table": table_name,
            "column": column_name,
            "values": values,
            "count": len(values),
        }
- ), - } - - values = self._warehouse_tools(execution_context, state["org"]).get_distinct_values( - table_name=table_name, - column_name=column_name, - limit=limit, - ) - self._record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name=table_name, - column_name=column_name, - values=values, - ) - return { - "table": table_name, - "column": column_name, - "values": values, - "count": len(values), - } - - def _tool_list_tables_by_keyword( - self, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - ) -> dict[str, Any]: - """Search allowlisted tables by table name or column name.""" - keyword = str(args.get("keyword") or "").strip().lower() - limit = max(1, min(int(args.get("limit", 15)), 50)) - if not keyword: - return {"tables": []} - - allowlist_tables_source = state["allowlist"].prioritized_tables() or sorted( - state["allowlist"].allowed_tables - ) - allowlisted_tables = list( - dict.fromkeys(table_name.lower() for table_name in allowlist_tables_source) - ) - direct_match_tables = [ - table_name - for table_name in allowlisted_tables - if keyword in table_name or keyword in table_name.rsplit(".", 1)[-1] - ] - - schema_cache: dict[str, Any] = {} - lookup_tables = direct_match_tables or allowlisted_tables - if lookup_tables: - try: - schema_cache = self._schema_cache( - state, - execution_context, - tables=lookup_tables, - ) - except Exception as error: - logger.warning("Dashboard chat keyword table lookup fell back to names only: %s", error) - execution_context["warnings"].append(str(error)) - - matches: list[dict[str, Any]] = [] - seen_tables: set[str] = set() - - for table_name in direct_match_tables: - column_names = [ - str(column.get("name") or "") - for column in getattr(schema_cache.get(table_name), "columns", []) - ] - matches.append({"table": table_name, "columns": column_names[:40]}) - seen_tables.add(table_name) - if len(matches) >= limit: - break - - for table_name, 
    def _tool_check_table_row_count(
        self,
        args: dict[str, Any],
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Count rows in one allowlisted table."""
        table_name = str(args.get("table") or "").lower()
        if not state["allowlist"].is_allowed(table_name):
            return {
                "error": "table_not_allowed",
                "table": table_name,
                "message": (
                    f"Table {table_name} is not accessible in the current dashboard context."
                ),
            }

        # Table name was allowlist-checked above and is re-validated by the SQL
        # guard below before execution.
        sql = f"SELECT COUNT(*) AS row_count FROM {table_name} LIMIT 1"
        validation = DashboardChatSqlGuard(
            allowlist=state["allowlist"],
            max_rows=1,
        ).validate(sql)
        if not validation.is_valid or not validation.sanitized_sql:
            return {"error": "sql_validation_failed", "issues": validation.errors}

        rows = self._warehouse_tools(execution_context, state["org"]).execute_sql(
            validation.sanitized_sql
        )
        row_count = 0
        if rows:
            # NOTE(review): assumes the warehouse returns the alias lowercase
            # as "row_count" — confirm for case-folding warehouses.
            row_count = int(rows[0].get("row_count") or 0)
        return {"table": table_name, "row_count": row_count, "has_data": row_count > 0}
    def _run_sql_with_distinct_guard(
        self,
        args: dict[str, Any],
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Validate SQL like the prototype and let the tool loop self-correct on failures.

        Guard order: allowlist -> follow-up dimension check -> distinct-value
        check -> SQL guard -> column existence -> execute. Each failure returns
        a structured error payload the LLM can act on.
        """
        sql = str(args.get("sql") or "").strip()
        if not sql:
            return {"error": "sql_missing", "message": "SQL is required"}

        allowlist_validation = self._validate_sql_allowlist(sql, state["allowlist"])
        if not allowlist_validation["valid"]:
            return {
                "error": "table_not_allowed",
                "invalid_tables": allowlist_validation["invalid_tables"],
                "message": allowlist_validation["message"],
            }

        follow_up_dimension_validation = self._validate_follow_up_dimension_usage(
            sql=sql,
            state=state,
            execution_context=execution_context,
        )
        if follow_up_dimension_validation is not None:
            return follow_up_dimension_validation
        # Text-column filters must be backed by a prior get_distinct_values call.
        missing_distinct = self._missing_distinct(sql, state, execution_context)
        if missing_distinct:
            return {
                "error": "must_fetch_distinct_values",
                "missing": missing_distinct,
                "message": (
                    "Call get_distinct_values for these columns, then regenerate the SQL using one of the returned values."
                ),
            }

        validation = DashboardChatSqlGuard(
            allowlist=state["allowlist"],
            max_rows=self.runtime_config.max_query_rows,
        ).validate(sql)
        # Record the validation even when it fails, for finalize-node metadata.
        execution_context["last_sql_validation"] = validation
        if not validation.is_valid or not validation.sanitized_sql:
            return {
                "error": "sql_validation_failed",
                "issues": validation.errors,
                "warnings": validation.warnings,
            }

        missing_columns = self._missing_columns_in_primary_table(
            sql=validation.sanitized_sql,
            state=state,
            execution_context=execution_context,
        )
        if missing_columns is not None:
            return missing_columns

        execution_context["last_sql"] = validation.sanitized_sql
        try:
            rows = self._warehouse_tools(execution_context, state["org"]).execute_sql(
                validation.sanitized_sql
            )
        except Exception as error:
            # Prefer a structured, correctable error; otherwise surface raw text.
            structured_error = self._structured_sql_execution_error(
                sql=validation.sanitized_sql,
                error=error,
                state=state,
                execution_context=execution_context,
            )
            if structured_error is not None:
                return structured_error
            return {
                "success": False,
                "error": str(error),
                "sql_used": validation.sanitized_sql,
            }

        # Round-trip through the Django encoder to make rows JSON-safe
        # (dates, Decimals, etc.).
        serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder))
        execution_context["last_sql_results"] = serialized_rows
        self._record_validated_filters_from_sql(
            state=state,
            execution_context=execution_context,
            sql=validation.sanitized_sql,
        )
        return {
            "success": True,
            "row_count": len(serialized_rows),
            "data_preview": self._preview_sql_rows(serialized_rows),
            "error": None,
            "sql_used": validation.sanitized_sql,
            "columns": list(serialized_rows[0].keys()) if serialized_rows else [],
            "rows": serialized_rows,
        }
    def _missing_columns_in_primary_table(
        self,
        *,
        sql: str,
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any] | None:
        """Return a corrective tool error when SQL references columns absent from the referenced query tables.

        Returns ``None`` when every referenced column resolves to one of the
        query's tables; otherwise returns a ``column_not_in_table`` payload with
        candidate tables per missing column.
        """
        table_references = self._table_references(sql)
        referenced_tables = [
            reference["table_name"]
            for reference in table_references
            if reference.get("table_name")
        ]
        if not referenced_tables:
            return None

        schema_cache = self._schema_cache(state, execution_context, tables=referenced_tables)
        all_schema_cache = self._schema_cache(state, execution_context)
        missing_columns_by_table: dict[str, set[str]] = {}
        candidate_tables_by_column: dict[str, list[str]] = {}
        # De-duplicate while preserving FROM/JOIN order.
        tables_in_query = list(dict.fromkeys(referenced_tables))

        for qualifier, column_name in self._referenced_sql_identifier_refs(sql):
            resolved_table = self._resolve_identifier_table(
                qualifier=qualifier,
                column_name=column_name,
                table_references=table_references,
                schema_cache=schema_cache,
            )
            if resolved_table is not None:
                # Column resolved cleanly — nothing to report.
                continue

            if qualifier is not None:
                # Qualified reference: attribute the miss to the qualified table,
                # falling back to the primary (first) table.
                target_table = (
                    self._resolve_table_qualifier(qualifier, table_references)
                    or self._primary_table_name(sql)
                    or tables_in_query[0]
                )
            else:
                # Unqualified reference: ambiguous across multiple tables is not
                # reported (can't pick a target confidently).
                matching_tables = self._tables_with_column(
                    column_name,
                    tables_in_query,
                    schema_cache,
                )
                if len(matching_tables) > 1:
                    continue
                target_table = self._primary_table_name(sql) or tables_in_query[0]

            missing_columns_by_table.setdefault(target_table, set()).add(column_name)
            candidate_tables_by_column[column_name] = self._find_tables_with_column(
                column_name,
                all_schema_cache,
            )

        missing_columns = sorted(
            {
                column_name
                for columns in missing_columns_by_table.values()
                for column_name in columns
            }
        )
        if not missing_columns:
            return None

        primary_table = self._primary_table_name(sql) or tables_in_query[0]
        target_table = (
            next(iter(missing_columns_by_table))
            if len(missing_columns_by_table) == 1
            else primary_table
        )
        best_table = self._best_table_for_missing_columns(
            missing_columns,
            all_schema_cache,
        )
        message = (
            f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. "
            "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table."
        )
        if best_table:
            message += f" Best candidate table: {best_table}."
        result = {
            "error": "column_not_in_table",
            "table": target_table,
            "missing_columns": missing_columns,
            "candidate_tables": candidate_tables_by_column,
            "best_table": best_table,
            "message": message,
        }
        # Single-column misses also expose the singular keys the prototype used.
        if len(missing_columns) == 1:
            column_name = missing_columns[0]
            result["column"] = column_name
            result["candidates"] = candidate_tables_by_column.get(column_name, [])
        return result
" - "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." - ) - if best_table: - message += f" Best candidate table: {best_table}." - result = { - "error": "column_not_in_table", - "table": target_table, - "missing_columns": missing_columns, - "candidate_tables": candidate_tables_by_column, - "best_table": best_table, - "message": message, - } - if len(missing_columns) == 1: - column_name = missing_columns[0] - result["column"] = column_name - result["candidates"] = candidate_tables_by_column.get(column_name, []) - return result - - def _structured_sql_execution_error( - self, - *, - sql: str, - error: Exception, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - ) -> dict[str, Any] | None: - """Convert warehouse execution errors into prototype-style corrective feedback when possible.""" - error_text = str(error) - missing_column_match = re.search( - r'column "(?:[\w]+\.)?([^"]+)" does not exist', - error_text, - flags=re.IGNORECASE, - ) - if missing_column_match: - missing_column = missing_column_match.group(1).lower() - schema_cache = self._schema_cache(state, execution_context) - candidate_tables = self._find_tables_with_column(missing_column, schema_cache) - return { - "error": "column_not_in_table", - "table": self._primary_table_name(sql), - "column": missing_column, - "missing_columns": [missing_column], - "candidates": candidate_tables, - "candidate_tables": {missing_column: candidate_tables}, - "best_table": candidate_tables[0] if candidate_tables else None, - "message": ( - f"Column {missing_column} is not available on the current table. " - "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns." 
    def _validate_follow_up_dimension_usage(
        self,
        *,
        sql: str,
        state: DashboardChatRuntimeState,
        execution_context: dict[str, Any],
    ) -> dict[str, Any] | None:
        """Keep add-dimension follow-ups from succeeding without actually changing query granularity.

        Only applies to FOLLOW_UP_SQL turns whose follow-up type is
        ``add_dimension``; returns ``None`` when the check passes or does not
        apply, else a ``requested_dimension_missing`` payload.
        """
        intent_decision = state["intent_decision"]
        if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL:
            return None
        if intent_decision.follow_up_context.follow_up_type != "add_dimension":
            return None

        requested_dimension = self._extract_requested_follow_up_dimension(
            intent_decision.follow_up_context.modification_instruction or state["user_query"]
        )
        if not requested_dimension:
            return None

        previous_sql = state["conversation_context"].last_sql_query or ""
        current_dimensions = self._structural_dimensions_from_sql(sql)
        previous_dimensions = self._structural_dimensions_from_sql(previous_sql)
        normalized_requested_dimension = self._normalize_dimension_name(requested_dimension)
        # Pass only when the dimension is genuinely NEW in this SQL compared to
        # the previous turn's SQL.
        if (
            normalized_requested_dimension in current_dimensions
            and normalized_requested_dimension not in previous_dimensions
        ):
            return None

        candidate_tables = self._find_tables_with_column(
            requested_dimension,
            self._schema_cache(state, execution_context),
        )
        return {
            "error": "requested_dimension_missing",
            "requested_dimension": requested_dimension,
            "previous_dimensions": sorted(previous_dimensions),
            "current_dimensions": sorted(current_dimensions),
            "candidate_tables": candidate_tables,
            "message": (
                f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. "
                "Use the requested dimension exactly, or pick a table that contains it."
            ),
        }
- ), - } - - def _node_finalize_response( - self, - state: DashboardChatRuntimeState, - ) -> DashboardChatRuntimeState: - """Attach warehouse citations and metadata to the finished response.""" - response = state["response"] - citations = list(response.citations) - sql_validation = state.get("sql_validation") - if ( - sql_validation is not None - and sql_validation.is_valid - and sql_validation.sanitized_sql is not None - ): - citations.extend( - DashboardChatCitation( - source_type="warehouse_table", - source_identifier=table_name, - title=f"Warehouse table: {table_name}", - snippet=f"SQL executed against {table_name}.", - table_name=table_name, - ) - for table_name in sql_validation.tables - if table_name - ) - - allowlist = state.get("allowlist") or DashboardChatAllowlist() - response_metadata = dict(response.metadata) - response_metadata.update( - { - "dashboard_id": state["dashboard_id"], - "retrieved_document_ids": [ - document.document_id for document in state.get("retrieved_documents") or [] - ], - "allowlisted_tables": sorted(allowlist.allowed_tables), - "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], - "intent_reason": state["intent_decision"].reason, - "missing_info": state["intent_decision"].missing_info, - "follow_up_type": state["intent_decision"].follow_up_context.follow_up_type, - } - ) - state["response"] = DashboardChatResponse( - answer_text=response.answer_text, - intent=response.intent, - citations=list(dict.fromkeys(citations)), - warnings=response.warnings, - sql=response.sql, - sql_results=response.sql_results, - usage=response.usage, - tool_calls=response.tool_calls, - metadata=response_metadata, - ) - return state - - def _build_execution_result( - self, - *, - answer_text: str, - execution_context: dict[str, Any], - max_turns_reached: bool, - ) -> dict[str, Any]: - """Normalize tool-loop state into one runtime response payload.""" - if max_turns_reached: - execution_context["tool_calls"].append({"name": 
"max_turns_reached"}) - warnings = list(dict.fromkeys(execution_context["warnings"])) - return { - "answer_text": answer_text.strip(), - "retrieved_documents": execution_context["retrieved_documents"], - "tool_calls": execution_context["tool_calls"], - "sql": execution_context["last_sql"], - "sql_validation": execution_context["last_sql_validation"], - "sql_results": execution_context["last_sql_results"], - "warnings": warnings, - } - - def _warehouse_tools( - self, - execution_context: dict[str, Any], - org: Org, - ) -> DashboardChatWarehouseTools: - """Build the warehouse tool helper lazily for the turn.""" - warehouse_tools = execution_context.get("warehouse_tools") - if warehouse_tools is None: - warehouse_tools = self.warehouse_tools_factory(org) - execution_context["warehouse_tools"] = warehouse_tools - return warehouse_tools - - def _schema_cache( - self, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - tables: Sequence[str] | None = None, - ) -> dict[str, Any]: - """Load and cache schema snippets for allowlisted tables.""" - requested_tables = [ - table_name.lower() - for table_name in ( - tables if tables is not None else state["allowlist"].prioritized_tables() - ) - if state["allowlist"].is_allowed(table_name) - ] - cache = execution_context["schema_cache"] - missing_tables = [table_name for table_name in requested_tables if table_name not in cache] - if missing_tables: - snippets = self._warehouse_tools(execution_context, state["org"]).get_schema_snippets( - missing_tables - ) - for table_name, snippet in snippets.items(): - cache[table_name.lower()] = snippet - if snippets: - self._persist_session_schema_cache(state, cache) - if tables is None: - return cache - return { - table_name: cache[table_name] - for table_name in requested_tables - if table_name in cache - } - - def _seed_distinct_cache_from_previous_sql( - self, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - ) -> None: - """Treat text 
filters from the previous successful SQL as already validated for follow-ups.""" - previous_sql = state["conversation_context"].last_sql_query - if not previous_sql: - return - - self._record_validated_filters_from_sql( - state=state, - execution_context=execution_context, - sql=previous_sql, - ) - - @staticmethod - def _dbt_resources_by_unique_id( - state: DashboardChatRuntimeState, - ) -> dict[str, dict[str, Any]]: - """Return the allowlisted dbt index built at session start.""" - dbt_index = state.get("dbt_index") or {} - return dict(dbt_index.get("resources_by_unique_id") or {}) - - def _embed_query( - self, - query_text: str, - embedding_cache: dict[str, list[float]], - ) -> list[float]: - """Cache embeddings per query string during one turn.""" - if query_text not in embedding_cache: - embedding_cache[query_text] = self.vector_store.embed_query(query_text) - return embedding_cache[query_text] - - @staticmethod - def _route_after_intent(state: DashboardChatRuntimeState) -> str: - """Route to one explicit handler per prototype intent.""" - return state["intent_decision"].intent.value - - @staticmethod - def _normalize_conversation_history( - conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] | None, - ) -> list[DashboardChatConversationMessage]: - """Normalize stored history into the typed runtime message format.""" - normalized_messages: list[DashboardChatConversationMessage] = [] - for item in conversation_history or []: - if isinstance(item, DashboardChatConversationMessage): - normalized_messages.append(item) - continue - normalized_messages.append( - DashboardChatConversationMessage( - role=str(item.get("role") or "user"), - content=str(item.get("content") or ""), - payload=item.get("payload") or {}, - ) - ) - return normalized_messages - - @classmethod - def _extract_conversation_context( - cls, - conversation_history: Sequence[DashboardChatConversationMessage], - ) -> DashboardChatConversationContext: - """Extract reusable 
conversation context like the prototype conversation manager.""" - context = DashboardChatConversationContext() - recent_history = list(conversation_history)[-10:] - - for message in reversed(recent_history): - if message.role != "assistant": - continue - - payload = message.payload or {} - sql = payload.get("sql") - metadata = payload.get("metadata") or {} - citations = payload.get("citations") or [] - chart_ids = cls._extract_chart_ids_from_payload(payload) - - if chart_ids and context.last_sql_query and not context.last_chart_ids: - context = DashboardChatConversationContext( - last_sql_query=context.last_sql_query, - last_tables_used=context.last_tables_used, - last_chart_ids=chart_ids, - last_metrics=context.last_metrics, - last_dimensions=context.last_dimensions, - last_filters=context.last_filters, - last_response_type=context.last_response_type, - last_answer_text=context.last_answer_text, - last_intent=context.last_intent, - ) - break - - if sql and not context.last_sql_query: - tables = [ - str(table_name).lower() - for table_name in metadata.get("query_plan_tables") or [] - if table_name - ] - if not tables: - tables = [ - str(citation.get("table_name")).lower() - for citation in citations - if citation.get("table_name") - ] - if not tables: - tables = DashboardChatSqlGuard._extract_table_names(str(sql)) - context = DashboardChatConversationContext( - last_sql_query=str(sql), - last_tables_used=list(dict.fromkeys(tables)), - last_chart_ids=chart_ids, - last_metrics=cls._extract_metrics_from_sql(str(sql)), - last_dimensions=cls._extract_dimensions_from_sql(str(sql)), - last_filters=cls._extract_filters_from_sql(str(sql)), - last_response_type="sql_result", - last_answer_text=message.content, - last_intent=str(payload.get("intent") or ""), - ) - if chart_ids: - break - continue - - if payload and context.last_response_type is None: - context = DashboardChatConversationContext( - last_chart_ids=chart_ids, - last_response_type="metadata_answer", - 
last_answer_text=message.content, - last_intent=str(payload.get("intent") or ""), - ) - - return context - - @staticmethod - def _extract_chart_ids_from_payload(payload: dict[str, Any]) -> list[str]: - """Extract chart ids from persisted metadata/citations like the prototype chat history.""" - metadata = payload.get("metadata") or {} - chart_ids = [str(chart_id) for chart_id in metadata.get("chart_ids_used") or [] if chart_id] - if chart_ids: - return list(dict.fromkeys(chart_ids)) - - extracted_chart_ids: list[str] = [] - for citation in payload.get("citations") or []: - source_identifier = str(citation.get("source_identifier") or "") - chart_id = DashboardChatRuntime._chart_id_from_source_identifier(source_identifier) - if chart_id is not None: - extracted_chart_ids.append(str(chart_id)) - return list(dict.fromkeys(extracted_chart_ids)) - - @classmethod - def _build_follow_up_context_prompt( - cls, - conversation_context: DashboardChatConversationContext, - user_query: str, - ) -> str: - """Build the prototype follow-up context prompt.""" - return "\n".join( - [ - "PREVIOUS QUERY CONTEXT:", - f"Last SQL: {conversation_context.last_sql_query or 'None'}", - f"Tables used: {', '.join(conversation_context.last_tables_used) or 'None'}", - f"Metrics: {', '.join(conversation_context.last_metrics) or 'None'}", - f"Dimensions: {', '.join(conversation_context.last_dimensions) or 'None'}", - f"Filters: {', '.join(conversation_context.last_filters) or 'None'}", - "", - f"NEW INSTRUCTION: {user_query}", - "", - "TASK: Modify the previous query based on the new instruction. 
Reuse tables and context where possible.", - ] - ) - - @staticmethod - def _detect_sql_modification_type(user_query: str) -> str: - """Detect the same coarse follow-up modification categories as the prototype.""" - lowered_query = user_query.lower() - if any(keyword in lowered_query for keyword in ["by", "split by", "break down", "group by"]): - return "add_dimension" - if any(keyword in lowered_query for keyword in ["filter", "only", "exclude", "where"]): - return "add_filter" - if any( - keyword in lowered_query - for keyword in ["last", "this", "previous", "next", "monthly", "weekly", "quarterly"] - ): - return "modify_timeframe" - if any( - keyword in lowered_query - for keyword in ["total", "sum", "count", "average", "avg", "maximum", "minimum"] - ): - return "change_aggregation" - return "general_modification" - - @staticmethod - def _extract_requested_follow_up_dimension(text: str) -> str | None: - """Extract the requested follow-up dimension and normalize natural-language spaces.""" - normalized_text = text.strip().lower() - patterns = [ - r"split\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", - r"break\s+down\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", - r"group\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", - r"\bby\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", - ] - for pattern in patterns: - match = re.search(pattern, normalized_text) - if not match: - continue - candidate = re.split( - r"\b(with|for|in|across|between)\b", - match.group(1), - maxsplit=1, - )[0] - candidate = re.sub(r"[^a-zA-Z0-9_\s]", " ", candidate) - normalized_candidate = "_".join(part for part in candidate.split() if part) - if normalized_candidate: - return normalized_candidate - return None - - @staticmethod - def _extract_metrics_from_sql(sql: str) -> list[str]: - """Extract aggregate expressions from the previous SQL for follow-up prompts.""" - select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) - if not select_clause: - return [] - metrics: list[str] = [] - for expression in 
DashboardChatSqlGuard._split_select_expressions(select_clause): - normalized_expression = expression.strip() - if normalized_expression and DashboardChatSqlGuard._contains_aggregate( - normalized_expression - ): - metrics.append(normalized_expression) - return metrics[:5] - - @staticmethod - def _extract_dimensions_from_sql(sql: str) -> list[str]: - """Extract GROUP BY dimensions from the previous SQL.""" - match = re.search( - r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not match: - return [] - return [ - dimension.strip().strip('`"') - for dimension in match.group(1).split(",") - if dimension.strip() - ][:5] - - @staticmethod - def _extract_filters_from_sql(sql: str) -> list[str]: - """Extract WHERE-clause filters from the previous SQL.""" - match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not match: - return [] - - where_clause = match.group(1).strip() - filters: list[str] = [] - for pattern in [ - r"([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'([^']+)'", - r"([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\([^)]+\)", - ]: - for filter_match in re.findall(pattern, where_clause, flags=re.IGNORECASE): - if isinstance(filter_match, tuple) and len(filter_match) == 2: - filters.append(f"{filter_match[0]} = {filter_match[1]}") - else: - filters.append(str(filter_match)) - return filters[:5] - - def _query_vector_store( - self, - *, - org: Org, - collection_name: str | None, - query_text: str, - source_types: Sequence[str], - dashboard_id: int | None = None, - query_embedding: list[float] | None = None, - ) -> list[DashboardChatRetrievedDocument]: - """Query Chroma and normalize the results.""" - if not source_types: - return [] - - results = self.vector_store.query( - org.id, - query_text=query_text, - n_results=self.runtime_config.retrieval_limit, - source_types=list(source_types), - dashboard_id=dashboard_id, - query_embedding=query_embedding, - 
collection_name=collection_name, - ) - return [ - DashboardChatRetrievedDocument( - document_id=result.document_id, - source_type=str(result.metadata.get("source_type") or ""), - source_identifier=str(result.metadata.get("source_identifier") or ""), - content=result.content, - dashboard_id=result.metadata.get("dashboard_id"), - distance=result.distance, - ) - for result in results - ] - - @staticmethod - def _filter_allowlisted_dbt_results( - results: Sequence[DashboardChatRetrievedDocument], - allowlist: DashboardChatAllowlist, - ) -> list[DashboardChatRetrievedDocument]: - """Keep only dbt docs that belong to the dashboard lineage.""" - filtered_results: list[DashboardChatRetrievedDocument] = [] - for result in results: - unique_id = DashboardChatRuntime._unique_id_from_source_identifier( - result.source_identifier - ) - if allowlist.is_unique_id_allowed(unique_id): - filtered_results.append(result) - return filtered_results - - @staticmethod - def _dedupe_retrieved_documents( - results: Sequence[DashboardChatRetrievedDocument], - ) -> list[DashboardChatRetrievedDocument]: - """Deduplicate retrieved documents while preserving better-ranked items.""" - scored_results: list[tuple[float, DashboardChatRetrievedDocument]] = [] - for result in results: - scored_results.append((result.distance if result.distance is not None else 999.0, result)) - - merged_results: list[DashboardChatRetrievedDocument] = [] - seen_document_ids: set[str] = set() - for _, result in sorted(scored_results, key=lambda item: item[0]): - if result.document_id in seen_document_ids: - continue - merged_results.append(result) - seen_document_ids.add(result.document_id) - return merged_results - - def _build_citations( - self, - *, - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - dashboard_export: dict[str, Any], - allowlist: DashboardChatAllowlist, - ) -> list[DashboardChatCitation]: - """Build citations from the retrieved tool-loop documents.""" - dashboard_title = 
dashboard_export["dashboard"].get("title") or "Current dashboard" - chart_lookup = { - chart.get("id"): chart.get("title") or f"Chart {chart.get('id')}" - for chart in dashboard_export.get("charts") or [] - } - citations: list[DashboardChatCitation] = [] - for document in retrieved_documents[:6]: - chart_id = self._chart_id_from_source_identifier(document.source_identifier) - table_name = None - if document.source_type in { - DashboardChatSourceType.DBT_MANIFEST.value, - DashboardChatSourceType.DBT_CATALOG.value, - }: - unique_id = self._unique_id_from_source_identifier(document.source_identifier) - table_name = allowlist.unique_id_to_table.get(unique_id) if unique_id else None - citations.append( - DashboardChatCitation( - source_type=document.source_type, - source_identifier=document.source_identifier, - title=self._citation_title( - document=document, - dashboard_title=dashboard_title, - chart_lookup=chart_lookup, - table_name=table_name, - ), - snippet=self._compact_snippet(document.content), - dashboard_id=document.dashboard_id, - table_name=table_name, - ) - ) - return citations - - @staticmethod - def _citation_title( - *, - document: DashboardChatRetrievedDocument, - dashboard_title: str, - chart_lookup: dict[int, str], - table_name: str | None, - ) -> str: - """Map a retrieved document into a human-readable citation title.""" - if document.source_type == DashboardChatSourceType.ORG_CONTEXT.value: - return "Organization context" - if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: - return f"Dashboard context: {dashboard_title}" - if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: - chart_id = DashboardChatRuntime._chart_id_from_source_identifier( - document.source_identifier - ) - if chart_id is not None and chart_id in chart_lookup: - return f"Chart: {chart_lookup[chart_id]}" - return f"Dashboard export: {dashboard_title}" - if document.source_type == DashboardChatSourceType.DBT_MANIFEST.value: - return 
f"dbt manifest: {table_name or document.source_identifier}" - if document.source_type == DashboardChatSourceType.DBT_CATALOG.value: - return f"dbt catalog: {table_name or document.source_identifier}" - return document.source_identifier - - def _load_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: - """Return the current session's frozen dashboard context snapshot.""" - session_id = state.get("session_id") - if not session_id: - return self._build_session_snapshot(state) - - cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) - cached_snapshot = cache.get(cache_key) - if cached_snapshot is not None: - cached_dbt_index = cached_snapshot.get("dbt_index") - if cached_dbt_index is None and cached_snapshot.get("manifest_json") is not None: - cached_dbt_index = DashboardChatAllowlistBuilder.build_dbt_index( - cached_snapshot.get("manifest_json"), - deserialize_allowlist(cached_snapshot.get("allowlist")), - ) - return { - "dashboard_export": dict(cached_snapshot["dashboard_export"]), - "dbt_index": cached_dbt_index or {"resources_by_unique_id": {}}, - "allowlist": deserialize_allowlist(cached_snapshot.get("allowlist")), - "schema_cache": deserialize_schema_snippets(cached_snapshot.get("schema_cache")), - "distinct_cache": deserialize_distinct_cache( - cached_snapshot.get("distinct_cache") - ), - } - - snapshot = self._build_session_snapshot(state) - cache.set( - cache_key, - { - "dashboard_export": snapshot["dashboard_export"], - "dbt_index": snapshot["dbt_index"], - "allowlist": serialize_allowlist(snapshot["allowlist"]), - "schema_cache": serialize_schema_snippets(snapshot["schema_cache"]), - "distinct_cache": serialize_distinct_cache(snapshot["distinct_cache"]), - }, - DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, - ) - return snapshot - - def _build_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: - """Build one session-stable snapshot of dashboard-specific runtime context.""" - dashboard_export = 
DashboardService.export_dashboard_context( - state["dashboard_id"], - state["org"], - ) - manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(state["org"].dbt) - allowlist = DashboardChatAllowlistBuilder.build( - dashboard_export, - manifest_json=manifest_json, - ) - return { - "dashboard_export": dashboard_export, - "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index( - manifest_json, - allowlist, - ), - "allowlist": allowlist, - "schema_cache": {}, - "distinct_cache": set(), - } - - def _persist_session_schema_cache( - self, - state: DashboardChatRuntimeState, - schema_cache: dict[str, DashboardChatSchemaSnippet], - ) -> None: - """Persist lazily loaded schema snippets back into the session snapshot cache.""" - session_id = state.get("session_id") - if not session_id: - state["session_schema_cache"] = dict(schema_cache) - return - - cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) - cached_snapshot = cache.get(cache_key) - if cached_snapshot is None: - return - cached_snapshot["schema_cache"] = serialize_schema_snippets(schema_cache) - cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) - state["session_schema_cache"] = dict(schema_cache) - - def _persist_session_distinct_cache( - self, - state: DashboardChatRuntimeState, - distinct_cache: set[tuple[str, str, str]], - ) -> None: - """Persist validated distinct values back into the session snapshot cache.""" - session_id = state.get("session_id") - if not session_id: - state["session_distinct_cache"] = set(distinct_cache) - return - - cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) - cached_snapshot = cache.get(cache_key) - if cached_snapshot is None: - return - cached_snapshot["distinct_cache"] = serialize_distinct_cache(distinct_cache) - cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) - state["session_distinct_cache"] = set(distinct_cache) - - @staticmethod - def 
_compact_snippet(content: str, max_length: int = 220) -> str: - """Collapse whitespace and trim long snippets for citations and suggestions.""" - normalized = " ".join(content.split()) - if len(normalized) <= max_length: - return normalized - return normalized[: max_length - 3].rstrip() + "..." - - def _tool_document_payload( - self, - document: DashboardChatRetrievedDocument, - allowlist: DashboardChatAllowlist, - dashboard_export: dict[str, Any], - ) -> dict[str, Any]: - """Convert a runtime retrieval result into the prototype tool payload shape.""" - metadata: dict[str, Any] = { - "type": self._prototype_doc_type(document.source_type), - "source_type": document.source_type, - "source_identifier": document.source_identifier, - } - chart_id = self._chart_id_from_source_identifier(document.source_identifier) - if chart_id is not None: - metadata["chart_id"] = chart_id - metadata["dashboard_id"] = document.dashboard_id - chart_metadata = self._chart_tool_metadata(chart_id, dashboard_export) - if chart_metadata: - metadata.update(chart_metadata) - unique_id = self._unique_id_from_source_identifier(document.source_identifier) - if unique_id: - metadata["dbt_unique_id"] = unique_id - metadata["table_name"] = allowlist.unique_id_to_table.get(unique_id) - return { - "doc_id": document.document_id, - "content": document.content, - "metadata": metadata, - "similarity_score": document.distance, - } - - @classmethod - def _chart_tool_metadata( - cls, - chart_id: int, - dashboard_export: dict[str, Any], - ) -> dict[str, Any]: - """Return structured chart metadata that nudges the tool loop toward exact chart fields.""" - chart = next( - ( - candidate - for candidate in (dashboard_export.get("charts") or []) - if candidate.get("id") == chart_id - ), - None, - ) - if chart is None: - return {} - - preferred_table = build_dashboard_chat_table_name( - chart.get("schema_name"), - chart.get("table_name"), - ) - metric_columns = cls._chart_metric_columns(chart) - dimension_columns = 
cls._chart_dimension_columns(chart) - time_column = cls._chart_time_column(chart, dimension_columns) - payload: dict[str, Any] = { - "chart_title": str(chart.get("title") or ""), - "chart_type": str(chart.get("chart_type") or ""), - } - if preferred_table: - payload["preferred_table"] = preferred_table - if metric_columns: - payload["metric_columns"] = metric_columns - if dimension_columns: - payload["dimension_columns"] = dimension_columns - if time_column: - payload["time_column"] = time_column - return payload - - @staticmethod - def _prototype_doc_type(source_type: str) -> str: - """Map Dalgo source types into the prototype doc-type vocabulary.""" - if source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: - return "chart" - if source_type in { - DashboardChatSourceType.DBT_MANIFEST.value, - DashboardChatSourceType.DBT_CATALOG.value, - }: - return "dbt_model" - return "context" - - def _validate_sql_allowlist( - self, - sql: str, - allowlist: DashboardChatAllowlist, - ) -> dict[str, Any]: - """Validate that all referenced tables are in the dashboard allowlist.""" - referenced_tables = DashboardChatSqlGuard._extract_table_names(sql) - invalid_tables = [ - table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name) - ] - if invalid_tables: - return { - "valid": False, - "invalid_tables": invalid_tables, - "message": ( - "SQL references tables not available in the current dashboard: " - + ", ".join(invalid_tables) - + ". Use list_tables_by_keyword to find allowed tables." 
- ), - } - return {"valid": True, "invalid_tables": [], "message": ""} - - @staticmethod - def _primary_table_name(sql: str) -> str | None: - """Return the primary FROM table for single-query correction logic.""" - table_match = re.search(r"\bFROM\s+([`\"]?)([\w\.]+)\1", sql, re.IGNORECASE) - if not table_match: - return None - return normalize_dashboard_chat_table_name(table_match.group(2)) - - @classmethod - def _table_references(cls, sql: str) -> list[dict[str, str | None]]: - """Return normalized FROM/JOIN table references and aliases from one SQL statement.""" - references: list[dict[str, str | None]] = [] - for match in re.finditer( - r"\b(?:FROM|JOIN)\s+([`\"]?)([\w\.]+)\1(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", - sql, - flags=re.IGNORECASE, - ): - table_name = normalize_dashboard_chat_table_name(match.group(2)) - if not table_name: - continue - alias = str(match.group(3) or "").lower() or None - references.append( - { - "table_name": table_name, - "alias": alias, - "short_name": table_name.split(".")[-1], - } - ) - return references - - @classmethod - def _resolve_table_qualifier( - cls, - qualifier: str, - table_references: Sequence[dict[str, str | None]], - ) -> str | None: - """Resolve a qualifier like `f` or `analytics_table` to one query table.""" - normalized_qualifier = qualifier.lower().strip().strip('`"') - matches = [ - str(reference["table_name"]) - for reference in table_references - if normalized_qualifier - in { - str(reference.get("alias") or ""), - str(reference.get("short_name") or ""), - str(reference.get("table_name") or ""), - } - ] - deduped_matches = list(dict.fromkeys(match for match in matches if match)) - if len(deduped_matches) == 1: - return deduped_matches[0] - return None - - @staticmethod - def _table_columns(snippet: DashboardChatSchemaSnippet | Any) -> set[str]: - """Return the normalized column names available on one schema snippet.""" - return { - str(column.get("name") or "").lower() - for column in getattr(snippet, 
"columns", []) or [] - } - - @classmethod - def _tables_with_column( - cls, - column_name: str, - table_names: Sequence[str], - schema_cache: dict[str, Any], - ) -> list[str]: - """Return the query tables that contain one column.""" - normalized_column_name = column_name.lower() - return [ - table_name - for table_name in table_names - if normalized_column_name in cls._table_columns(schema_cache.get(table_name)) - ] - - @classmethod - def _resolve_identifier_table( - cls, - *, - qualifier: str | None, - column_name: str, - table_references: Sequence[dict[str, str | None]], - schema_cache: dict[str, Any], - ) -> str | None: - """Resolve one referenced column to a concrete query table when it is unambiguous.""" - if qualifier is not None: - resolved_table = cls._resolve_table_qualifier(qualifier, table_references) - if not resolved_table: - return None - if column_name.lower() in cls._table_columns(schema_cache.get(resolved_table)): - return resolved_table - return None - - query_tables = [str(reference["table_name"]) for reference in table_references if reference.get("table_name")] - matching_tables = cls._tables_with_column(column_name, query_tables, schema_cache) - if len(matching_tables) == 1: - return matching_tables[0] - return None - - @classmethod - def _referenced_sql_identifier_refs(cls, sql: str) -> list[tuple[str | None, str]]: - """Extract likely physical identifier references from the outer SQL.""" - table_aliases = { - alias.lower() - for alias in re.findall( - r"\b(?:FROM|JOIN)\s+[`\"]?[\w\.]+[`\"]?(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", - sql, - flags=re.IGNORECASE, - ) - if alias - } - select_aliases = cls._select_aliases(sql) - referenced_identifiers: list[tuple[str | None, str]] = [] - - select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) - if select_clause: - for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): - referenced_identifiers.extend( - 
cls._extract_identifier_refs_from_sql_segment(expression, table_aliases) - ) - - for pattern in [ - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", - r"\bORDER\s+BY\s+(.+?)(?:\bLIMIT\b|$)", - ]: - match = re.search(pattern, sql, flags=re.IGNORECASE | re.DOTALL) - if match: - referenced_identifiers.extend( - cls._extract_identifier_refs_from_sql_segment( - match.group(1), - table_aliases, - ignored_identifiers=select_aliases, - ) - ) - - return list(dict.fromkeys(referenced_identifiers)) - - @staticmethod - def _select_aliases(sql: str) -> set[str]: - """Return aliases introduced by the outer SELECT clause.""" - select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) - if not select_clause: - return set() - - aliases: set[str] = set() - for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): - alias_match = re.search( - r"\bAS\s+([A-Za-z_][A-Za-z0-9_]*)\s*$", - expression, - flags=re.IGNORECASE, - ) - if alias_match: - aliases.add(alias_match.group(1).lower()) - return aliases - - @staticmethod - def _extract_identifier_refs_from_sql_segment( - segment: str, - table_aliases: set[str], - ignored_identifiers: set[str] | None = None, - ) -> list[tuple[str | None, str]]: - """Pull qualified and unqualified column-like identifiers out of one SQL segment.""" - normalized_segment = re.sub(r"'[^']*'", " ", segment) - normalized_segment = re.sub( - r"\bAS\s+[A-Za-z_][A-Za-z0-9_]*", - " ", - normalized_segment, - flags=re.IGNORECASE, - ) - ignored_tokens = { - "SELECT", - "FROM", - "WHERE", - "GROUP", - "BY", - "ORDER", - "LIMIT", - "COUNT", - "SUM", - "AVG", - "MIN", - "MAX", - "DISTINCT", - "AND", - "OR", - "AS", - "IN", - "CASE", - "WHEN", - "THEN", - "ELSE", - "END", - "TRUE", - "FALSE", - "NULL", - "NOT", - "ASC", - "DESC", - "ON", - "JOIN", - } - ignored_identifiers = {identifier.lower() for identifier in (ignored_identifiers or set())} - identifiers: list[tuple[str | 
None, str]] = [] - for match in re.finditer( - r"(?:(?P[A-Za-z_][A-Za-z0-9_]*)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)", - normalized_segment, - ): - qualifier = match.group("qualifier") - identifier = match.group("identifier") - if not identifier: - continue - if identifier.upper() in ignored_tokens: - continue - if identifier.lower() in table_aliases or identifier.lower() in ignored_identifiers: - continue - trailing_segment = normalized_segment[match.end() :].lstrip() - if qualifier is None and trailing_segment.startswith("("): - continue - identifiers.append((qualifier.lower() if qualifier else None, identifier.lower())) - return identifiers - - @staticmethod - def _best_table_for_missing_columns( - missing_columns: Sequence[str], - schema_cache: dict[str, Any], - ) -> str | None: - """Return the first allowlisted table that covers all missing columns.""" - wanted_columns = {column_name.lower() for column_name in missing_columns} - for table_name, snippet in schema_cache.items(): - available_columns = { - str(column.get("name") or "").lower() for column in snippet.columns - } - if wanted_columns.issubset(available_columns): - return table_name - return None - - def _missing_distinct( - self, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - ) -> list[dict[str, Any]]: - """Detect text filters that require a prior distinct-values call.""" - where_match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return [] - - table_references = self._table_references(sql) - query_tables = [ - reference["table_name"] - for reference in table_references - if reference.get("table_name") - ] - if not query_tables: - return [] - primary_table = self._primary_table_name(sql) or query_tables[0] - - full_schema_cache = self._schema_cache(state, execution_context, tables=query_tables) - all_schema_cache = self._schema_cache(state, execution_context) - - column_types = 
{ - table_name: { - str(column.get("name") or "").lower(): str( - column.get("data_type") or column.get("type") or "" - ).lower() - for column in getattr(snippet, "columns", []) - } - for table_name, snippet in full_schema_cache.items() - } - missing: list[dict[str, Any]] = [] - for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = self._resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_references=table_references, - schema_cache=full_schema_cache, - ) - if resolved_table is None and qualifier is None: - matching_tables = self._tables_with_column( - normalized_column, - query_tables, - full_schema_cache, - ) - if len(matching_tables) > 1: - continue - if resolved_table is None: - candidate_tables = self._find_tables_with_column( - normalized_column, - all_schema_cache, - ) - if qualifier is None and candidate_tables: - continue - missing.append( - { - "table": primary_table, - "column": column_name, - "error": "column_not_in_table", - "candidates": candidate_tables, - } - ) - continue - data_type = column_types.get(resolved_table, {}).get(normalized_column, "") - if not data_type: - continue - if not self._is_text_type(data_type): - continue - if ( - not self._has_validated_distinct_value( - execution_context["distinct_cache"], - table_name=resolved_table, - column_name=normalized_column, - value=value, - ) - ): - missing.append( - {"table": resolved_table, "column": column_name, "value": value} - ) - return missing - - @staticmethod - def _extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str, str]]: - """Extract quoted text filter values from one WHERE clause.""" - extracted_values: list[tuple[str | None, str, str]] = [] - for qualifier, column_name, value in re.findall( - r"(?:([a-zA-Z_][a-zA-Z0-9_]*)\.)?([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*'([^']+)'", - where_clause, - flags=re.IGNORECASE, - ): - 
extracted_values.append((qualifier.lower() if qualifier else None, column_name, value)) - - for match in re.finditer( - r"(?:([a-zA-Z_][a-zA-Z0-9_]*)\.)?([a-zA-Z_][a-zA-Z0-9_]*)\s+IN\s*\(([^)]*)\)", - where_clause, - flags=re.IGNORECASE, - ): - qualifier = match.group(1) - column_name = match.group(2) - for value in re.findall(r"'([^']+)'", match.group(3)): - extracted_values.append( - (qualifier.lower() if qualifier else None, column_name, value) - ) - return extracted_values - - @staticmethod - def _normalize_distinct_value(value: Any) -> str: - """Normalize one distinct value for exact cache lookups.""" - return str(value).strip().lower() - - @classmethod - def _has_validated_distinct_value( - cls, - distinct_cache: set[tuple[Any, ...]], - *, - table_name: str, - column_name: str, - value: Any, - ) -> bool: - """Return whether this exact text filter value was already validated in-session.""" - normalized_value = cls._normalize_distinct_value(value) - normalized_column = column_name.lower() - normalized_table = table_name.lower() - return ( - (normalized_table, normalized_column, normalized_value) in distinct_cache - or ("*", normalized_column, normalized_value) in distinct_cache - or (normalized_table, normalized_column) in distinct_cache - or ("*", normalized_column) in distinct_cache - ) - - @staticmethod - def _find_tables_with_column( - column_name: str, - schema_cache: dict[str, Any], - limit: int = 10, - ) -> list[str]: - """Find allowlisted tables that contain one column.""" - matches: list[str] = [] - normalized_column_name = column_name.lower() - for table_name, snippet in schema_cache.items(): - if any( - normalized_column_name == str(column.get("name") or "").lower() - for column in snippet.columns - ): - matches.append(table_name) - if len(matches) >= limit: - break - return matches - - @staticmethod - def _is_text_type(data_type: str) -> bool: - """Treat common string-like warehouse types as requiring distinct-value lookup.""" - return any( - 
text_token in data_type - for text_token in ["char", "text", "string", "varchar"] - ) - - @staticmethod - def _preview_sql_rows(rows: list[dict[str, Any]], max_rows: int = 5) -> str: - """Render a compact human-readable preview for successful SQL executions.""" - if not rows: - return "No matching rows found." - preview_rows = rows[:max_rows] - preview_lines = [json.dumps(row, cls=DjangoJSONEncoder) for row in preview_rows] - if len(rows) > max_rows: - preview_lines.append(f"... {len(rows) - max_rows} more rows") - return "\n".join(preview_lines) - - def _record_validated_distinct_values( - self, - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - table_name: str, - column_name: str, - values: Sequence[Any], - ) -> None: - """Persist exact validated filter values for the current session.""" - normalized_table = table_name.lower() - normalized_column = column_name.lower() - distinct_cache = execution_context["distinct_cache"] - for value in values: - normalized_value = self._normalize_distinct_value(value) - distinct_cache.add((normalized_table, normalized_column, normalized_value)) - # Follow-ups often move to an upstream table with the same validated dimension. 
- distinct_cache.add(("*", normalized_column, normalized_value)) - self._persist_session_distinct_cache(state, distinct_cache) - - def _record_validated_filters_from_sql( - self, - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - sql: str, - ) -> None: - """Seed exact validated filter values from a successful SQL statement.""" - table_references = self._table_references(sql) - if not table_references: - return - where_match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return - - query_tables = [ - reference["table_name"] - for reference in table_references - if reference.get("table_name") - ] - schema_cache = dict(execution_context.get("schema_cache") or {}) - values_by_target: dict[tuple[str, str], list[str]] = {} - for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = self._resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_references=table_references, - schema_cache=schema_cache, - ) - if resolved_table is None and qualifier is None: - if schema_cache: - matching_tables = self._tables_with_column( - normalized_column, - query_tables, - schema_cache, - ) - if len(matching_tables) == 1: - resolved_table = matching_tables[0] - elif len(query_tables) == 1: - resolved_table = query_tables[0] - values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) - - if not values_by_target: - return - - for (table_name, column_name), values in values_by_target.items(): - self._record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name=table_name, - column_name=column_name, - values=values, - ) - - @classmethod - def _structural_dimensions_from_sql(cls, sql: str) -> set[str]: - """Return normalized non-aggregate dimensions used by one SQL 
statement.""" - if not sql: - return set() - - dimensions: set[str] = set() - for dimension in cls._extract_dimensions_from_sql(sql): - identifier_refs = cls._extract_identifier_refs_from_sql_segment( - dimension, - table_aliases=set(), - ) - if identifier_refs: - dimensions.update( - cls._normalize_dimension_name(column_name) - for _, column_name in identifier_refs - ) - continue - dimensions.add(cls._normalize_dimension_name(dimension)) - select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) - if not select_clause: - return {dimension for dimension in dimensions if dimension} - - for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): - normalized_expression = expression.strip() - if not normalized_expression or DashboardChatSqlGuard._contains_aggregate( - normalized_expression - ): - continue - for _, column_name in cls._extract_identifier_refs_from_sql_segment( - normalized_expression, - table_aliases=set(), - ignored_identifiers=cls._select_aliases(sql), - ): - dimensions.add(cls._normalize_dimension_name(column_name)) - return {dimension for dimension in dimensions if dimension} - - @staticmethod - def _normalize_dimension_name(value: str) -> str: - """Normalize dimension names from SQL expressions and natural-language follow-ups.""" - normalized_value = value.strip().strip('`"').lower() - normalized_value = normalized_value.split(".")[-1] - normalized_value = re.sub(r"[^a-z0-9_]+", "_", normalized_value) - normalized_value = re.sub(r"_+", "_", normalized_value).strip("_") - return normalized_value - - @classmethod - def _chart_metric_columns(cls, chart: dict[str, Any]) -> list[str]: - """Extract the most likely metric columns from one chart export payload.""" - extra_config = chart.get("extra_config") or {} - metrics: list[str] = [] - for metric in extra_config.get("metrics") or []: - if isinstance(metric, str) and metric.strip(): - metrics.append(metric.strip()) - continue - if isinstance(metric, dict): - for key 
in ["column", "name", "field", "metric", "metric_column"]: - value = metric.get(key) - if isinstance(value, str) and value.strip(): - metrics.append(value.strip()) - break - for key in [ - "metric_col", - "metric_column", - "measure_col", - "measure_column", - "value_column", - "y_axis_column", - ]: - value = extra_config.get(key) - if isinstance(value, str) and value.strip(): - metrics.append(value.strip()) - return list(dict.fromkeys(metrics)) - - @classmethod - def _chart_dimension_columns(cls, chart: dict[str, Any]) -> list[str]: - """Extract dimension-like fields from one chart export payload.""" - extra_config = chart.get("extra_config") or {} - dimensions: list[str] = [] - for key in ["dimension_col", "extra_dimension", "group_by", "category_column", "x_axis_column"]: - value = extra_config.get(key) - if isinstance(value, str) and value.strip(): - dimensions.append(value.strip()) - for value in extra_config.get("dimensions") or []: - if isinstance(value, str) and value.strip(): - dimensions.append(value.strip()) - return list(dict.fromkeys(dimensions)) - - @classmethod - def _chart_time_column( - cls, - chart: dict[str, Any], - dimension_columns: Sequence[str], - ) -> str | None: - """Extract or infer the chart's time dimension when one is present.""" - extra_config = chart.get("extra_config") or {} - for key in ["time_column", "time_dimension", "date_column"]: - value = extra_config.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - for dimension in dimension_columns: - if cls._looks_like_time_dimension(dimension): - return dimension - return None - - @staticmethod - def _looks_like_time_dimension(column_name: str) -> bool: - """Return whether a dimension name probably represents time bucketing.""" - normalized_column = column_name.lower() - return any( - token in normalized_column - for token in ["date", "day", "week", "month", "quarter", "year", "time"] - ) - - @staticmethod - def _serialize_tool_result(result: dict[str, 
Any]) -> dict[str, Any]: - """Trim large tool payloads before feeding them back into the model.""" - serialized = dict(result) - docs = serialized.get("docs") - if isinstance(docs, list) and len(docs) > 6: - serialized["docs"] = docs[:6] - rows = serialized.get("rows") - if isinstance(rows, list) and len(rows) > 5: - serialized["rows"] = rows[:5] - values = serialized.get("values") - if isinstance(values, list) and len(values) > 20: - serialized["values"] = values[:20] - return serialized - - def _summarize_tool_call( - self, - *, - tool_name: str, - args: dict[str, Any], - result: dict[str, Any], - ) -> dict[str, Any]: - """Persist a compact execution trace for one tool call.""" - entry: dict[str, Any] = {"name": tool_name, "args": args} - if tool_name == "retrieve_docs": - entry["count"] = result.get("count", 0) - entry["doc_ids"] = [doc.get("doc_id") for doc in result.get("docs", [])[:6]] - elif tool_name == "get_schema_snippets": - entry["tables"] = [table.get("table") for table in result.get("tables", [])] - elif tool_name == "search_dbt_models": - entry["count"] = result.get("count", 0) - entry["models"] = [model.get("table") or model.get("name") for model in result.get("models", [])] - elif tool_name == "get_dbt_model_info": - entry["model"] = result.get("model") - entry["column_count"] = len(result.get("columns") or []) - elif tool_name == "get_distinct_values": - entry["error"] = result.get("error") - entry["count"] = result.get("count", 0) - entry["values_sample"] = (result.get("values") or [])[:10] - elif tool_name == "list_tables_by_keyword": - entry["tables"] = [table.get("table") for table in result.get("tables", [])] - elif tool_name == "check_table_row_count": - entry["row_count"] = result.get("row_count") - elif tool_name == "run_sql_query": - entry["success"] = result.get("success", False) - entry["row_count"] = result.get("row_count", 0) - entry["sql_used"] = result.get("sql_used") - entry["error"] = result.get("error") - else: - entry["result"] 
= result - return entry - - def _max_turns_message( - self, - user_query: str, - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - ) -> str: - """Return a bounded fallback when the prototype tool loop exhausts its budget.""" - if retrieved_documents: - return ( - "I found relevant dashboard context, but I couldn't complete the analysis safely. " - "Please rephrase the question or ask about a metric shown on this dashboard." - ) - return ( - f"I couldn't find enough dashboard-backed context to answer: {user_query}. " - "Please rephrase or ask about a metric shown on this dashboard." - ) - - def _compose_final_answer_text( - self, - state: DashboardChatRuntimeState, - execution_result: dict[str, Any], - *, - response_format: str, - ) -> str: - """Compose one final markdown answer for all non-trivial routes.""" - normalized_sql_results = self._normalize_sql_results_for_answer( - execution_result.get("sql_results") - ) - draft_answer = (execution_result.get("answer_text") or "").strip() or None - if hasattr(self.llm_client, "compose_final_answer"): - try: - answer_text = self.llm_client.compose_final_answer( - user_query=state["user_query"], - intent=state["intent_decision"].intent, - response_format=response_format, - draft_answer=draft_answer, - retrieved_documents=list(execution_result.get("retrieved_documents") or []), - sql=execution_result.get("sql"), - sql_results=normalized_sql_results, - warnings=list(execution_result.get("warnings") or []), - ) - if answer_text: - return answer_text - except Exception: - logger.exception("Dashboard chat final answer composition failed") - return self._fallback_answer_text( - execution_result.get("retrieved_documents") or [], - normalized_sql_results, - response_format=response_format, - draft_answer=draft_answer, - ) - - @staticmethod - def _determine_response_format( - *, - user_query: str, - sql_results: list[dict[str, Any]] | None, - ) -> str: - """Return how the frontend should present the final answer.""" 
- if not sql_results: - return "text" - first_row = sql_results[0] if sql_results else {} - column_count = len(first_row.keys()) if isinstance(first_row, dict) else 0 - normalized_query = user_query.lower() - tableish_keywords = [ - "breakdown", - "split by", - "list", - "table", - "tabular", - "rank", - "ranking", - "top ", - "bottom ", - "wise", - ] - if "table" in normalized_query and column_count > 0: - return "table" - if len(sql_results) > 1 and column_count > 1: - return "text_with_table" - if any(keyword in normalized_query for keyword in tableish_keywords) and column_count > 1: - return "text_with_table" - return "text" - - @staticmethod - def _sql_result_columns(sql_results: list[dict[str, Any]] | None) -> list[str]: - """Return table columns for frontend rendering metadata.""" - if not sql_results: - return [] - first_row = sql_results[0] - if not isinstance(first_row, dict): - return [] - return list(first_row.keys()) - - def _build_usage_summary(self) -> dict[str, Any]: - """Collect per-turn usage from the LLM client and embedding provider when supported.""" - usage: dict[str, Any] = {} - if hasattr(self.llm_client, "usage_summary"): - llm_usage = self.llm_client.usage_summary() - if llm_usage: - usage["llm"] = llm_usage - if hasattr(self.vector_store, "usage_summary"): - embedding_usage = self.vector_store.usage_summary() - if embedding_usage: - usage["embeddings"] = embedding_usage - return usage - - def _compose_small_talk_response(self, user_query: str) -> str: - """Generate the prototype small-talk response or fall back to a fixed helper.""" - if hasattr(self.llm_client, "compose_small_talk"): - try: - return self.llm_client.compose_small_talk(user_query) - except Exception: - logger.exception("Dashboard chat small-talk generation failed") - return "Hi! I can help with your program data and metrics. What would you like to know?" 
- - @staticmethod - def _build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | None: - """Handle obvious greetings and thanks without an LLM round trip.""" - if not GREETING_PATTERN.match(user_query.strip()): - return None - return DashboardChatIntentDecision( - intent=DashboardChatIntent.SMALL_TALK, - confidence=1.0, - reason="Obvious greeting or thanks", - ) - - @staticmethod - def _build_fast_path_small_talk_response(user_query: str) -> str: - """Keep greeting replies instant and deterministic.""" - normalized_query = user_query.strip().lower() - if "thank" in normalized_query: - return "You're welcome. Ask me anything about this dashboard or its data." - if "good morning" in normalized_query: - return "Good morning. Ask me anything about this dashboard or the data behind it." - if "good afternoon" in normalized_query: - return "Good afternoon. Ask me anything about this dashboard or the data behind it." - if "good evening" in normalized_query: - return "Good evening. Ask me anything about this dashboard or the data behind it." - return "Hi. Ask me anything about this dashboard or the data behind it." - - @staticmethod - def _clarification_fallback(missing_info: Sequence[str]) -> str: - """Mirror the prototype's specific clarification nudges when the router omits a question.""" - missing = {item.lower() for item in missing_info} - prompts: list[str] = [] - if "metric" in missing: - prompts.append("which metric") - if "time_range" in missing or "time period" in missing: - prompts.append("what time period") - if "dimension" in missing: - prompts.append("which breakdown or dimension") - if not prompts: - return "Could you be more specific about the metric, program, or time period you want?" - return "Could you clarify " + ", ".join(prompts) + "?" 
- - @staticmethod - def _fallback_answer_text( - retrieved_documents: Sequence[DashboardChatRetrievedDocument], - sql_results: list[dict[str, Any]] | None, - *, - response_format: str = "text", - draft_answer: str | None = None, - ) -> str: - """Fallback response when the model returns no final text.""" - if draft_answer: - return draft_answer - if sql_results is not None: - if not sql_results: - return "I didn't find any matching rows for that question." - if response_format in {"text_with_table", "table"}: - return f"I found {len(sql_results)} matching rows. See the table below for the breakdown." - if len(sql_results) == 1: - return DashboardChatRuntime._single_row_summary(sql_results[0]) - return f"I found {len(sql_results)} matching rows." - if retrieved_documents: - return DashboardChatRuntime._compact_snippet(retrieved_documents[0].content) - return "I couldn't find enough context to answer that." - - @staticmethod - def _single_row_summary(row: dict[str, Any]) -> str: - """Return a readable fallback when one structured row is available.""" - parts = [ - f"{DashboardChatRuntime._humanize_column_name(column)}: {value}" - for column, value in row.items() - ] - return "; ".join(parts) - - @staticmethod - def _humanize_column_name(column_name: str) -> str: - """Convert snake_case warehouse columns into human labels.""" - return str(column_name).replace("_", " ").strip().title() - - @classmethod - def _normalize_sql_results_for_answer( - cls, - sql_results: list[dict[str, Any]] | None, - ) -> list[dict[str, Any]] | None: - """Normalize SQL results into LLM-friendly values for final answer writing.""" - if sql_results is None: - return None - normalized_rows: list[dict[str, Any]] = [] - for row in sql_results: - normalized_row: dict[str, Any] = {} - for column_name, value in row.items(): - normalized_row[column_name] = cls._normalize_sql_value_for_answer( - column_name, - value, - ) - normalized_rows.append(normalized_row) - return normalized_rows - - @classmethod 
- def _normalize_sql_value_for_answer(cls, column_name: str, value: Any) -> Any: - """Format warehouse values into user-friendly forms for answer composition.""" - if value is None: - return None - if isinstance(value, bool): - return value - if isinstance(value, (int, float)): - return cls._format_numeric_answer_value(column_name, value) - text_value = str(value) - numeric_value = cls._parse_numeric_string(text_value) - if numeric_value is None: - return text_value - return cls._format_numeric_answer_value(column_name, numeric_value) - - @classmethod - def _format_numeric_answer_value(cls, column_name: str, value: float | int) -> str | int | float: - """Format numeric values for answer composition.""" - if cls._looks_like_rate_metric(column_name) and 0 <= float(value) <= 1: - percentage_value = f"{float(value) * 100:.1f}".rstrip("0").rstrip(".") - return f"{percentage_value}%" - rounded_value = round(float(value), 2) - if float(rounded_value).is_integer(): - return int(rounded_value) - return f"{rounded_value:.2f}".rstrip("0").rstrip(".") - - @staticmethod - def _parse_numeric_string(value: str) -> float | None: - """Parse decimal-like strings emitted by DjangoJSONEncoder.""" - normalized_value = value.strip() - if not normalized_value: - return None - if not re.fullmatch(r"-?\d+(?:\.\d+)?(?:E-?\d+)?", normalized_value, flags=re.IGNORECASE): - return None - try: - return float(normalized_value) - except ValueError: - return None - - @staticmethod - def _looks_like_rate_metric(column_name: str) -> bool: - """Return whether a metric name likely represents a percentage/rate.""" - normalized_column = column_name.lower() - return any( - token in normalized_column - for token in ["rate", "ratio", "percentage", "percent", "share", "pct"] - ) - - @staticmethod - def _chart_id_from_source_identifier(source_identifier: str) -> int | None: - """Extract chart ids from dashboard export source identifiers.""" - parts = source_identifier.split(":") - if len(parts) >= 4 and 
parts[-2] == "chart": - try: - return int(parts[-1]) - except ValueError: - return None - return None - - @staticmethod - def _unique_id_from_source_identifier(source_identifier: str) -> str | None: - """Extract dbt unique ids from manifest/catalog source identifiers.""" - if ":" not in source_identifier: - return None - prefix, unique_id = source_identifier.split(":", 1) - if prefix not in {"manifest", "catalog"}: - return None - return unique_id diff --git a/ddpui/core/dashboard_chat/session_service.py b/ddpui/core/dashboard_chat/session_service.py index 84317e394..3a5807fc3 100644 --- a/ddpui/core/dashboard_chat/session_service.py +++ b/ddpui/core/dashboard_chat/session_service.py @@ -8,6 +8,7 @@ from django.db.models import Max from django.utils import timezone +from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name from ddpui.core.dashboard_chat.runtime_types import DashboardChatConversationMessage from ddpui.models.dashboard import Dashboard @@ -39,10 +40,16 @@ def get_or_create_dashboard_chat_session( ) -> DashboardChatSession: """Create a new session or validate an existing one for the current dashboard.""" if session_id is None: + if dashboard.org_id != orguser.org_id: + raise DashboardChatSessionError( + "Cannot create a chat session for a dashboard outside the current organization" + ) collection_name = None if orguser.org.dbt and orguser.org.dbt.vector_last_ingested_at is not None: + vector_store_config = DashboardChatVectorStoreConfig.from_env() collection_name = build_dashboard_chat_collection_name( orguser.org.id, + prefix=vector_store_config.collection_prefix, version=orguser.org.dbt.vector_last_ingested_at, ) return DashboardChatSession.objects.create( diff --git a/ddpui/core/dashboard_chat/vector_building.py b/ddpui/core/dashboard_chat/vector_building.py new file mode 100644 index 000000000..239c41bd6 --- /dev/null +++ 
b/ddpui/core/dashboard_chat/vector_building.py @@ -0,0 +1,153 @@ +"""Vector build pipeline for dashboard chat retrieval.""" + +from dataclasses import dataclass +from datetime import timedelta +from typing import Callable + +from django.utils import timezone + +from ddpui.core.dashboard_chat.dbt_docs import ( + DashboardChatDbtDocsArtifacts, + generate_dashboard_chat_dbt_docs_artifacts, +) +from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig +from ddpui.core.dashboard_chat.vector_document_builder import DashboardChatVectorDocumentBuilder +from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore +from ddpui.models.dashboard_chat import DashboardChatSession +from ddpui.models.org import Org + +INGEST_SOURCE_ORDER = [ + DashboardChatSourceType.ORG_CONTEXT, + DashboardChatSourceType.DASHBOARD_CONTEXT, + DashboardChatSourceType.DASHBOARD_EXPORT, + DashboardChatSourceType.DBT_MANIFEST, + DashboardChatSourceType.DBT_CATALOG, +] + + +class DashboardChatVectorBuildError(Exception): + """Raised when the dashboard chat vector build cannot complete.""" + + +@dataclass(frozen=True) +class DashboardChatVectorBuildResult: + """Summary of one completed org vector build.""" + + org_id: int + docs_generated_at: timezone.datetime | None + vector_ingested_at: timezone.datetime + source_document_counts: dict[str, int] + upserted_document_ids: list[str] + deleted_document_ids: list[str] + + +class DashboardChatVectorBuildService: + """Build org-scoped dashboard-chat vector context and sync it into Chroma.""" + + def __init__( + self, + vector_store: ChromaDashboardChatVectorStore | None = None, + dbt_docs_generator: Callable[[Org, object], DashboardChatDbtDocsArtifacts] | None = None, + source_config: DashboardChatSourceConfig | None = None, + document_builder: DashboardChatVectorDocumentBuilder | None = None, + ): + self.vector_store = vector_store or 
ChromaDashboardChatVectorStore() + self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts + self.source_config = source_config or DashboardChatSourceConfig.from_env() + self.document_builder = document_builder or DashboardChatVectorDocumentBuilder( + source_config=self.source_config + ) + + def build_org_vector_context(self, org: Org) -> DashboardChatVectorBuildResult: + """Run dbt docs generation and rebuild the desired vector documents for an org.""" + if org.dbt is None: + raise DashboardChatVectorBuildError("dbt workspace not configured") + + collection_versioned_at = timezone.now() + target_collection_name = self.vector_store.collection_name( + org.id, + version=collection_versioned_at, + ) + dbt_docs = None + if self.source_config.is_enabled( + DashboardChatSourceType.DBT_MANIFEST + ) or self.source_config.is_enabled(DashboardChatSourceType.DBT_CATALOG): + dbt_docs = self.dbt_docs_generator(org, org.dbt) + documents_by_source = self.document_builder.build_documents_by_source(org, dbt_docs) + desired_documents = [ + document + for source_type in INGEST_SOURCE_ORDER + if self.source_config.is_enabled(source_type) + for document in documents_by_source[source_type.value] + ] + if self.vector_store.load_collection( + org.id, + collection_name=target_collection_name, + allow_legacy_fallback=False, + ) is not None: + self.vector_store.delete_collection( + org.id, + collection_name=target_collection_name, + ) + + upserted_document_ids = sorted( + self.vector_store.upsert_documents( + org.id, + desired_documents, + collection_name=target_collection_name, + ) + ) + + vector_ingested_at = collection_versioned_at + org.dbt.vector_last_ingested_at = collection_versioned_at + org.dbt.save(update_fields=["vector_last_ingested_at", "updated_at"]) + self._garbage_collect_inactive_collections( + org=org, + active_collection_name=target_collection_name, + ) + + return DashboardChatVectorBuildResult( + org_id=org.id, + 
docs_generated_at=dbt_docs.generated_at if dbt_docs else org.dbt.docs_generated_at, + vector_ingested_at=vector_ingested_at, + source_document_counts={ + source_type.value: ( + len(documents_by_source[source_type.value]) + if self.source_config.is_enabled(source_type) + else 0 + ) + for source_type in INGEST_SOURCE_ORDER + }, + upserted_document_ids=upserted_document_ids, + deleted_document_ids=[], + ) + + def _garbage_collect_inactive_collections( + self, + *, + org: Org, + active_collection_name: str, + ) -> None: + """Delete old versioned collections that are not pinned by recent chat sessions.""" + retention_cutoff = timezone.now() - timedelta(hours=24) + recent_sessions = DashboardChatSession.objects.filter( + org=org, + updated_at__gte=retention_cutoff, + ) + pinned_collection_names = { + collection_name + for collection_name in recent_sessions.values_list("vector_collection_name", flat=True) + if collection_name + } + if recent_sessions.filter(vector_collection_name__isnull=True).exists(): + pinned_collection_names.add(self.vector_store.collection_name(org.id)) + pinned_collection_names.add(active_collection_name) + + for collection_name in self.vector_store.list_org_collection_names(org.id): + if collection_name in pinned_collection_names: + continue + self.vector_store.delete_collection( + org.id, + collection_name=collection_name, + ) diff --git a/ddpui/core/dashboard_chat/ingestion.py b/ddpui/core/dashboard_chat/vector_document_builder.py similarity index 76% rename from ddpui/core/dashboard_chat/ingestion.py rename to ddpui/core/dashboard_chat/vector_document_builder.py index 562f94647..7d13dd87b 100644 --- a/ddpui/core/dashboard_chat/ingestion.py +++ b/ddpui/core/dashboard_chat/vector_document_builder.py @@ -1,25 +1,18 @@ -"""Context-build pipeline for dashboard chat retrieval.""" +"""Document-building helpers for dashboard chat vector context.""" from collections import defaultdict -from dataclasses import dataclass -from datetime import timedelta 
import json -from typing import Callable from django.utils import timezone -from ddpui.core.dashboard_chat.dbt_docs import ( - DashboardChatDbtDocsArtifacts, - generate_dashboard_chat_dbt_docs_artifacts, -) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig +from ddpui.core.dashboard_chat.dbt_docs import DashboardChatDbtDocsArtifacts from ddpui.core.dashboard_chat.vector_documents import ( DashboardChatSourceType, DashboardChatVectorDocument, ) -from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore from ddpui.models.dashboard import Dashboard -from ddpui.models.dashboard_chat import DashboardAIContext, DashboardChatSession, OrgAIContext +from ddpui.models.dashboard_chat import DashboardAIContext, OrgAIContext from ddpui.models.org import Org from ddpui.models.visualization import Chart from ddpui.services.dashboard_service import DashboardService @@ -35,22 +28,6 @@ ] -class DashboardChatIngestionError(Exception): - """Raised when the dashboard chat context build cannot complete.""" - - -@dataclass(frozen=True) -class DashboardChatIngestionResult: - """Summary of one completed org context build.""" - - org_id: int - docs_generated_at: timezone.datetime | None - vector_ingested_at: timezone.datetime - source_document_counts: dict[str, int] - upserted_document_ids: list[str] - deleted_document_ids: list[str] - - def _normalize_text(value: str) -> str: """Normalize text before chunking so document IDs stay deterministic.""" return "\n".join( @@ -98,113 +75,13 @@ def chunk_dashboard_chat_text(text: str, max_chars: int = MARKDOWN_CHUNK_MAX_CHA return chunks -class DashboardChatIngestionService: - """Build and ingest org-scoped retrieval documents for dashboard chat.""" +class DashboardChatVectorDocumentBuilder: + """Build dashboard-chat vector documents from app context and dbt docs.""" - def __init__( - self, - vector_store: ChromaDashboardChatVectorStore | None = None, - dbt_docs_generator: Callable[[Org, object], 
DashboardChatDbtDocsArtifacts] | None = None, - source_config: DashboardChatSourceConfig | None = None, - ): - self.vector_store = vector_store or ChromaDashboardChatVectorStore() - self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts + def __init__(self, source_config: DashboardChatSourceConfig | None = None): self.source_config = source_config or DashboardChatSourceConfig.from_env() - def ingest_org(self, org: Org) -> DashboardChatIngestionResult: - """Run dbt docs generation and rebuild the desired vector documents for an org.""" - if org.dbt is None: - raise DashboardChatIngestionError("dbt workspace not configured") - - collection_versioned_at = timezone.now() - target_collection_name = self.vector_store.collection_name( - org.id, - version=collection_versioned_at, - ) - dbt_docs = None - if self.source_config.is_enabled( - DashboardChatSourceType.DBT_MANIFEST - ) or self.source_config.is_enabled(DashboardChatSourceType.DBT_CATALOG): - dbt_docs = self.dbt_docs_generator(org, org.dbt) - documents_by_source = self._build_documents(org, dbt_docs) - desired_documents = [ - document - for source_type in INGEST_SOURCE_ORDER - if self.source_config.is_enabled(source_type) - for document in documents_by_source[source_type.value] - ] - if self.vector_store.load_collection( - org.id, - collection_name=target_collection_name, - allow_legacy_fallback=False, - ) is not None: - self.vector_store.delete_collection( - org.id, - collection_name=target_collection_name, - ) - - upserted_document_ids = sorted( - self.vector_store.upsert_documents( - org.id, - desired_documents, - collection_name=target_collection_name, - ) - ) - - vector_ingested_at = collection_versioned_at - org.dbt.vector_last_ingested_at = collection_versioned_at - org.dbt.save(update_fields=["vector_last_ingested_at", "updated_at"]) - self._garbage_collect_inactive_collections( - org=org, - active_collection_name=target_collection_name, - ) - - return 
DashboardChatIngestionResult( - org_id=org.id, - docs_generated_at=dbt_docs.generated_at if dbt_docs else org.dbt.docs_generated_at, - vector_ingested_at=vector_ingested_at, - source_document_counts={ - source_type.value: ( - len(documents_by_source[source_type.value]) - if self.source_config.is_enabled(source_type) - else 0 - ) - for source_type in INGEST_SOURCE_ORDER - }, - upserted_document_ids=upserted_document_ids, - deleted_document_ids=[], - ) - - def _garbage_collect_inactive_collections( - self, - *, - org: Org, - active_collection_name: str, - ) -> None: - """Delete old versioned collections that are not pinned by recent chat sessions.""" - retention_cutoff = timezone.now() - timedelta(hours=24) - recent_sessions = DashboardChatSession.objects.filter( - org=org, - updated_at__gte=retention_cutoff, - ) - pinned_collection_names = { - collection_name - for collection_name in recent_sessions.values_list("vector_collection_name", flat=True) - if collection_name - } - if recent_sessions.filter(vector_collection_name__isnull=True).exists(): - pinned_collection_names.add(self.vector_store.collection_name(org.id)) - pinned_collection_names.add(active_collection_name) - - for collection_name in self.vector_store.list_org_collection_names(org.id): - if collection_name in pinned_collection_names: - continue - self.vector_store.delete_collection( - org.id, - collection_name=collection_name, - ) - - def _build_documents( + def build_documents_by_source( self, org: Org, dbt_docs: DashboardChatDbtDocsArtifacts | None, @@ -504,7 +381,7 @@ def _include_dbt_unique_id(unique_id: str, project_name: str | None) -> bool: @staticmethod def _format_manifest_source(unique_id: str, source: dict) -> str: """Format a manifest source entry into stable text.""" - column_lines = DashboardChatIngestionService._format_columns(source.get("columns") or {}) + column_lines = DashboardChatVectorDocumentBuilder._format_columns(source.get("columns") or {}) blocks = [ f"dbt manifest source: 
{source.get('schema')}.{source.get('name')}", f"Unique id: {unique_id}", @@ -530,7 +407,7 @@ def _format_manifest_model(unique_id: str, node: dict) -> str: blocks.append( "Depends on:\n" + "\n".join(f"- {dependency}" for dependency in depends_on_nodes) ) - column_lines = DashboardChatIngestionService._format_columns(node.get("columns") or {}) + column_lines = DashboardChatVectorDocumentBuilder._format_columns(node.get("columns") or {}) if column_lines: blocks.append("Columns:\n" + "\n".join(column_lines)) return "\n\n".join( @@ -549,7 +426,7 @@ def _format_catalog_entry(unique_id: str, entry: dict, entry_type: str) -> str: f"Database: {metadata.get('database')}", f"Type: {metadata.get('type')}", ] - column_lines = DashboardChatIngestionService._format_catalog_columns( + column_lines = DashboardChatVectorDocumentBuilder._format_catalog_columns( entry.get("columns") or {} ) if column_lines: diff --git a/ddpui/core/dashboard_chat/vector_store.py b/ddpui/core/dashboard_chat/vector_store.py index 928fb66b6..11d76e69b 100644 --- a/ddpui/core/dashboard_chat/vector_store.py +++ b/ddpui/core/dashboard_chat/vector_store.py @@ -1,9 +1,14 @@ -"""Chroma-backed vector store wrapper for dashboard chat.""" +"""Dashboard-chat vector retrieval built on top of the shared Chroma transport.""" -from dataclasses import dataclass -import os -from typing import Any, Protocol +from collections.abc import Sequence +from typing import Any +from chromadb import ClientAPI + +from ddpui.core.dashboard_chat.embeddings import ( + DashboardChatEmbeddingProvider, + OpenAIEmbeddingProvider, +) from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig from ddpui.core.dashboard_chat.vector_documents import ( DashboardChatSourceType, @@ -11,130 +16,35 @@ build_dashboard_chat_collection_base_name, build_dashboard_chat_collection_name, ) +from ddpui.utils.vector.chroma import ( + ChromaHttpVectorStore, + ChromaQueryResult, + ChromaStoredDocument, +) - -class 
DashboardChatEmbeddingProvider(Protocol): - """Embedding provider interface used by the vector store wrapper.""" - - def embed_documents(self, texts: list[str]) -> list[list[float]]: - """Embed a batch of texts.""" - - def embed_query(self, text: str) -> list[float]: - """Embed a single query.""" - - def reset_usage(self) -> None: - """Reset per-turn embedding usage before a new runtime invocation.""" - - -class OpenAIEmbeddingProvider: - """OpenAI embeddings adapter for dashboard chat retrieval.""" - - def __init__( - self, - api_key: str | None = None, - model: str = "text-embedding-3-small", - client: Any = None, - ): - self.api_key = api_key or os.getenv("OPENAI_API_KEY") - self.model = model - self.usage_events: list[dict[str, Any]] = [] - if client is None: - if not self.api_key: - raise ValueError("OPENAI_API_KEY must be set for dashboard chat embeddings") - from openai import OpenAI - - client = OpenAI(api_key=self.api_key, max_retries=2) - self.client = client - - def reset_usage(self) -> None: - """Reset aggregated embedding usage before one new chat turn.""" - self.usage_events = [] - - def embed_documents(self, texts: list[str]) -> list[list[float]]: - """Embed a batch of documents using OpenAI.""" - if not texts: - return [] - response = self.client.embeddings.create(model=self.model, input=texts) - self._record_usage("embed_documents", response, len(texts)) - return [item.embedding for item in response.data] - - def embed_query(self, text: str) -> list[float]: - """Embed a single query using the document embedding path.""" - return self.embed_documents([text])[0] - - def usage_summary(self) -> dict[str, Any]: - """Return aggregated embedding usage for the current turn.""" - totals = { - "prompt_tokens": 0, - "total_tokens": 0, - } - for event in self.usage_events: - totals["prompt_tokens"] += event.get("prompt_tokens", 0) - totals["total_tokens"] += event.get("total_tokens", 0) - return { - "model": self.model, - "calls": list(self.usage_events), - 
"totals": totals, - } - - def _record_usage(self, operation: str, response: Any, input_count: int) -> None: - """Capture embedding usage from one OpenAI embeddings response.""" - usage = getattr(response, "usage", None) - if usage is None: - return - self.usage_events.append( - { - "operation": operation, - "model": self.model, - "input_count": input_count, - "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, - "total_tokens": getattr(usage, "total_tokens", 0) or 0, - } - ) - - -@dataclass(frozen=True) -class DashboardChatVectorQueryResult: - """Single query result returned from Chroma.""" - - document_id: str - content: str - metadata: dict[str, Any] - distance: float | None = None - - -@dataclass(frozen=True) -class DashboardChatStoredDocument: - """Stored document metadata returned from Chroma collection reads.""" - - document_id: str - metadata: dict[str, Any] - content: str | None = None +DashboardChatVectorQueryResult = ChromaQueryResult +DashboardChatStoredDocument = ChromaStoredDocument class ChromaDashboardChatVectorStore: - """Thin wrapper around the Chroma HTTP client with Dalgo-specific conventions.""" + """Dashboard-chat-specific adapter on top of the generic Chroma wrapper.""" def __init__( self, config: DashboardChatVectorStoreConfig | None = None, embedding_provider: DashboardChatEmbeddingProvider | None = None, - client: Any = None, + client: ClientAPI | None = None, + chroma_store: ChromaHttpVectorStore | None = None, ): self.config = config or DashboardChatVectorStoreConfig.from_env() self.embedding_provider = embedding_provider or OpenAIEmbeddingProvider( model=self.config.embedding_model ) - self.client = client or self._build_client() - - def _build_client(self) -> Any: - """Build the real Chroma HTTP client lazily.""" - from chromadb import HttpClient - - return HttpClient( + self.chroma_store = chroma_store or ChromaHttpVectorStore( host=self.config.chroma_host, port=self.config.chroma_port, ssl=self.config.chroma_ssl, + 
client=client, ) def collection_name( @@ -158,7 +68,7 @@ def create_collection( ) -> Any: """Create or load the Chroma collection for an org.""" resolved_collection_name = collection_name or self.collection_name(org_id) - return self.client.get_or_create_collection( + return self.chroma_store.create_collection( name=resolved_collection_name, metadata={"org_id": str(org_id)}, ) @@ -171,23 +81,16 @@ def load_collection( allow_legacy_fallback: bool = True, ) -> Any | None: """Load an existing Chroma collection for an org.""" - from chromadb.errors import InvalidCollectionException - - try: - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.client.get_collection(name=resolved_collection_name) - except (InvalidCollectionException, ValueError): - if collection_name is None or not allow_legacy_fallback: - return None - try: - return self.client.get_collection( - name=build_dashboard_chat_collection_base_name( - org_id, - self.config.collection_prefix, - ) - ) - except (InvalidCollectionException, ValueError): - return None + resolved_collection_name = collection_name or self.collection_name(org_id) + collection = self.chroma_store.load_collection(resolved_collection_name) + if collection is not None or collection_name is None or not allow_legacy_fallback: + return collection + return self.chroma_store.load_collection( + build_dashboard_chat_collection_base_name( + org_id, + self.config.collection_prefix, + ) + ) def delete_collection( self, @@ -197,27 +100,11 @@ def delete_collection( ) -> bool: """Delete the Chroma collection for an org if it exists.""" resolved_collection_name = collection_name or self.collection_name(org_id) - if self.load_collection( - org_id, - collection_name=resolved_collection_name, - allow_legacy_fallback=False, - ) is None: - return False - self.client.delete_collection(name=resolved_collection_name) - return True + return self.chroma_store.delete_collection(resolved_collection_name) def 
list_collection_names(self) -> list[str]: """Return all Chroma collection names for the current client.""" - raw_collections = self.client.list_collections() - collection_names: list[str] = [] - for collection in raw_collections: - if isinstance(collection, str): - collection_names.append(collection) - continue - name = getattr(collection, "name", None) - if name: - collection_names.append(str(name)) - return collection_names + return self.chroma_store.list_collection_names() def list_org_collection_names(self, org_id: int) -> list[str]: """Return all collection names that belong to one org.""" @@ -231,62 +118,40 @@ def list_org_collection_names(self, org_id: int) -> list[str]: def get_documents( self, org_id: int, - source_types: list[DashboardChatSourceType | str] | None = None, + source_types: Sequence[DashboardChatSourceType] | None = None, dashboard_id: int | None = None, include_documents: bool = False, collection_name: str | None = None, ) -> list[DashboardChatStoredDocument]: """Load stored documents for an org using metadata filters.""" - collection = self.load_collection(org_id, collection_name=collection_name) - if collection is None: - return [] - - include = ["metadatas"] - if include_documents: - include.append("documents") - - result = collection.get( - where=self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id), - include=include, + resolved_collection_name = collection_name or self.collection_name(org_id) + return self.chroma_store.get_documents( + resolved_collection_name, + where=self._build_vector_metadata_filter( + source_types=source_types, + dashboard_id=dashboard_id, + ), + include_documents=include_documents, ) - return self._parse_get_result(result, include_documents=include_documents) def delete_documents( self, org_id: int, ids: list[str] | None = None, - source_types: list[DashboardChatSourceType | str] | None = None, + source_types: Sequence[DashboardChatSourceType] | None = None, dashboard_id: int | None = 
None, collection_name: str | None = None, ) -> int: """Delete matching documents from an org collection.""" - collection = self.load_collection( - org_id, - collection_name=collection_name, - allow_legacy_fallback=False, - ) - if collection is None: - return 0 - - where = self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id) - if ids is None and where is None: - return 0 - - deleted_count = ( - len(ids) - if ids is not None - else len( - self.get_documents( - org_id, - source_types=source_types, - dashboard_id=dashboard_id, - include_documents=False, - collection_name=collection_name, - ) - ) + resolved_collection_name = collection_name or self.collection_name(org_id) + return self.chroma_store.delete_documents( + resolved_collection_name, + ids=ids, + where=self._build_vector_metadata_filter( + source_types=source_types, + dashboard_id=dashboard_id, + ), ) - collection.delete(ids=ids, where=where) - return deleted_count def upsert_documents( self, @@ -298,19 +163,19 @@ def upsert_documents( if not documents: return [] - collection = self.create_collection(org_id, collection_name=collection_name) contents = [document.content for document in documents] document_ids = [document.document_id for document in documents] metadatas = [document.metadata() for document in documents] embeddings = self.embedding_provider.embed_documents(contents) - - collection.upsert( + resolved_collection_name = collection_name or self.collection_name(org_id) + return self.chroma_store.upsert_documents( + resolved_collection_name, ids=document_ids, documents=contents, metadatas=metadatas, embeddings=embeddings, + collection_metadata={"org_id": str(org_id)}, ) - return document_ids def embed_query(self, query_text: str) -> list[float]: """Build one query embedding that can be reused across filtered retrieval calls.""" @@ -332,40 +197,33 @@ def query( org_id: int, query_text: str, n_results: int = 5, - source_types: list[DashboardChatSourceType | str] | None = None, + 
source_types: Sequence[DashboardChatSourceType] | None = None, dashboard_id: int | None = None, query_embedding: list[float] | None = None, collection_name: str | None = None, ) -> list[DashboardChatVectorQueryResult]: """Query the org-specific Chroma collection.""" - collection = self.load_collection(org_id, collection_name=collection_name) - if collection is None: - return [] - - where = self._build_where_clause(source_types=source_types, dashboard_id=dashboard_id) - result = collection.query( - query_embeddings=[query_embedding or self.embed_query(query_text)], + resolved_collection_name = collection_name or self.collection_name(org_id) + return self.chroma_store.query( + resolved_collection_name, + query_embedding=query_embedding or self.embed_query(query_text), n_results=n_results, - where=where, - include=["documents", "metadatas", "distances"], + where=self._build_vector_metadata_filter( + source_types=source_types, + dashboard_id=dashboard_id, + ), ) - return self._parse_query_result(result) @staticmethod - def _build_where_clause( - source_types: list[DashboardChatSourceType | str] | None = None, + def _build_vector_metadata_filter( + source_types: Sequence[DashboardChatSourceType] | None = None, dashboard_id: int | None = None, ) -> dict[str, Any] | None: """Build the metadata filter used for Chroma queries.""" filters: list[dict[str, Any]] = [] if source_types: - normalized_types = [ - source_type.value - if isinstance(source_type, DashboardChatSourceType) - else source_type - for source_type in source_types - ] + normalized_types = [source_type.value for source_type in source_types] if len(normalized_types) == 1: filters.append({"source_type": normalized_types[0]}) else: @@ -379,51 +237,3 @@ def _build_where_clause( if len(filters) == 1: return filters[0] return {"$and": filters} - - @staticmethod - def _parse_query_result(result: dict[str, Any]) -> list[DashboardChatVectorQueryResult]: - """Parse Chroma's nested result shape into flat typed rows.""" - 
ids = result.get("ids", [[]]) - documents = result.get("documents", [[]]) - metadatas = result.get("metadatas", [[]]) - distances = result.get("distances", [[]]) - - parsed_results: list[DashboardChatVectorQueryResult] = [] - for document_id, content, metadata, distance in zip( - ids[0] if ids else [], - documents[0] if documents else [], - metadatas[0] if metadatas else [], - distances[0] if distances else [], - ): - parsed_results.append( - DashboardChatVectorQueryResult( - document_id=document_id, - content=content, - metadata=metadata, - distance=distance, - ) - ) - return parsed_results - - @staticmethod - def _parse_get_result( - result: dict[str, Any], - include_documents: bool = False, - ) -> list[DashboardChatStoredDocument]: - """Parse Chroma's get result into typed stored-document rows.""" - ids = result.get("ids", []) - metadatas = result.get("metadatas", []) - documents = result.get("documents", []) if include_documents else [] - - parsed_results: list[DashboardChatStoredDocument] = [] - for index, document_id in enumerate(ids): - parsed_results.append( - DashboardChatStoredDocument( - document_id=document_id, - metadata=metadatas[index] if index < len(metadatas) else {}, - content=documents[index] - if include_documents and index < len(documents) - else None, - ) - ) - return parsed_results diff --git a/ddpui/core/dashboard_chat/warehouse_tools.py b/ddpui/core/dashboard_chat/warehouse_tools.py index c708693d6..005aeb41b 100644 --- a/ddpui/core/dashboard_chat/warehouse_tools.py +++ b/ddpui/core/dashboard_chat/warehouse_tools.py @@ -2,15 +2,19 @@ import json import logging +import re from typing import Any from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse from ddpui.utils import secretsmanager from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory +from ddpui.utils.warehouse.client.warehouse_interface import Warehouse, WarehouseType logger = 
logging.getLogger(__name__) +SAFE_WAREHOUSE_IDENTIFIER_PATTERN = re.compile(r"^[A-Za-z0-9_-]+$") + class DashboardChatWarehouseToolsError(Exception): """Raised when a warehouse-backed dashboard chat action cannot complete.""" @@ -23,7 +27,7 @@ def __init__( self, org: Org, org_warehouse: OrgWarehouse | None = None, - warehouse_client: Any = None, + warehouse_client: Warehouse | None = None, max_rows: int = 200, ): self.org = org @@ -41,7 +45,15 @@ def get_schema_snippets(self, tables: list[str]) -> dict[str, DashboardChatSchem snippets: dict[str, DashboardChatSchemaSnippet] = {} for table_name in list(dict.fromkeys(tables)): - parsed_table = self._parse_table_name(table_name) + try: + parsed_table = self._parse_table_name(table_name) + except DashboardChatWarehouseToolsError as error: + logger.warning( + "dashboard chat schema lookup skipped invalid table %s: %s", + table_name, + error, + ) + continue if parsed_table is None: continue schema_name, bare_table_name = parsed_table @@ -107,7 +119,7 @@ def _build_distinct_values_query( limit: int, ) -> str: """Build a warehouse-specific query for distinct values.""" - if self.org_warehouse.wtype == "postgres": + if self.org_warehouse.wtype == WarehouseType.POSTGRES: quoted_column = self._quote_postgres_identifier(column_name) return f""" SELECT DISTINCT {quoted_column} AS value @@ -118,7 +130,7 @@ def _build_distinct_values_query( LIMIT {int(limit)} """ - if self.org_warehouse.wtype == "bigquery": + if self.org_warehouse.wtype == WarehouseType.BIGQUERY: quoted_column = self._quote_bigquery_identifier(column_name) return f""" SELECT DISTINCT {quoted_column} AS value @@ -145,7 +157,19 @@ def _quote_bigquery_table_ref(self, schema_name: str, table_name: str) -> str: project_name = self._get_bigquery_project_id() if not project_name: raise DashboardChatWarehouseToolsError("BigQuery project id not configured") - return f"`{project_name}.{schema_name}.{table_name}`" + safe_project_name = self._normalize_identifier_component( 
+ project_name, + "BigQuery project id", + ) + safe_schema_name = self._normalize_identifier_component( + schema_name, + "schema name", + ) + safe_table_name = self._normalize_identifier_component( + table_name, + "table name", + ) + return f"`{safe_project_name}.{safe_schema_name}.{safe_table_name}`" def _get_bigquery_project_id(self) -> str | None: """Read the BigQuery project id from stored warehouse credentials.""" @@ -184,6 +208,22 @@ def _parse_table_name(table_name: str | None) -> tuple[str, str] | None: if not table_name or "." not in table_name: return None schema_name, bare_table_name = table_name.split(".", 1) - return schema_name.strip().strip('"').strip("`"), bare_table_name.strip().strip('"').strip( - "`" + return DashboardChatWarehouseTools._normalize_identifier_component( + schema_name, + "schema name", + ), DashboardChatWarehouseTools._normalize_identifier_component( + bare_table_name, + "table name", ) + + @staticmethod + def _normalize_identifier_component(component: str, component_name: str) -> str: + """Normalize a schema/table/project component and reject unsafe identifier text.""" + normalized_component = component.strip().strip('"').strip("`") + if not normalized_component: + raise DashboardChatWarehouseToolsError(f"{component_name} is required") + if not SAFE_WAREHOUSE_IDENTIFIER_PATTERN.fullmatch(normalized_component): + raise DashboardChatWarehouseToolsError( + f"Invalid {component_name} for dashboard chat warehouse access" + ) + return normalized_component diff --git a/ddpui/tests/core/dashboard_chat/test_llm_client.py b/ddpui/tests/core/dashboard_chat/test_llm_client.py index 83af4f2fa..bd5ee5c5d 100644 --- a/ddpui/tests/core/dashboard_chat/test_llm_client.py +++ b/ddpui/tests/core/dashboard_chat/test_llm_client.py @@ -2,8 +2,8 @@ import json -import ddpui.core.dashboard_chat.llm_client as llm_client_module -from ddpui.core.dashboard_chat.llm_client import OpenAIDashboardChatLlmClient +import ddpui.core.dashboard_chat.openai_llm_client 
as llm_client_module +from ddpui.core.dashboard_chat.openai_llm_client import OpenAIDashboardChatLlmClient from ddpui.core.dashboard_chat.runtime_types import ( DashboardChatConversationContext, DashboardChatIntent, diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index 90113ba0b..9e7bfa46b 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -12,7 +12,7 @@ DashboardChatAllowlistBuilder, ) from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig -from ddpui.core.dashboard_chat.runtime import DashboardChatRuntime +from ddpui.core.dashboard_chat.graph.orchestrator import DashboardChatRuntime from ddpui.core.dashboard_chat.runtime_types import ( DashboardChatConversationContext, DashboardChatConversationMessage, @@ -62,15 +62,22 @@ def query( "org_id": org_id, "query_text": query_text, "n_results": n_results, - "source_types": list(source_types) if source_types else [], + "source_types": [ + source_type.value if hasattr(source_type, "value") else source_type + for source_type in (source_types or []) + ], "dashboard_id": dashboard_id, "query_embedding": query_embedding, "collection_name": collection_name, } ) results = [] + normalized_source_types = { + source_type.value if hasattr(source_type, "value") else source_type + for source_type in (source_types or []) + } for row in self.rows: - if source_types and row.metadata.get("source_type") not in source_types: + if normalized_source_types and row.metadata.get("source_type") not in normalized_source_types: continue if dashboard_id is not None and row.metadata.get("dashboard_id") != dashboard_id: continue @@ -853,7 +860,7 @@ def test_get_distinct_values_returns_column_correction_for_wrong_table(primary_d "warehouse_tools": None, } - result = runtime._tool_get_distinct_values( + result = runtime._handle_get_distinct_values_tool( { "table": 
"analytics.donor_funding_quarterly", "column": "donor_type", @@ -1506,12 +1513,12 @@ def test_runtime_dbt_tools_use_compact_allowlisted_index(): "dbt_index": dbt_index, } - search_result = runtime._tool_search_dbt_models( + search_result = runtime._handle_search_dbt_models_tool( {"query": "program reach", "limit": 5}, state, {}, ) - info_result = runtime._tool_get_dbt_model_info( + info_result = runtime._handle_get_dbt_model_info_tool( {"model_name": "analytics.program_reach"}, state, {}, @@ -1725,8 +1732,8 @@ def test_runtime_skips_disabled_source_types_during_retrieval(org, primary_dashb llm_client=ContextToolLoopLlm(), source_config=DashboardChatSourceConfig( enabled_source_types=( - "dashboard_context", - "dashboard_export", + DashboardChatSourceType.DASHBOARD_CONTEXT, + DashboardChatSourceType.DASHBOARD_EXPORT, ) ), ) @@ -1760,7 +1767,7 @@ def test_list_tables_by_keyword_matches_allowlisted_table_names_without_schema_l } execution_context = {"schema_cache": {}, "warnings": []} - result = runtime._tool_list_tables_by_keyword( + result = runtime._handle_list_tables_by_keyword_tool( {"keyword": "district_funding_efficiency_quarterly", "limit": 10}, state, execution_context, @@ -1828,7 +1835,7 @@ def test_tool_document_payload_exposes_structured_chart_metadata(): llm_client=SmallTalkLlm(), ) - payload = runtime._tool_document_payload( + payload = runtime._build_tool_document_payload( DashboardChatRetrievedDocument( document_id="doc-chart", source_type=DashboardChatSourceType.DASHBOARD_EXPORT.value, diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index 9a2ad5ab5..a595146d4 100644 --- a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -1,6 +1,7 @@ """Tests for dashboard chat session creation and reuse rules.""" from datetime import datetime, timezone +from unittest.mock import patch import pytest @@ -93,6 +94,41 @@ 
def other_orguser(org, seed_db): user.delete() +@pytest.fixture +def other_org(seed_db): + organization = Org.objects.create( + name="Other Dashboard Chat Org", + slug="other-dashchat", + airbyte_workspace_id="workspace-2", + ) + yield organization + organization.delete() + + +@pytest.fixture +def other_org_dashboard(other_org, seed_db): + owner = OrgUser.objects.create( + user=User.objects.create( + username="other-dashchat-owner", + email="other-dashchat-owner@test.com", + password="testpassword", + ), + org=other_org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + dashboard_instance = Dashboard.objects.create( + title="Other Impact Overview", + dashboard_type="native", + created_by=owner, + last_modified_by=owner, + org=other_org, + ) + yield dashboard_instance + dashboard_instance.delete() + owner.delete() + owner.user.delete() + + def test_get_or_create_dashboard_chat_session_creates_new_session(session_owner, dashboard): """A missing session_id should create a new session for the current user.""" session = get_or_create_dashboard_chat_session( @@ -107,6 +143,22 @@ def test_get_or_create_dashboard_chat_session_creates_new_session(session_owner, assert session.vector_collection_name is None +def test_get_or_create_dashboard_chat_session_rejects_cross_org_dashboard_on_create( + session_owner, + other_org_dashboard, +): + """New chat sessions must not be created against a dashboard from another org.""" + with pytest.raises( + DashboardChatSessionError, + match="outside the current organization", + ): + get_or_create_dashboard_chat_session( + orguser=session_owner, + dashboard=other_org_dashboard, + session_id=None, + ) + + def test_get_or_create_dashboard_chat_session_pins_active_vector_collection( session_owner, dashboard, @@ -121,14 +173,20 @@ def test_get_or_create_dashboard_chat_session_pins_active_vector_collection( session_owner.org.dbt = org_dbt session_owner.org.save(update_fields=["dbt"]) - session = 
get_or_create_dashboard_chat_session( - orguser=session_owner, - dashboard=dashboard, - session_id=None, - ) + with patch.dict( + "os.environ", + {"AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX": "tenant_"}, + clear=False, + ): + session = get_or_create_dashboard_chat_session( + orguser=session_owner, + dashboard=dashboard, + session_id=None, + ) assert session.vector_collection_name == build_dashboard_chat_collection_name( session_owner.org.id, + prefix="tenant_", version=org_dbt.vector_last_ingested_at, ) diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 2ba10442e..44ff220a5 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -11,7 +11,7 @@ run_dashboard_chat_turn, schedule_dashboard_chat_context_builds, ) -from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionResult +from ddpui.core.dashboard_chat.vector_building import DashboardChatVectorBuildResult from ddpui.core.dashboard_chat.runtime_types import DashboardChatIntent, DashboardChatResponse from ddpui.models.org import Org, OrgDbt from ddpui.models.dashboard import Dashboard @@ -115,14 +115,14 @@ def test_build_dashboard_chat_context_for_org_skips_when_locked(orguser): with patch( "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client - ), patch("ddpui.celeryworkers.tasks.DashboardChatIngestionService") as ingestion_service: + ), patch("ddpui.celeryworkers.tasks.DashboardChatVectorBuildService") as vector_build_service: result = build_dashboard_chat_context_for_org.run(org.id) assert result == {"status": "skipped_locked", "org_id": org.id} - ingestion_service.assert_not_called() + vector_build_service.assert_not_called() -def test_build_dashboard_chat_context_for_org_runs_ingestion(orguser): +def test_build_dashboard_chat_context_for_org_runs_vector_build(orguser): org = orguser.org _create_org_dbt(org) OrgPreferences.objects.create(org=org, 
ai_data_sharing_enabled=True) @@ -134,7 +134,7 @@ def test_build_dashboard_chat_context_for_org_runs_ingestion(orguser): redis_client = Mock() redis_client.lock.return_value = redis_lock - result_payload = DashboardChatIngestionResult( + result_payload = DashboardChatVectorBuildResult( org_id=org.id, docs_generated_at=timezone.now(), vector_ingested_at=timezone.now(), @@ -142,28 +142,28 @@ def test_build_dashboard_chat_context_for_org_runs_ingestion(orguser): upserted_document_ids=["abc"], deleted_document_ids=[], ) - ingestion_service = Mock() - ingestion_service.ingest_org.return_value = result_payload + vector_build_service = Mock() + vector_build_service.build_org_vector_context.return_value = result_payload with patch( "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client ), patch( - "ddpui.celeryworkers.tasks.DashboardChatIngestionService", - return_value=ingestion_service, + "ddpui.celeryworkers.tasks.DashboardChatVectorBuildService", + return_value=vector_build_service, ): result = build_dashboard_chat_context_for_org.run(org.id) assert result["status"] == "completed" assert result["org_id"] == org.id assert result["source_document_counts"] == {"dashboard_export": 2} - ingestion_service.ingest_org.assert_called_once() + vector_build_service.build_org_vector_context.assert_called_once() redis_lock.release.assert_called_once() @patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") -@patch("ddpui.celeryworkers.tasks.DashboardChatRuntime") +@patch("ddpui.celeryworkers.tasks.get_dashboard_chat_runtime") def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( - runtime_class, + get_runtime, publish_event, orguser, ): @@ -180,13 +180,15 @@ def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( role="user", content="Why did funding drop?", ) - runtime_class.return_value.run.return_value = DashboardChatResponse( + runtime = Mock() + runtime.run.return_value = 
DashboardChatResponse( answer_text="Funding dropped because donor inflows slowed this quarter.", intent=DashboardChatIntent.QUERY_WITH_SQL, warnings=["Example warning"], sql="SELECT 1", sql_results=[{"value": 1}], ) + get_runtime.return_value = runtime result = run_dashboard_chat_turn(str(session.session_id), user_message.id) @@ -199,9 +201,9 @@ def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( @patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") -@patch("ddpui.celeryworkers.tasks.DashboardChatRuntime") +@patch("ddpui.celeryworkers.tasks.get_dashboard_chat_runtime") def test_run_dashboard_chat_turn_publishes_error_when_runtime_fails( - runtime_class, + get_runtime, publish_event, orguser, ): @@ -218,7 +220,9 @@ def test_run_dashboard_chat_turn_publishes_error_when_runtime_fails( role="user", content="Why did funding drop?", ) - runtime_class.return_value.run.side_effect = RuntimeError("boom") + runtime = Mock() + runtime.run.side_effect = RuntimeError("boom") + get_runtime.return_value = runtime with pytest.raises(RuntimeError, match="boom"): run_dashboard_chat_turn(str(session.session_id), user_message.id) @@ -228,9 +232,9 @@ def test_run_dashboard_chat_turn_publishes_error_when_runtime_fails( @patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") -@patch("ddpui.celeryworkers.tasks.DashboardChatRuntime") +@patch("ddpui.celeryworkers.tasks.get_dashboard_chat_runtime") def test_run_dashboard_chat_turn_reuses_existing_assistant_reply( - runtime_class, + get_runtime, publish_event, orguser, ): @@ -262,5 +266,5 @@ def test_run_dashboard_chat_turn_reuses_existing_assistant_reply( "session_id": str(session.session_id), "assistant_message_id": assistant_message.id, } - runtime_class.return_value.run.assert_not_called() + get_runtime.assert_not_called() publish_event.assert_not_called() diff --git a/ddpui/tests/core/dashboard_chat/test_ingestion.py b/ddpui/tests/core/dashboard_chat/test_vector_building.py similarity 
index 90% rename from ddpui/tests/core/dashboard_chat/test_ingestion.py rename to ddpui/tests/core/dashboard_chat/test_vector_building.py index 32c0efb09..a766ad4e6 100644 --- a/ddpui/tests/core/dashboard_chat/test_ingestion.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_building.py @@ -15,8 +15,11 @@ generate_dashboard_chat_dbt_docs_artifacts, ) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.ingestion import DashboardChatIngestionService -from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name +from ddpui.core.dashboard_chat.vector_building import DashboardChatVectorBuildService +from ddpui.core.dashboard_chat.vector_documents import ( + DashboardChatSourceType, + build_dashboard_chat_collection_name, +) from ddpui.core.dashboard_chat.vector_store import DashboardChatStoredDocument from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpprefect import DBTCLIPROFILE @@ -32,7 +35,7 @@ class FakeDashboardChatVectorStore: - """In-memory vector store used to exercise ingest diffing logic.""" + """In-memory vector store used to exercise vector build diffing logic.""" def __init__(self): self.documents_by_collection = {} @@ -344,7 +347,7 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_genera mock_git_manager.pull_changes.assert_called_once_with() -def test_ingest_org_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, dashboard): +def test_build_org_vector_context_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, dashboard): """A repeated identical build should skip writes, and a removed source should be deleted.""" OrgAIContext.objects.create( org=org, @@ -410,19 +413,19 @@ def test_ingest_org_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, d }, generated_at=timezone.now(), ) - service = DashboardChatIngestionService( + service = DashboardChatVectorBuildService( vector_store=vector_store, 
dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), ) - first_result = service.ingest_org(org) - upsert_count_after_first_ingest = len(vector_store.upsert_calls) - second_result = service.ingest_org(org) + first_result = service.build_org_vector_context(org) + upsert_count_after_first_vector_build = len(vector_store.upsert_calls) + second_result = service.build_org_vector_context(org) dashboard_context.markdown = "" dashboard_context.updated_at = timezone.now() dashboard_context.save(update_fields=["markdown", "updated_at"]) - third_result = service.ingest_org(org) + third_result = service.build_org_vector_context(org) active_collection_name = build_dashboard_chat_collection_name( org.id, @@ -440,13 +443,13 @@ def test_ingest_org_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, d assert first_result.source_document_counts["dashboard_context"] == 1 assert second_result.upserted_document_ids assert second_result.deleted_document_ids == [] - assert len(vector_store.upsert_calls) == upsert_count_after_first_ingest + 2 + assert len(vector_store.upsert_calls) == upsert_count_after_first_vector_build + 2 assert third_result.source_document_counts["dashboard_context"] == 0 assert third_result.deleted_document_ids == [] assert "dashboard_context" not in stored_source_types -def test_ingest_org_keeps_collections_isolated_per_org(org, orgdbt, orguser, dashboard, seed_db): +def test_build_org_vector_context_keeps_collections_isolated_per_org(org, orgdbt, orguser, dashboard, seed_db): """The context build should never mix documents between org collections.""" other_org = Org.objects.create( name="Dashboard Chat Org 2", @@ -487,13 +490,13 @@ def test_ingest_org_keeps_collections_isolated_per_org(org, orgdbt, orguser, das generated_at=timezone.now(), ) vector_store = FakeDashboardChatVectorStore() - service = DashboardChatIngestionService( + service = DashboardChatVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda 
org_instance, orgdbt_instance: artifacts.to_artifacts(), ) - service.ingest_org(org) - service.ingest_org(other_org) + service.build_org_vector_context(org) + service.build_org_vector_context(other_org) org_collection_names = vector_store.list_org_collection_names(org.id) other_collection_names = vector_store.list_org_collection_names(other_org.id) @@ -508,7 +511,7 @@ def test_ingest_org_keeps_collections_isolated_per_org(org, orgdbt, orguser, das other_org.delete() -def test_ingest_org_keeps_last_good_context_when_upsert_fails(org, orgdbt, orguser, dashboard): +def test_build_org_vector_context_keeps_last_good_context_when_upsert_fails(org, orgdbt, orguser, dashboard): """A failed rebuild should not delete the previously indexed documents.""" OrgAIContext.objects.create( org=org, @@ -522,12 +525,12 @@ def test_ingest_org_keeps_last_good_context_when_upsert_fails(org, orgdbt, orgus catalog_json={"sources": {}, "nodes": {}}, generated_at=timezone.now(), ) - service = DashboardChatIngestionService( + service = DashboardChatVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), ) - first_result = service.ingest_org(org) + first_result = service.build_org_vector_context(org) original_ids = set(first_result.upserted_document_ids) assert original_ids @@ -541,7 +544,7 @@ def _raise_on_upsert(org_id, documents, collection_name=None): vector_store.upsert_documents = _raise_on_upsert with pytest.raises(RuntimeError, match="upsert failed"): - service.ingest_org(org) + service.build_org_vector_context(org) remaining_ids = { document.document_id @@ -552,7 +555,7 @@ def _raise_on_upsert(org_id, documents, collection_name=None): assert vector_store.delete_calls == [] -def test_ingest_org_deletes_disabled_source_documents(org, orgdbt, orguser, dashboard): +def test_build_org_vector_context_deletes_disabled_source_documents(org, orgdbt, orguser, dashboard): """Disabled source types should be omitted from the 
target document set.""" OrgAIContext.objects.create( org=org, @@ -572,21 +575,25 @@ def test_ingest_org_deletes_disabled_source_documents(org, orgdbt, orguser, dash catalog_json={"sources": {}, "nodes": {}}, generated_at=timezone.now(), ) - initial_service = DashboardChatIngestionService( + initial_service = DashboardChatVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), ) - initial_service.ingest_org(org) + initial_service.build_org_vector_context(org) - disabled_source_service = DashboardChatIngestionService( + disabled_source_service = DashboardChatVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), source_config=DashboardChatSourceConfig( - enabled_source_types=("dashboard_context", "dashboard_export", "dbt_manifest") + enabled_source_types=( + DashboardChatSourceType.DASHBOARD_CONTEXT, + DashboardChatSourceType.DASHBOARD_EXPORT, + DashboardChatSourceType.DBT_MANIFEST, + ) ), ) - result = disabled_source_service.ingest_org(org) + result = disabled_source_service.build_org_vector_context(org) stored_source_types = { document.metadata["source_type"] for document in vector_store.get_documents(org.id, include_documents=False) @@ -598,19 +605,23 @@ def test_ingest_org_deletes_disabled_source_documents(org, orgdbt, orguser, dash assert "dbt_catalog" not in stored_source_types -def test_ingest_org_skips_dbt_docs_when_dbt_sources_are_disabled(org, orgdbt, dashboard): +def test_build_org_vector_context_skips_dbt_docs_when_dbt_sources_are_disabled(org, orgdbt, dashboard): """Disabling both dbt sources should skip dbt docs generation entirely.""" vector_store = FakeDashboardChatVectorStore() dbt_docs_generator = Mock(side_effect=AssertionError("dbt docs should not run")) - service = DashboardChatIngestionService( + service = DashboardChatVectorBuildService( vector_store=vector_store, 
dbt_docs_generator=dbt_docs_generator, source_config=DashboardChatSourceConfig( - enabled_source_types=("org_context", "dashboard_context", "dashboard_export") + enabled_source_types=( + DashboardChatSourceType.ORG_CONTEXT, + DashboardChatSourceType.DASHBOARD_CONTEXT, + DashboardChatSourceType.DASHBOARD_EXPORT, + ) ), ) - result = service.ingest_org(org) + result = service.build_org_vector_context(org) dbt_docs_generator.assert_not_called() assert result.docs_generated_at is None diff --git a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py index 4676d1308..6b8a82991 100644 --- a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py +++ b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py @@ -57,3 +57,26 @@ def test_quote_bigquery_table_ref_requires_project_id(): DashboardChatWarehouseToolsError, match="BigQuery project id not configured" ): tools._quote_bigquery_table_ref("analytics", "program_reach") + + +def test_quote_bigquery_table_ref_rejects_unsafe_identifier_components(): + """BigQuery table refs should reject unsafe project/schema/table identifier text.""" + with patch( + "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + return_value={"project_id": "analytics-project"}, + ): + tools = _build_bigquery_tools() + with pytest.raises( + DashboardChatWarehouseToolsError, + match="Invalid table name", + ): + tools._quote_bigquery_table_ref("analytics", "program`reach") + + +def test_parse_table_name_rejects_embedded_dots_and_backticks(): + """Schema-qualified table names must normalize safely before warehouse access.""" + with pytest.raises( + DashboardChatWarehouseToolsError, + match="Invalid table name", + ): + DashboardChatWarehouseTools._parse_table_name("analytics.program.reach") diff --git a/ddpui/tests/websockets/test_dashboard_chat_consumer.py b/ddpui/tests/websockets/test_dashboard_chat_consumer.py index fe9508522..55c22ab79 100644 --- 
a/ddpui/tests/websockets/test_dashboard_chat_consumer.py +++ b/ddpui/tests/websockets/test_dashboard_chat_consumer.py @@ -13,8 +13,8 @@ def test_dashboard_chat_consumer_send_message_requires_message(): consumer.websocket_receive({"text": json.dumps({"action": "send_message"})}) payload = json.loads(consumer.send.call_args.kwargs["text_data"]) - assert payload["event_type"] == "error" - assert payload["data"]["message"] == "Message is required" + assert payload["status"] == "error" + assert payload["message"] == "Message is required" def test_dashboard_chat_consumer_send_message_requires_available_chat(): @@ -34,8 +34,8 @@ def test_dashboard_chat_consumer_send_message_requires_available_chat(): ) payload = json.loads(consumer.send.call_args.kwargs["text_data"]) - assert payload["event_type"] == "error" - assert payload["data"]["message"] == "Chat unavailable" + assert payload["status"] == "error" + assert payload["message"] == "Chat unavailable" @patch("ddpui.websockets.dashboard_chat_consumer.serialize_dashboard_chat_message") @@ -82,8 +82,10 @@ def test_dashboard_chat_consumer_send_message_creates_session_and_runs_inline( first_payload = json.loads(consumer.send.call_args_list[0].kwargs["text_data"]) second_payload = json.loads(consumer.send.call_args_list[1].kwargs["text_data"]) - assert first_payload["event_type"] == "progress" - assert second_payload["event_type"] == "assistant_message" + assert first_payload["status"] == "success" + assert first_payload["data"]["event_type"] == "progress" + assert second_payload["status"] == "success" + assert second_payload["data"]["event_type"] == "assistant_message" @patch( @@ -125,8 +127,8 @@ def test_dashboard_chat_consumer_send_message_returns_error_when_inline_turn_fai consumer._subscribe_to_session.assert_called_once_with("session-123") payload = json.loads(consumer.send.call_args_list[-1].kwargs["text_data"]) - assert payload["event_type"] == "error" - assert payload["data"]["message"] == "Something went wrong while 
generating the response" + assert payload["status"] == "error" + assert payload["message"] == "Something went wrong while generating the response" @patch("ddpui.websockets.dashboard_chat_consumer.serialize_dashboard_chat_message") @@ -150,6 +152,7 @@ def test_dashboard_chat_consumer_reuses_existing_turn_without_running_duplicate_ consumer = DashboardChatConsumer() consumer.dashboard = Mock(id=42) consumer.orguser = Mock() + consumer.send = Mock() consumer._chat_available = Mock(return_value=(True, "")) consumer._subscribe_to_session = Mock() @@ -167,4 +170,5 @@ def test_dashboard_chat_consumer_reuses_existing_turn_without_running_duplicate_ consumer._subscribe_to_session.assert_called_once_with("session-123") payload = json.loads(consumer.send.call_args.kwargs["text_data"]) - assert payload["event_type"] == "assistant_message" + assert payload["status"] == "success" + assert payload["data"]["event_type"] == "assistant_message" diff --git a/ddpui/utils/openai_client.py b/ddpui/utils/openai_client.py new file mode 100644 index 000000000..95096013c --- /dev/null +++ b/ddpui/utils/openai_client.py @@ -0,0 +1,22 @@ +"""Shared OpenAI client helpers.""" + +from functools import lru_cache + +from openai import OpenAI + + +@lru_cache(maxsize=16) +def get_shared_openai_client( + api_key: str, + *, + timeout_seconds: float | None = None, + max_retries: int = 0, +) -> OpenAI: + """Return a shared OpenAI client for one api-key/timeout/retry tuple.""" + client_kwargs = { + "api_key": api_key, + "max_retries": max_retries, + } + if timeout_seconds is not None: + client_kwargs["timeout"] = timeout_seconds + return OpenAI(**client_kwargs) diff --git a/ddpui/utils/vector/__init__.py b/ddpui/utils/vector/__init__.py new file mode 100644 index 000000000..00cf8100d --- /dev/null +++ b/ddpui/utils/vector/__init__.py @@ -0,0 +1 @@ +"""Generic vector-database utilities.""" diff --git a/ddpui/utils/vector/chroma/__init__.py b/ddpui/utils/vector/chroma/__init__.py new file mode 100644 index 
000000000..605abdc2c --- /dev/null +++ b/ddpui/utils/vector/chroma/__init__.py @@ -0,0 +1,12 @@ +"""Chroma helpers shared across Dalgo.""" + +from ddpui.utils.vector.chroma.client import get_shared_chroma_http_client +from ddpui.utils.vector.chroma.store import ChromaHttpVectorStore +from ddpui.utils.vector.chroma.types import ChromaQueryResult, ChromaStoredDocument + +__all__ = [ + "ChromaHttpVectorStore", + "ChromaQueryResult", + "ChromaStoredDocument", + "get_shared_chroma_http_client", +] diff --git a/ddpui/utils/vector/chroma/client.py b/ddpui/utils/vector/chroma/client.py new file mode 100644 index 000000000..5741de917 --- /dev/null +++ b/ddpui/utils/vector/chroma/client.py @@ -0,0 +1,11 @@ +"""Shared Chroma HTTP client helpers.""" + +from functools import lru_cache + +from chromadb import ClientAPI, HttpClient + + +@lru_cache(maxsize=8) +def get_shared_chroma_http_client(host: str, port: int, ssl: bool) -> ClientAPI: + """Return a shared Chroma HTTP client for one host/port/ssl tuple.""" + return HttpClient(host=host, port=port, ssl=ssl) diff --git a/ddpui/utils/vector/chroma/store.py b/ddpui/utils/vector/chroma/store.py new file mode 100644 index 000000000..55c9e95d7 --- /dev/null +++ b/ddpui/utils/vector/chroma/store.py @@ -0,0 +1,207 @@ +"""Generic Chroma wrapper that only knows how to talk to the Chroma server.""" + +from typing import Any + +from chromadb import ClientAPI +from chromadb.errors import InvalidCollectionException + +from ddpui.utils.vector.chroma.client import get_shared_chroma_http_client +from ddpui.utils.vector.chroma.types import ChromaQueryResult, ChromaStoredDocument + + +class ChromaHttpVectorStore: + """Thin generic wrapper around the Chroma HTTP client.""" + + def __init__( + self, + *, + host: str = "localhost", + port: int = 8000, + ssl: bool = False, + client: ClientAPI | None = None, + ): + self.client = client or get_shared_chroma_http_client(host, port, ssl) + + def create_collection( + self, + name: str, + *, + metadata: 
dict[str, Any] | None = None, + ) -> Any: + """Create or load a Chroma collection.""" + return self.client.get_or_create_collection(name=name, metadata=metadata) + + def load_collection(self, name: str) -> Any | None: + """Load one existing Chroma collection by name.""" + try: + return self.client.get_collection(name=name) + except (InvalidCollectionException, ValueError): + return None + + def delete_collection(self, name: str) -> bool: + """Delete one Chroma collection if it exists.""" + if self.load_collection(name) is None: + return False + self.client.delete_collection(name=name) + return True + + def list_collection_names(self) -> list[str]: + """Return all collection names available to this Chroma client.""" + raw_collections = self.client.list_collections() + collection_names: list[str] = [] + for collection in raw_collections: + if isinstance(collection, str): + collection_names.append(collection) + continue + name = getattr(collection, "name", None) + if name: + collection_names.append(str(name)) + return collection_names + + def get_documents( + self, + collection_name: str, + *, + where: dict[str, Any] | None = None, + include_documents: bool = False, + ) -> list[ChromaStoredDocument]: + """Read documents from one collection using an optional metadata filter.""" + collection = self.load_collection(collection_name) + if collection is None: + return [] + + include = ["metadatas"] + if include_documents: + include.append("documents") + result = collection.get(where=where, include=include) + return self._parse_chroma_get_response( + result, + include_documents=include_documents, + ) + + def delete_documents( + self, + collection_name: str, + *, + ids: list[str] | None = None, + where: dict[str, Any] | None = None, + ) -> int: + """Delete documents from one collection by ids and/or metadata filter.""" + collection = self.load_collection(collection_name) + if collection is None: + return 0 + + if ids is None and where is None: + return 0 + + deleted_count = ( 
+ len(ids) + if ids is not None + else len( + self.get_documents( + collection_name, + where=where, + include_documents=False, + ) + ) + ) + collection.delete(ids=ids, where=where) + return deleted_count + + def upsert_documents( + self, + collection_name: str, + *, + ids: list[str], + documents: list[str], + metadatas: list[dict[str, Any]], + embeddings: list[list[float]], + collection_metadata: dict[str, Any] | None = None, + ) -> list[str]: + """Upsert documents into one collection.""" + if not ids: + return [] + + collection = self.create_collection( + collection_name, + metadata=collection_metadata, + ) + collection.upsert( + ids=ids, + documents=documents, + metadatas=metadatas, + embeddings=embeddings, + ) + return ids + + def query( + self, + collection_name: str, + *, + query_embedding: list[float], + n_results: int = 5, + where: dict[str, Any] | None = None, + ) -> list[ChromaQueryResult]: + """Query one collection using a precomputed embedding and optional metadata filter.""" + collection = self.load_collection(collection_name) + if collection is None: + return [] + + result = collection.query( + query_embeddings=[query_embedding], + n_results=n_results, + where=where, + include=["documents", "metadatas", "distances"], + ) + return self._parse_chroma_query_response(result) + + @staticmethod + def _parse_chroma_query_response( + result: dict[str, Any], + ) -> list[ChromaQueryResult]: + """Parse Chroma's nested query result shape into flat typed rows.""" + ids = result.get("ids", [[]]) + documents = result.get("documents", [[]]) + metadatas = result.get("metadatas", [[]]) + distances = result.get("distances", [[]]) + + parsed_results: list[ChromaQueryResult] = [] + for document_id, content, metadata, distance in zip( + ids[0] if ids else [], + documents[0] if documents else [], + metadatas[0] if metadatas else [], + distances[0] if distances else [], + ): + parsed_results.append( + ChromaQueryResult( + document_id=document_id, + content=content, + 
metadata=metadata, + distance=distance, + ) + ) + return parsed_results + + @staticmethod + def _parse_chroma_get_response( + result: dict[str, Any], + *, + include_documents: bool = False, + ) -> list[ChromaStoredDocument]: + """Parse Chroma's get result into typed stored-document rows.""" + ids = result.get("ids", []) + metadatas = result.get("metadatas", []) + documents = result.get("documents", []) if include_documents else [] + + parsed_results: list[ChromaStoredDocument] = [] + for index, document_id in enumerate(ids): + parsed_results.append( + ChromaStoredDocument( + document_id=document_id, + metadata=metadatas[index] if index < len(metadatas) else {}, + content=documents[index] + if include_documents and index < len(documents) + else None, + ) + ) + return parsed_results diff --git a/ddpui/utils/vector/chroma/types.py b/ddpui/utils/vector/chroma/types.py new file mode 100644 index 000000000..4759f9c70 --- /dev/null +++ b/ddpui/utils/vector/chroma/types.py @@ -0,0 +1,23 @@ +"""Generic Chroma result objects shared across Dalgo.""" + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class ChromaQueryResult: + """Single query result returned from Chroma.""" + + document_id: str + content: str + metadata: dict[str, Any] + distance: float | None = None + + +@dataclass(frozen=True) +class ChromaStoredDocument: + """Stored document metadata returned from Chroma collection reads.""" + + document_id: str + metadata: dict[str, Any] + content: str | None = None diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 717a592f3..f88fd6408 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -3,6 +3,7 @@ from asgiref.sync import async_to_sync +from ddpui.celeryworkers.tasks import execute_dashboard_chat_turn from ddpui.core.dashboard_chat.events import ( build_dashboard_chat_event, dashboard_chat_group_name, @@ -20,6 +21,7 
@@ from ddpui.utils.custom_logger import CustomLogger from ddpui.utils.feature_flags import get_all_feature_flags_for_org from ddpui.websockets import BaseConsumer +from ddpui.websockets.schemas import WebsocketResponse, WebsocketResponseStatus logger = CustomLogger("ddpui") @@ -50,21 +52,45 @@ def websocket_receive(self, message): try: payload = json.loads(message["text"]) except (KeyError, ValueError): - self._respond_error("Invalid websocket payload") + self.respond( + WebsocketResponse( + data={}, + message="Invalid websocket payload", + status=WebsocketResponseStatus.ERROR, + ) + ) return if payload.get("action") != "send_message": - self._respond_error("Unsupported websocket action") + self.respond( + WebsocketResponse( + data={}, + message="Unsupported websocket action", + status=WebsocketResponseStatus.ERROR, + ) + ) return raw_message = str(payload.get("message") or "").strip() if not raw_message: - self._respond_error("Message is required") + self.respond( + WebsocketResponse( + data={}, + message="Message is required", + status=WebsocketResponseStatus.ERROR, + ) + ) return available, unavailable_message = self._chat_available() if not available: - self._respond_error(unavailable_message) + self.respond( + WebsocketResponse( + data={}, + message=unavailable_message, + status=WebsocketResponseStatus.ERROR, + ) + ) return try: @@ -74,7 +100,13 @@ def websocket_receive(self, message): session_id=payload.get("session_id"), ) except DashboardChatSessionError as error: - self._respond_error(str(error)) + self.respond( + WebsocketResponse( + data={}, + message=str(error), + status=WebsocketResponseStatus.ERROR, + ) + ) return user_message_result = create_dashboard_chat_user_message_with_status( @@ -91,61 +123,95 @@ def websocket_receive(self, message): user_message=user_message, ) if assistant_message is not None: - self._send_event( - build_dashboard_chat_event( - event_type="assistant_message", - session_id=str(session.session_id), - 
dashboard_id=self.dashboard.id, - message_id=str(assistant_message.id), - data=serialize_dashboard_chat_message(assistant_message), - ), + self.respond( + WebsocketResponse( + data=build_dashboard_chat_event( + event_type="assistant_message", + session_id=str(session.session_id), + dashboard_id=self.dashboard.id, + message_id=str(assistant_message.id), + data=serialize_dashboard_chat_message(assistant_message), + ), + message="", + status=WebsocketResponseStatus.SUCCESS, + ) ) return - self._send_event( - build_dashboard_chat_event( - event_type="progress", - session_id=str(session.session_id), - dashboard_id=self.dashboard.id, - message_id=str(user_message.id), - data={"label": "thinking"}, + self.respond( + WebsocketResponse( + data=build_dashboard_chat_event( + event_type="progress", + session_id=str(session.session_id), + dashboard_id=self.dashboard.id, + message_id=str(user_message.id), + data={"label": "thinking"}, + ), + message="", + status=WebsocketResponseStatus.SUCCESS, ) ) try: - from ddpui.celeryworkers.tasks import execute_dashboard_chat_turn - result = execute_dashboard_chat_turn(str(session.session_id), user_message.id) except Exception: logger.exception( "dashboard chat turn failed inline for session=%s", session.session_id, ) - self._respond_error("Something went wrong while generating the response") + self.respond( + WebsocketResponse( + data={}, + message="Something went wrong while generating the response", + status=WebsocketResponseStatus.ERROR, + ) + ) return assistant_message = result.get("assistant_message") if result["status"] in {"completed", "skipped_existing_reply"} and assistant_message is not None: - self._send_event( - build_dashboard_chat_event( - event_type="assistant_message", - session_id=str(session.session_id), - dashboard_id=self.dashboard.id, - message_id=str(assistant_message.id), - data=serialize_dashboard_chat_message(assistant_message), + self.respond( + WebsocketResponse( + data=build_dashboard_chat_event( + 
event_type="assistant_message", + session_id=str(session.session_id), + dashboard_id=self.dashboard.id, + message_id=str(assistant_message.id), + data=serialize_dashboard_chat_message(assistant_message), + ), + message="", + status=WebsocketResponseStatus.SUCCESS, ) ) return if result["status"] == "skipped_missing_session": - self._respond_error("Chat session could not be found") + self.respond( + WebsocketResponse( + data={}, + message="Chat session could not be found", + status=WebsocketResponseStatus.ERROR, + ) + ) return if result["status"] == "skipped_missing_message": - self._respond_error("Chat message could not be found") + self.respond( + WebsocketResponse( + data={}, + message="Chat message could not be found", + status=WebsocketResponseStatus.ERROR, + ) + ) return - self._respond_error("Something went wrong while generating the response") + self.respond( + WebsocketResponse( + data={}, + message="Something went wrong while generating the response", + status=WebsocketResponseStatus.ERROR, + ) + ) def websocket_disconnect(self, message): """Remove the socket from any joined session groups on disconnect.""" @@ -156,7 +222,13 @@ def websocket_disconnect(self, message): def dashboard_chat_event(self, event): """Forward dashboard chat events from the channel layer to the browser.""" - self.send(text_data=event["event"]) + self.respond( + WebsocketResponse( + data=event["event"], + message="", + status=WebsocketResponseStatus.SUCCESS, + ) + ) def _subscribe_to_session(self, session_id: str) -> None: """Join the session-scoped channel-layer group if not already subscribed.""" @@ -166,22 +238,6 @@ def _subscribe_to_session(self, session_id: str) -> None: async_to_sync(self.channel_layer.group_add)(group_name, self.channel_name) self.joined_session_groups.add(group_name) - def _respond_error(self, message: str) -> None: - """Send one direct websocket error event.""" - self._send_event( - build_dashboard_chat_event( - event_type="error", - 
dashboard_id=self.dashboard.id if getattr(self, "dashboard", None) else None, - data={"message": message}, - ) - ) - - def _send_event(self, event: dict) -> None: - """Send one websocket event directly to the current socket.""" - self.send( - text_data=json.dumps(event) - ) - def _chat_available(self) -> tuple[bool, str]: """Return whether the current org is ready for dashboard chat.""" feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get( From 137942c311f5ca545dfb6d833e0d870a6abe8b42 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Thu, 26 Mar 2026 11:22:44 +0530 Subject: [PATCH 17/49] refactor(ai-chat): align dashboard chat backend --- ddpui/celeryworkers/tasks.py | 16 +- ddpui/core/dashboard_chat/README.md | 809 ++++++++++++++++++ ddpui/core/dashboard_chat/agents/__init__.py | 0 .../answer_formatting.py} | 2 +- .../{llm_client.py => agents/interface.py} | 2 +- .../openai.py} | 6 +- .../{ => agents}/prompt_store.py | 19 +- ddpui/core/dashboard_chat/config.py | 2 +- ddpui/core/dashboard_chat/context/__init__.py | 0 .../dashboard_chat/{ => context}/allowlist.py | 0 .../dashboard_chat/{ => context}/dbt_docs.py | 0 .../core/dashboard_chat/contracts/__init__.py | 27 + .../dashboard_chat/contracts/conversation.py | 28 + .../core/dashboard_chat/contracts/intents.py | 42 + .../core/dashboard_chat/contracts/response.py | 55 ++ .../dashboard_chat/contracts/retrieval.py | 24 + ddpui/core/dashboard_chat/contracts/sql.py | 14 + ddpui/core/dashboard_chat/graph/definition.py | 45 - .../{graph => orchestration}/__init__.py | 0 .../{graph => orchestration}/bindings.py | 0 .../{graph => orchestration}/conversation.py | 4 +- .../orchestration/definition.py | 85 ++ .../{graph => orchestration}/message_stack.py | 2 +- .../{graph => orchestration}/nodes.py | 22 +- .../{graph => orchestration}/orchestrator.py | 37 +- .../{graph => orchestration}/presentation.py | 25 +- .../{graph => orchestration}/retrieval.py | 6 +- .../session_snapshot.py | 6 +- 
.../source_identifiers.py | 0 .../{graph => orchestration}/sql_execution.py | 6 +- .../{graph => orchestration}/sql_parsing.py | 6 +- .../{graph => orchestration}/state.py | 9 +- .../{graph => orchestration}/tool_handlers.py | 6 +- .../{graph => orchestration}/tool_loop.py | 28 +- .../tool_specifications.py | 0 ddpui/core/dashboard_chat/prompt_cache.py | 8 - ddpui/core/dashboard_chat/runtime_types.py | 146 ---- .../core/dashboard_chat/sessions/__init__.py | 0 .../{session_cache.py => sessions/cache.py} | 4 +- .../service.py} | 14 +- ddpui/core/dashboard_chat/vector/__init__.py | 0 .../builder.py} | 4 +- .../building.py} | 8 +- .../documents.py} | 0 .../dashboard_chat/{ => vector}/embeddings.py | 0 .../{vector_store.py => vector/store.py} | 4 +- .../core/dashboard_chat/warehouse/__init__.py | 0 .../{ => warehouse}/sql_guard.py | 42 +- .../tools.py} | 2 +- ...hatmessage_response_latency_ms_and_more.py | 22 + ddpui/models/dashboard_chat.py | 15 +- .../core/dashboard_chat/test_llm_client.py | 6 +- .../core/dashboard_chat/test_prompt_store.py | 18 +- .../tests/core/dashboard_chat/test_runtime.py | 143 ++-- .../dashboard_chat/test_session_service.py | 4 +- ddpui/tests/core/dashboard_chat/test_tasks.py | 16 +- .../dashboard_chat/test_vector_building.py | 28 +- .../core/dashboard_chat/test_vector_store.py | 4 +- .../dashboard_chat/test_warehouse_tools.py | 10 +- ddpui/websockets/dashboard_chat_consumer.py | 2 +- 60 files changed, 1380 insertions(+), 453 deletions(-) create mode 100644 ddpui/core/dashboard_chat/README.md create mode 100644 ddpui/core/dashboard_chat/agents/__init__.py rename ddpui/core/dashboard_chat/{llm_answer_formatting.py => agents/answer_formatting.py} (97%) rename ddpui/core/dashboard_chat/{llm_client.py => agents/interface.py} (96%) rename ddpui/core/dashboard_chat/{openai_llm_client.py => agents/openai.py} (98%) rename ddpui/core/dashboard_chat/{ => agents}/prompt_store.py (95%) create mode 100644 ddpui/core/dashboard_chat/context/__init__.py rename 
ddpui/core/dashboard_chat/{ => context}/allowlist.py (100%) rename ddpui/core/dashboard_chat/{ => context}/dbt_docs.py (100%) create mode 100644 ddpui/core/dashboard_chat/contracts/__init__.py create mode 100644 ddpui/core/dashboard_chat/contracts/conversation.py create mode 100644 ddpui/core/dashboard_chat/contracts/intents.py create mode 100644 ddpui/core/dashboard_chat/contracts/response.py create mode 100644 ddpui/core/dashboard_chat/contracts/retrieval.py create mode 100644 ddpui/core/dashboard_chat/contracts/sql.py delete mode 100644 ddpui/core/dashboard_chat/graph/definition.py rename ddpui/core/dashboard_chat/{graph => orchestration}/__init__.py (100%) rename ddpui/core/dashboard_chat/{graph => orchestration}/bindings.py (100%) rename ddpui/core/dashboard_chat/{graph => orchestration}/conversation.py (98%) create mode 100644 ddpui/core/dashboard_chat/orchestration/definition.py rename ddpui/core/dashboard_chat/{graph => orchestration}/message_stack.py (96%) rename ddpui/core/dashboard_chat/{graph => orchestration}/nodes.py (87%) rename ddpui/core/dashboard_chat/{graph => orchestration}/orchestrator.py (68%) rename ddpui/core/dashboard_chat/{graph => orchestration}/presentation.py (92%) rename ddpui/core/dashboard_chat/{graph => orchestration}/retrieval.py (98%) rename ddpui/core/dashboard_chat/{graph => orchestration}/session_snapshot.py (95%) rename ddpui/core/dashboard_chat/{graph => orchestration}/source_identifiers.py (100%) rename ddpui/core/dashboard_chat/{graph => orchestration}/sql_execution.py (98%) rename ddpui/core/dashboard_chat/{graph => orchestration}/sql_parsing.py (97%) rename ddpui/core/dashboard_chat/{graph => orchestration}/state.py (84%) rename ddpui/core/dashboard_chat/{graph => orchestration}/tool_handlers.py (98%) rename ddpui/core/dashboard_chat/{graph => orchestration}/tool_loop.py (84%) rename ddpui/core/dashboard_chat/{graph => orchestration}/tool_specifications.py (100%) delete mode 100644 
ddpui/core/dashboard_chat/prompt_cache.py delete mode 100644 ddpui/core/dashboard_chat/runtime_types.py create mode 100644 ddpui/core/dashboard_chat/sessions/__init__.py rename ddpui/core/dashboard_chat/{session_cache.py => sessions/cache.py} (96%) rename ddpui/core/dashboard_chat/{session_service.py => sessions/service.py} (91%) create mode 100644 ddpui/core/dashboard_chat/vector/__init__.py rename ddpui/core/dashboard_chat/{vector_document_builder.py => vector/builder.py} (99%) rename ddpui/core/dashboard_chat/{vector_building.py => vector/building.py} (95%) rename ddpui/core/dashboard_chat/{vector_documents.py => vector/documents.py} (100%) rename ddpui/core/dashboard_chat/{ => vector}/embeddings.py (100%) rename ddpui/core/dashboard_chat/{vector_store.py => vector/store.py} (98%) create mode 100644 ddpui/core/dashboard_chat/warehouse/__init__.py rename ddpui/core/dashboard_chat/{ => warehouse}/sql_guard.py (81%) rename ddpui/core/dashboard_chat/{warehouse_tools.py => warehouse/tools.py} (99%) create mode 100644 ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index 4f14b0c5b..3971aa5e2 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -75,8 +75,8 @@ ) from ddpui.core.orgdbt_manager import DbtProjectManager, DbtCommandError from ddpui.core.git_manager import GitManager, GitManagerError -from ddpui.core.dashboard_chat.vector_building import DashboardChatVectorBuildService -from ddpui.core.dashboard_chat.graph.orchestrator import get_dashboard_chat_runtime +from ddpui.core.dashboard_chat.vector.building import DashboardChatVectorBuildService +from ddpui.core.dashboard_chat.orchestration.orchestrator import get_dashboard_chat_runtime from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpairbyte import airbyte_service, airbytehelpers from ddpui.ddpprefect.prefect_service import ( @@ -103,7 +103,7 @@ build_dashboard_chat_event, 
publish_dashboard_chat_event, ) -from ddpui.core.dashboard_chat.session_service import ( +from ddpui.core.dashboard_chat.sessions.service import ( create_dashboard_chat_assistant_message, find_dashboard_chat_assistant_reply, list_dashboard_chat_history, @@ -1493,11 +1493,21 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: assistant_payload = { key: value for key, value in response_payload.items() if key != "answer_text" } + timing_breakdown = ( + dict(response_payload.get("metadata") or {}).get("timing_breakdown") or {} + ) assistant_message = create_dashboard_chat_assistant_message( session=session, content=response.answer_text, payload=assistant_payload, + timing_breakdown=timing_breakdown, + ) + response_latency_ms = max( + 0, + int((assistant_message.created_at - user_message.created_at).total_seconds() * 1000), ) + assistant_message.response_latency_ms = response_latency_ms + assistant_message.save(update_fields=["response_latency_ms"]) return { "status": "completed", "session": session, diff --git a/ddpui/core/dashboard_chat/README.md b/ddpui/core/dashboard_chat/README.md new file mode 100644 index 000000000..dff3f7ddd --- /dev/null +++ b/ddpui/core/dashboard_chat/README.md @@ -0,0 +1,809 @@ +# Dashboard Chat + +This package implements "Chat with Dashboards" for one dashboard at a time. 
+ +The feature is intentionally scoped to the current dashboard only: +- the user can ask about this dashboard's charts, datasets, dbt lineage, and attached org/dashboard context +- the assistant can run read-only SQL against the allowlisted warehouse tables behind this dashboard +- the assistant should not jump to other dashboards or search the whole org blindly + +## High-Level Request Flow + +``` ++---------------------+ +| browser chat drawer | ++---------------------+ + | + v ++--------------------------------+ +| DashboardChatConsumer | +| websocket entrypoint | ++--------------------------------+ + | + v ++--------------------------------+ +| get/create session | +| persist user message | ++--------------------------------+ + | + v ++--------------------------------+ +| execute_dashboard_chat_turn | ++--------------------------------+ + | + v ++--------------------------------+ +| DashboardChatRuntime.run | +| - load snapshot | +| - route intent | +| - retrieve / tools / SQL | +| - compose answer | +| - attach citations + metadata | ++--------------------------------+ + | + v ++--------------------------------+ +| persist assistant message | ++--------------------------------+ + | + v ++--------------------------------+ +| websocket response event | ++--------------------------------+ + | + v ++--------------------------------+ +| frontend renders markdown | +| + structured tables | ++--------------------------------+ +``` + +## What Happens For Each User Question + +Every user message is one turn. A turn always creates or reuses a `DashboardChatSession`, stores the user message, runs the LangGraph runtime, stores the assistant reply, and sends the reply back over websocket. + +### 1. Small talk +Examples: +- "hi" +- "thanks" +- "what can you do?" +- "who are you?" 
+ +There are two ways a turn becomes `small_talk`: +- fast path: before LLM classification, the runtime checks the current user message only against `SMALL_TALK_FAST_PATH_PATTERN` (`hi`, `hello`, `hey`, `yo`, `good morning`, `good afternoon`, `good evening`, `thanks`, `thank you`, `what can you do`, `who are you`) +- normal routing: broader conversational prompts that are not in the fast path can still fall through to the intent classifier and come back as `small_talk` + +Flow: +``` ++---------------------+ +| user message | ++---------------------+ + | + v ++---------------------+ +| load_context | ++---------------------+ + | + v ++----------------------------------------------+ +| route_intent | +| - small-talk fast path match? | +| - else run normal intent classification | +| => final route = small_talk | ++----------------------------------------------+ + | + v ++---------------------+ +| handle_small_talk | ++---------------------+ + | + v ++---------------------+ +| finalize | ++---------------------+ +``` + +No retrieval or SQL runs here. + +### 2. Irrelevant question +Examples: +- "show me another dashboard" +- "which dashboard should I use for finance?" + +Flow: +``` ++---------------------+ +| user message | ++---------------------+ + | + v ++---------------------+ +| load_context | ++---------------------+ + | + v ++---------------------+ +| route_intent | +| => irrelevant | ++---------------------+ + | + v ++---------------------+ +| handle_irrelevant | ++---------------------+ + | + v ++---------------------+ +| finalize | ++---------------------+ +``` + +The runtime returns a scope-boundary answer and does not leave the current dashboard. + +### 3. Needs clarification +Examples: +- "is this improving?" 
+- "show me the data" + +Flow: +``` ++--------------------------+ +| user message | ++--------------------------+ + | + v ++--------------------------+ +| load_context | ++--------------------------+ + | + v ++--------------------------+ +| route_intent | +| => needs_clarification | ++--------------------------+ + | + v ++--------------------------+ +| handle_needs_clarification| ++--------------------------+ + | + v ++--------------------------+ +| finalize | ++--------------------------+ +``` + +The assistant asks for the missing dimension, metric, filter, or timeframe. + +### 4. Context / explanation question +Examples: +- "tell me about this dashboard" +- "what does this chart mean?" +- "what models are behind this chart?" + +Flow: +``` ++--------------------------+ +| user message | ++--------------------------+ + | + v ++--------------------------+ +| load_context | ++--------------------------+ + | + v ++--------------------------+ +| route_intent | +| => query_without_sql | ++--------------------------+ + | + v ++----------------------------------------------+ +| handle_query_without_sql | +| -> tool loop | +| - usually retrieve_docs | +| - may use search_dbt_models | +| - may use get_dbt_model_info | +| -> compose final answer | ++----------------------------------------------+ + | + v ++--------------------------+ +| finalize | ++--------------------------+ +``` + +These questions are usually answered from: +- vectorized dashboard/org/dbt docs +- deterministic dbt index lookups +- chart metadata in the dashboard export + +### 5. 
Data / SQL question +Examples: +- "give me district wise pass rates" +- "top 5 facilitators in q2" + +Flow: +``` ++--------------------------+ +| user message | ++--------------------------+ + | + v ++--------------------------+ +| load_context | ++--------------------------+ + | + v ++--------------------------+ +| route_intent | +| => query_with_sql | ++--------------------------+ + | + v ++----------------------------------------------+ +| handle_query_with_sql | +| -> tool loop | +| - retrieve_docs | +| - get_schema_snippets | +| - get_distinct_values | +| - run_sql_query | +| -> compose final answer | ++----------------------------------------------+ + | + v ++--------------------------+ +| finalize | ++--------------------------+ +``` + +Important behavior: +- SQL is allowlisted to the current dashboard's lineage +- only read-only SQL is allowed +- text filters should be validated with distinct values first +- the final answer is narrative markdown; structured SQL results are returned separately for table rendering in the UI +- non-fatal cautions are returned in `warnings` when the runtime had to adjust or flag something about the query + +### 6. Follow-up SQL question +Examples: +- "now split by district" +- "same but only for q2" + +Flow: +``` ++-------------------------------------------+ +| previous conversation + new message | ++-------------------------------------------+ + | + v ++--------------------------+ +| load_context | ++--------------------------+ + | + v ++--------------------------+ +| route_intent | +| => follow_up_sql | ++--------------------------+ + | + v ++----------------------------------------------+ +| handle_follow_up_sql | +| -> shorter tool loop | +| -> reuse conversation context + caches | +| -> run updated SQL | +| -> compose final answer | ++----------------------------------------------+ + | + v ++--------------------------+ +| finalize | ++--------------------------+ +``` + +### 7. 
Follow-up context question +Examples: +- "what does that mean?" +- "explain that chart more" + +Flow: +``` ++-------------------------------------------+ +| previous conversation + new message | ++-------------------------------------------+ + | + v ++--------------------------+ +| load_context | ++--------------------------+ + | + v ++--------------------------+ +| route_intent | +| => follow_up_context | ++--------------------------+ + | + v ++----------------------------------------------+ +| handle_follow_up_context | +| -> shorter tool loop | +| -> reuse prior context | +| -> compose final answer | ++----------------------------------------------+ + | + v ++--------------------------+ +| finalize | ++--------------------------+ +``` + +## LangGraph Shape + +The runtime uses a simple explicit intent graph, not a deeply branching agent graph. + +``` ++-------+ +----------------+ +----------------+ +| START | --> | load_context | --> | route_intent | ++-------+ +----------------+ +----------------+ + | + +--> handle_small_talk ---------+ + +--> handle_irrelevant ---------+ + +--> handle_needs_clarification + + +--> handle_query_with_sql -----+ + +--> handle_query_without_sql --+ + +--> handle_follow_up_sql ------+ + +--> handle_follow_up_context --+ + | + v + +-----------+ + | finalize | + +-----------+ + | + v + +-------+ + | END | + +-------+ +``` + +The important design choice is that all non-trivial routes eventually go through the same answer contract: +- markdown answer text +- citations +- warnings +- optional SQL + SQL results +- metadata for the frontend + +## Warnings + +`warnings` are non-fatal runtime cautions attached to the final response payload. 
+ +They are not the same as hard errors: +- hard errors stop the current path and force the tool loop or the user to correct something +- warnings allow the turn to continue, but record something important about what happened + +Current warning sources include: +- SQL guard adjustments, for example: + - no `LIMIT` was present, so the guard added `LIMIT 200` + - `SELECT *` was used +- tool/runtime exceptions that were caught and surfaced as cautionary context during the turn + +Warnings are persisted with the assistant response and can be shown or inspected later from the payload. + +## Runtime Limits + +Current enforced limits: +- retrieval from Chroma per query: `6` results by default from runtime config +- `retrieve_docs` tool request limit: capped to `20` +- `search_dbt_models` tool request limit: capped to `20` +- `list_tables_by_keyword` tool request limit: capped to `50` +- `get_distinct_values` tool request limit: capped to `200` +- SQL result row limit: `200` +- SQL/context tool-loop turns for new questions: `15` +- SQL/context tool-loop turns for follow-up questions: `6` + +SQL-specific behavior: +- if generated SQL has no `LIMIT`, the SQL guard adds `LIMIT 200` +- if generated SQL asks for more than `200` rows, validation fails + +These limits exist to keep warehouse queries bounded, keep tool loops from wandering indefinitely, and keep the response/UI payloads manageable. + +## Tool Loop Shape + +Inside the SQL/context routes, the model runs an explicit tool loop with a bounded number of turns. 
+ +``` ++------------------------------------+ +| build system + conversation msgs | ++------------------------------------+ + | + v ++------------------------------------+ +| LLM chooses tools | ++------------------------------------+ + | + v ++------------------------------------+ +| retrieve_docs | +| get_schema_snippets | +| search_dbt_models | +| get_dbt_model_info | +| get_distinct_values | +| list_tables_by_keyword | +| check_table_row_count | +| run_sql_query | ++------------------------------------+ + | + v ++------------------------------------+ +| append tool results to messages | ++------------------------------------+ + | + v ++------------------------------------+ +| next step | +| - ask for more tools | +| - finish with answer draft | +| - stop after successful SQL | ++------------------------------------+ +``` + +## Chroma Integration + +There are two layers here on purpose: + +### 1. Generic Chroma transport +Location: +- [`ddpui/utils/vector/chroma/client.py`](../../utils/vector/chroma/client.py) +- [`ddpui/utils/vector/chroma/store.py`](../../utils/vector/chroma/store.py) +- [`ddpui/utils/vector/chroma/types.py`](../../utils/vector/chroma/types.py) + +Responsibilities: +- shared HTTP client creation +- create/load/delete/list collections +- get/query/upsert/delete documents +- normalize Chroma result shapes + +This layer knows how to talk to Chroma, but does not know anything about dashboards, orgs, or dbt business logic. + +### 2. 
Dashboard-chat vector layer +Location: +- [`vector/store.py`](./vector/store.py) +- [`vector/documents.py`](./vector/documents.py) +- [`vector/builder.py`](./vector/builder.py) +- [`vector/building.py`](./vector/building.py) +- [`vector/embeddings.py`](./vector/embeddings.py) + +Responsibilities: +- build dashboard-chat collection names +- define dashboard-chat document/source types +- build embeddings +- filter retrieval by source type and dashboard id +- build org-scoped collections and rebuild them from app/dbt context + +### What Gets Vectorized + +Current source types: +- `org_context` +- `dashboard_context` +- `dashboard_export` +- `dbt_manifest` +- `dbt_catalog` + +At retrieval time, the runtime can search one or more of these source types depending on the question. + +## Background Vector Refresh + +Vector context is not rebuilt on every user message. It is refreshed in the background. + +Main Celery tasks: +- `schedule_dashboard_chat_context_builds` +- `build_dashboard_chat_context_for_org` + +Periodic schedule: +- every 3 hours via Celery beat + +Flow: +``` ++------------------------------+ +| Celery beat | ++------------------------------+ + | + v ++------------------------------+ +| schedule_dashboard_chat_ | +| context_builds | ++------------------------------+ + | + v ++------------------------------+ +| find eligible orgs | ++------------------------------+ + | + v ++------------------------------+ +| enqueue one build per org | ++------------------------------+ + | + v ++------------------------------+ +| build_dashboard_chat_ | +| context_for_org | ++------------------------------+ + | + v ++------------------------------+ +| acquire Redis lock | +| generate dbt docs if needed | +| build vector documents | +| write versioned collection | +| update vector_last_ingested | +| GC inactive collections | +| release lock | ++------------------------------+ +``` + +Eligibility is based on: +- org has dbt configured +- org has AI data sharing enabled +- org 
has `AI_DASHBOARD_CHAT` feature flag enabled + +If dbt is not configured: +- vector context is not built for that org +- the background build task skips the org +- live chat is rejected with "Chat with dashboards is not available because dbt is not configured" + +The feature flag and dbt requirement are separate: +- an org can have `AI_DASHBOARD_CHAT` enabled +- but if dbt is missing, chat is still unavailable at runtime + +## Main Runtime Data Sources + +The runtime combines several different kinds of context: + +- dashboard export + - chart metadata, filters, datasets for the current dashboard +- allowlist + - dashboard tables plus upstream dbt lineage tables +- compact dbt index + - deterministic model/column/lineage lookup for allowlisted dbt resources +- vector retrieval + - semantic matching across org/dashboard/dbt docs +- warehouse tools + - deterministic schema inspection, distinct validation, and SQL execution + +These sources are intentionally different: +- vector retrieval is good for fuzzy semantic matching +- the compact dbt index is good for deterministic dbt lookups (ex: upstream models) +- warehouse tools are good for trustworthy data answers + +## Why We Cache + +This feature uses caching for stability and cost control + +### 1. Session snapshot cache +Location: +- `orchestration/session_snapshot.py` +- `sessions/cache.py` + +What is cached: +- dashboard export +- compact dbt index +- allowlist +- schema snippet cache +- validated distinct-value cache + +What is not cached here: +- prior chat turns themselves + +Previous questions and answers are persisted separately as chat messages and are passed back in as conversation history on each new turn. 
+ +TTL: +- 24 hours + +Why this exists: +- a chat session should keep using a stable dashboard context across follow-up turns +- other users or background refreshes should not change the dashboard/dbt context underneath an active conversation +- schema lookups and distinct-value validations should carry across follow-ups instead of starting cold every turn +- the runtime should not have to rebuild the same dashboard export, allowlist, and compact dbt index on every user message + +This is not LangGraph checkpoint persistence. It is app-level session context freezing. + +### 2. Shared process-level clients +Location: +- `orchestration/orchestrator.py` +- `ddpui/utils/vector/chroma/client.py` + +What is reused: +- shared dashboard-chat runtime +- shared Chroma HTTP client +- shared OpenAI clients inside their wrappers + +Why this exists: +- avoid rebuilding heavy clients/graph objects for every request +- reduce connection churn + +### 3. Prompt handling + +Prompt lookup works like this: +- read the prompt row from the DB if present +- otherwise fall back to the built-in default in `agents/prompt_store.py` + +## DB-Backed Logging / Trace + +The main trace for this feature lives in Postgres, not just in application logs. 
+ +Primary tables: +- `dashboard_chat_session` +- `dashboard_chat_message` + +Persisted trace fields: + +- `dashboard_chat_session` + - fields: `org`, `orguser`, `dashboard`, `session_id`, `vector_collection_name` + - gives us: who the conversation belongs to, which dashboard it is scoped to, and which vector collection was pinned for that session +- `dashboard_chat_message` (`user`) + - fields: `content`, `client_message_id`, `created_at` + - gives us: what the user asked and when +- `dashboard_chat_message` (`assistant`) + - fields: `content`, `payload`, `created_at` + - gives us: the final answer plus structured trace data +- `dashboard_chat_message` (`assistant`) + - field: `response_latency_ms` + - gives us: end-to-end latency from persisted user message to persisted assistant reply +- `dashboard_chat_message` (`assistant`) + - field: `timing_breakdown` + - gives us: explicit backend step timings for retracing slow turns later + +Assistant `payload` includes: +- intent +- citations +- warnings +- SQL +- SQL results +- usage +- tool call summaries +- response metadata + +`timing_breakdown` currently captures: +- `graph_nodes_ms` + - timings for graph steps like `load_context`, `route_intent`, the route handler node, and `finalize` +- `tool_loop_ms` + - total time spent inside the tool loop for routed question turns +- `tool_calls_ms` + - per-tool-call durations +- `runtime_total_ms` + - total backend runtime time for the turn + +This gives us a DB-backed turn trace for questions like: +- which user in which org asked what? +- which session/dashboard was it in? +- what tools ran? +- what SQL ran? +- how long did the response take? +- which part of the backend was slow? + +This is the main place to look for later analysis of answer quality, latency, and flow behavior. + +There are still normal backend logs as well, but those are secondary compared to the persisted session/message trace. + +## File Guide + +This is the quickest way to navigate the package. 
+ +### Root +- [`config.py`](./config.py) + - environment-driven runtime, vector, and source configuration +- [`events.py`](./events.py) + - websocket event helpers / channel group naming + +### `agents/` +- [`interface.py`](./agents/interface.py) + - LLM client protocol used by the runtime +- [`openai.py`](./agents/openai.py) + - OpenAI-backed intent classification, tool-loop, and final-answer composition +- [`answer_formatting.py`](./agents/answer_formatting.py) + - helpers for structured final answer composition +- [`prompt_store.py`](./agents/prompt_store.py) + - DB-backed prompt lookup with built-in defaults + +### `context/` +- [`allowlist.py`](./context/allowlist.py) + - dashboard export -> allowlisted tables/dbt lineage -> compact dbt index +- [`dbt_docs.py`](./context/dbt_docs.py) + - dbt docs generation/loading helpers for manifest/catalog artifacts + +### `contracts/` +- [`conversation.py`](./contracts/conversation.py) + - conversation history and follow-up context contracts +- [`intents.py`](./contracts/intents.py) + - intent enums and routing decisions +- [`response.py`](./contracts/response.py) + - final response, citations, usage, tool-call metadata +- [`retrieval.py`](./contracts/retrieval.py) + - retrieved document contracts +- [`sql.py`](./contracts/sql.py) + - SQL validation and schema snippet contracts + +### `orchestration/` +- [`orchestrator.py`](./orchestration/orchestrator.py) + - runtime entry point and shared runtime getter +- [`definition.py`](./orchestration/definition.py) + - explicit LangGraph wiring +- [`bindings.py`](./orchestration/bindings.py) + - binds split helper modules onto the runtime class +- [`state.py`](./orchestration/state.py) + - graph state shape and lightweight constants +- [`nodes.py`](./orchestration/nodes.py) + - node handlers and route-specific behavior +- [`tool_specifications.py`](./orchestration/tool_specifications.py) + - tool schema exposed to the LLM +- [`tool_loop.py`](./orchestration/tool_loop.py) + - 
bounded tool-loop execution +- [`tool_handlers.py`](./orchestration/tool_handlers.py) + - implementation of each tool +- [`retrieval.py`](./orchestration/retrieval.py) + - Chroma retrieval + citations +- [`conversation.py`](./orchestration/conversation.py) + - conversation-context extraction and follow-up helpers +- [`session_snapshot.py`](./orchestration/session_snapshot.py) + - session-stable snapshot load/persist logic +- [`sql_parsing.py`](./orchestration/sql_parsing.py) + - SQL parsing helpers used during validation/execution +- [`sql_execution.py`](./orchestration/sql_execution.py) + - safe SQL execution path +- [`presentation.py`](./orchestration/presentation.py) + - response format selection and final answer assembly +- [`message_stack.py`](./orchestration/message_stack.py) + - message-building helpers for the tool loop +- [`source_identifiers.py`](./orchestration/source_identifiers.py) + - parsing helpers for chart/dbt source identifiers + +### `sessions/` +- [`service.py`](./sessions/service.py) + - create/reuse sessions, persist messages, serialize message payloads +- [`cache.py`](./sessions/cache.py) + - cache key + serializer helpers for session snapshots + +### `vector/` +- [`documents.py`](./vector/documents.py) + - vector document dataclasses, source types, collection naming helpers +- [`embeddings.py`](./vector/embeddings.py) + - embedding provider protocol + OpenAI embeddings adapter +- [`store.py`](./vector/store.py) + - dashboard-chat adapter on top of the shared Chroma wrapper +- [`builder.py`](./vector/builder.py) + - build vector documents from org/dashboard/dbt context +- [`building.py`](./vector/building.py) + - end-to-end org vector rebuild workflow and collection GC + +### `warehouse/` +- [`tools.py`](./warehouse/tools.py) + - read-only warehouse helpers for schema, distincts, row counts, SQL execution +- [`sql_guard.py`](./warehouse/sql_guard.py) + - allowlist enforcement and SQL safety checks + +## Websocket + Persistence Integration + 
+
Main files:
- `ddpui/websockets/dashboard_chat_consumer.py`
- `sessions/service.py`
- `ddpui/celeryworkers/tasks.py`

Important behavior:
- websocket receives `send_message`
- session is validated against the current org/dashboard
- user message is persisted first
- assistant reply is persisted after runtime completion
- duplicate user messages reuse the existing assistant reply when possible
- frontend receives normalized websocket response envelopes, not custom ad hoc payloads

## Practical Debugging Notes

If chat is failing, the fastest places to inspect are:
- websocket consumer for auth/session issues
- `execute_dashboard_chat_turn` for persistence/runtime wiring
- `orchestration/orchestrator.py` for runtime construction
- `orchestration/nodes.py` for route choice and final response creation
- `vector/building.py` if retrieval data is stale or missing
- `warehouse/sql_guard.py` if SQL is being rejected
- `sessions/cache.py` + `orchestration/session_snapshot.py` if follow-ups behave inconsistently
diff --git a/ddpui/core/dashboard_chat/agents/__init__.py b/ddpui/core/dashboard_chat/agents/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/llm_answer_formatting.py b/ddpui/core/dashboard_chat/agents/answer_formatting.py similarity index 97% rename from ddpui/core/dashboard_chat/llm_answer_formatting.py rename to ddpui/core/dashboard_chat/agents/answer_formatting.py index af41d0d2d..0f9361a52 100644 --- a/ddpui/core/dashboard_chat/llm_answer_formatting.py +++ b/ddpui/core/dashboard_chat/agents/answer_formatting.py @@ -3,7 +3,7 @@ import json from typing import Any -from ddpui.core.dashboard_chat.runtime_types import DashboardChatRetrievedDocument +from ddpui.core.dashboard_chat.contracts import DashboardChatRetrievedDocument TABLE_SUMMARY_JSON_INSTRUCTIONS = """
For table-like responses, return valid JSON only with this shape:
diff --git a/ddpui/core/dashboard_chat/llm_client.py 
b/ddpui/core/dashboard_chat/agents/interface.py similarity index 96% rename from ddpui/core/dashboard_chat/llm_client.py rename to ddpui/core/dashboard_chat/agents/interface.py index d0e4b0494..30a7e3dd0 100644 --- a/ddpui/core/dashboard_chat/llm_client.py +++ b/ddpui/core/dashboard_chat/agents/interface.py @@ -2,7 +2,7 @@ from typing import Any, Protocol -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatIntent, DashboardChatIntentDecision, diff --git a/ddpui/core/dashboard_chat/openai_llm_client.py b/ddpui/core/dashboard_chat/agents/openai.py similarity index 98% rename from ddpui/core/dashboard_chat/openai_llm_client.py rename to ddpui/core/dashboard_chat/agents/openai.py index 23639512f..b283e15ec 100644 --- a/ddpui/core/dashboard_chat/openai_llm_client.py +++ b/ddpui/core/dashboard_chat/agents/openai.py @@ -8,14 +8,14 @@ from openai import OpenAI -from ddpui.core.dashboard_chat.llm_answer_formatting import ( +from ddpui.core.dashboard_chat.agents.answer_formatting import ( TABLE_SUMMARY_JSON_INSTRUCTIONS, build_final_answer_context_payload, format_table_summary_markdown, serialize_final_answer_context_payload, ) -from ddpui.core.dashboard_chat.prompt_store import DashboardChatPromptStore -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.agents.prompt_store import DashboardChatPromptStore +from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatFollowUpContext, DashboardChatIntent, diff --git a/ddpui/core/dashboard_chat/prompt_store.py b/ddpui/core/dashboard_chat/agents/prompt_store.py similarity index 95% rename from ddpui/core/dashboard_chat/prompt_store.py rename to ddpui/core/dashboard_chat/agents/prompt_store.py index 31b6b5e37..5b3b677db 100644 --- a/ddpui/core/dashboard_chat/prompt_store.py +++ b/ddpui/core/dashboard_chat/agents/prompt_store.py @@ -1,11 +1,4 @@ 
"""Database-backed prompt template lookup for dashboard chat.""" - -from django.core.cache import cache - -from ddpui.core.dashboard_chat.prompt_cache import ( - DASHBOARD_CHAT_PROMPT_CACHE_TTL_SECONDS, - build_dashboard_chat_prompt_cache_key, -) from ddpui.models.dashboard_chat import ( DashboardChatPromptTemplate, DashboardChatPromptTemplateKey, @@ -236,28 +229,22 @@ class DashboardChatPromptStore: - """Cached lookup for dashboard chat prompt templates.""" + """Lookup helper for dashboard chat prompt templates.""" def get(self, prompt_key: DashboardChatPromptTemplateKey | str) -> str: - """Return one prompt template from cache, DB, or built-in defaults.""" + """Return one prompt template from the DB or built-in defaults.""" normalized_prompt_key = ( prompt_key.value if isinstance(prompt_key, DashboardChatPromptTemplateKey) else str(prompt_key) ) - cache_key = build_dashboard_chat_prompt_cache_key(normalized_prompt_key) - cached_prompt = cache.get(cache_key) - if cached_prompt is not None: - return cached_prompt stored_prompt = ( DashboardChatPromptTemplate.objects.filter(key=normalized_prompt_key) .values_list("prompt", flat=True) .first() ) - prompt = ( + return ( stored_prompt or DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey(normalized_prompt_key)] ) - cache.set(cache_key, prompt, DASHBOARD_CHAT_PROMPT_CACHE_TTL_SECONDS) - return prompt diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index 43054efe2..6064424d0 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -4,7 +4,7 @@ from dataclasses import dataclass import os -from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType def _parse_bool(value: str | None, default: bool) -> bool: diff --git a/ddpui/core/dashboard_chat/context/__init__.py b/ddpui/core/dashboard_chat/context/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/allowlist.py b/ddpui/core/dashboard_chat/context/allowlist.py similarity index 100% rename from ddpui/core/dashboard_chat/allowlist.py rename to ddpui/core/dashboard_chat/context/allowlist.py diff --git a/ddpui/core/dashboard_chat/dbt_docs.py b/ddpui/core/dashboard_chat/context/dbt_docs.py similarity index 100% rename from ddpui/core/dashboard_chat/dbt_docs.py rename to ddpui/core/dashboard_chat/context/dbt_docs.py diff --git a/ddpui/core/dashboard_chat/contracts/__init__.py b/ddpui/core/dashboard_chat/contracts/__init__.py new file mode 100644 index 000000000..fb1a3ad4f --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/__init__.py @@ -0,0 +1,27 @@ +"""Typed contracts for dashboard chat orchestration.""" + +from .conversation import ( + DashboardChatConversationContext, + DashboardChatConversationMessage, +) +from .intents import ( + DashboardChatFollowUpContext, + DashboardChatIntent, + DashboardChatIntentDecision, +) +from .response import DashboardChatCitation, DashboardChatResponse +from .retrieval import DashboardChatRetrievedDocument, DashboardChatSchemaSnippet +from .sql import DashboardChatSqlValidationResult + +__all__ = [ + "DashboardChatCitation", + "DashboardChatConversationContext", + "DashboardChatConversationMessage", + "DashboardChatFollowUpContext", + "DashboardChatIntent", + "DashboardChatIntentDecision", + "DashboardChatResponse", + "DashboardChatRetrievedDocument", + "DashboardChatSchemaSnippet", + "DashboardChatSqlValidationResult", +] diff --git a/ddpui/core/dashboard_chat/contracts/conversation.py b/ddpui/core/dashboard_chat/contracts/conversation.py new file mode 100644 index 000000000..4af710032 --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/conversation.py @@ -0,0 +1,28 @@ +"""Conversation-related dashboard chat contracts.""" + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass(frozen=True) +class DashboardChatConversationMessage: + 
"""Single prior conversation message.""" + + role: str + content: str + payload: dict[str, Any] = field(default_factory=dict) + + +@dataclass(frozen=True) +class DashboardChatConversationContext: + """Reusable context extracted from prior assistant turns.""" + + last_sql_query: str | None = None + last_tables_used: list[str] = field(default_factory=list) + last_chart_ids: list[str] = field(default_factory=list) + last_metrics: list[str] = field(default_factory=list) + last_dimensions: list[str] = field(default_factory=list) + last_filters: list[str] = field(default_factory=list) + last_response_type: str | None = None + last_answer_text: str | None = None + last_intent: str | None = None diff --git a/ddpui/core/dashboard_chat/contracts/intents.py b/ddpui/core/dashboard_chat/contracts/intents.py new file mode 100644 index 000000000..cd93e2fd6 --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/intents.py @@ -0,0 +1,42 @@ +"""Intent-routing dashboard chat contracts.""" + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any + + +class DashboardChatIntent(str, Enum): + """Prototype-aligned top-level intents for dashboard chat.""" + + QUERY_WITH_SQL = "query_with_sql" + QUERY_WITHOUT_SQL = "query_without_sql" + FOLLOW_UP_SQL = "follow_up_sql" + FOLLOW_UP_CONTEXT = "follow_up_context" + NEEDS_CLARIFICATION = "needs_clarification" + SMALL_TALK = "small_talk" + IRRELEVANT = "irrelevant" + + +@dataclass(frozen=True) +class DashboardChatFollowUpContext: + """Prototype-style follow-up metadata returned by the router.""" + + is_follow_up: bool + follow_up_type: str | None = None + reusable_elements: dict[str, Any] = field(default_factory=dict) + modification_instruction: str | None = None + + +@dataclass(frozen=True) +class DashboardChatIntentDecision: + """Intent-routing outcome.""" + + intent: DashboardChatIntent + confidence: float + reason: str + missing_info: list[str] = field(default_factory=list) + force_tool_usage: bool = False + 
clarification_question: str | None = None + follow_up_context: DashboardChatFollowUpContext = field( + default_factory=lambda: DashboardChatFollowUpContext(is_follow_up=False) + ) diff --git a/ddpui/core/dashboard_chat/contracts/response.py b/ddpui/core/dashboard_chat/contracts/response.py new file mode 100644 index 000000000..477aa0a46 --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/response.py @@ -0,0 +1,55 @@ +"""Response-related dashboard chat contracts.""" + +from dataclasses import asdict, dataclass, field +import json +from typing import Any + +from django.core.serializers.json import DjangoJSONEncoder + +from .intents import DashboardChatIntent + + +@dataclass(frozen=True) +class DashboardChatCitation: + """Citation attached to a chat response.""" + + source_type: str + source_identifier: str + title: str + snippet: str + dashboard_id: int | None = None + table_name: str | None = None + + def to_dict(self) -> dict[str, Any]: + """Return a serializable citation payload.""" + return asdict(self) + + +@dataclass(frozen=True) +class DashboardChatResponse: + """Final runtime response returned by the LangGraph runner.""" + + answer_text: str + intent: DashboardChatIntent + citations: list[DashboardChatCitation] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + sql: str | None = None + sql_results: list[dict[str, Any]] | None = None + usage: dict[str, Any] = field(default_factory=dict) + tool_calls: list[dict[str, Any]] = field(default_factory=list) + metadata: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + """Return a serializable payload.""" + payload = { + "answer_text": self.answer_text, + "intent": self.intent.value, + "citations": [citation.to_dict() for citation in self.citations], + "warnings": self.warnings, + "sql": self.sql, + "sql_results": self.sql_results, + "usage": self.usage, + "tool_calls": self.tool_calls, + "metadata": self.metadata, + } + return 
json.loads(json.dumps(payload, cls=DjangoJSONEncoder)) diff --git a/ddpui/core/dashboard_chat/contracts/retrieval.py b/ddpui/core/dashboard_chat/contracts/retrieval.py new file mode 100644 index 000000000..ea9a07615 --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/retrieval.py @@ -0,0 +1,24 @@ +"""Retrieval-related dashboard chat contracts.""" + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class DashboardChatRetrievedDocument: + """Retrieved document returned from the vector store.""" + + document_id: str + source_type: str + source_identifier: str + content: str + dashboard_id: int | None = None + distance: float | None = None + + +@dataclass(frozen=True) +class DashboardChatSchemaSnippet: + """Schema description for a warehouse table.""" + + table_name: str + columns: list[dict[str, Any]] diff --git a/ddpui/core/dashboard_chat/contracts/sql.py b/ddpui/core/dashboard_chat/contracts/sql.py new file mode 100644 index 000000000..720c3e195 --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/sql.py @@ -0,0 +1,14 @@ +"""SQL-validation dashboard chat contracts.""" + +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class DashboardChatSqlValidationResult: + """Outcome of SQL guard validation.""" + + is_valid: bool + sanitized_sql: str | None + tables: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + errors: list[str] = field(default_factory=list) diff --git a/ddpui/core/dashboard_chat/graph/definition.py b/ddpui/core/dashboard_chat/graph/definition.py deleted file mode 100644 index 9c3eef697..000000000 --- a/ddpui/core/dashboard_chat/graph/definition.py +++ /dev/null @@ -1,45 +0,0 @@ -"""Graph definition helpers for dashboard chat orchestration.""" - -from langgraph.graph import END, START, StateGraph - -from .state import DashboardChatRuntimeState - - -def build_dashboard_chat_graph(runtime): - """Build the explicit prototype-aligned intent graph.""" 
- graph = StateGraph(DashboardChatRuntimeState) - graph.add_node("load_context", runtime._node_load_context) - graph.add_node("route_intent", runtime._node_route_intent) - graph.add_node("handle_small_talk", runtime._node_handle_small_talk) - graph.add_node("handle_irrelevant", runtime._node_handle_irrelevant) - graph.add_node("handle_needs_clarification", runtime._node_handle_needs_clarification) - graph.add_node("handle_query_with_sql", runtime._node_handle_query_with_sql) - graph.add_node("handle_query_without_sql", runtime._node_handle_query_without_sql) - graph.add_node("handle_follow_up_sql", runtime._node_handle_follow_up_sql) - graph.add_node("handle_follow_up_context", runtime._node_handle_follow_up_context) - graph.add_node("finalize", runtime._node_finalize_response) - - graph.add_edge(START, "load_context") - graph.add_edge("load_context", "route_intent") - graph.add_conditional_edges( - "route_intent", - runtime._route_after_intent, - { - "small_talk": "handle_small_talk", - "irrelevant": "handle_irrelevant", - "needs_clarification": "handle_needs_clarification", - "query_with_sql": "handle_query_with_sql", - "query_without_sql": "handle_query_without_sql", - "follow_up_sql": "handle_follow_up_sql", - "follow_up_context": "handle_follow_up_context", - }, - ) - graph.add_edge("handle_small_talk", "finalize") - graph.add_edge("handle_irrelevant", "finalize") - graph.add_edge("handle_needs_clarification", "finalize") - graph.add_edge("handle_query_with_sql", "finalize") - graph.add_edge("handle_query_without_sql", "finalize") - graph.add_edge("handle_follow_up_sql", "finalize") - graph.add_edge("handle_follow_up_context", "finalize") - graph.add_edge("finalize", END) - return graph.compile() diff --git a/ddpui/core/dashboard_chat/graph/__init__.py b/ddpui/core/dashboard_chat/orchestration/__init__.py similarity index 100% rename from ddpui/core/dashboard_chat/graph/__init__.py rename to ddpui/core/dashboard_chat/orchestration/__init__.py diff --git 
a/ddpui/core/dashboard_chat/graph/bindings.py b/ddpui/core/dashboard_chat/orchestration/bindings.py similarity index 100% rename from ddpui/core/dashboard_chat/graph/bindings.py rename to ddpui/core/dashboard_chat/orchestration/bindings.py diff --git a/ddpui/core/dashboard_chat/graph/conversation.py b/ddpui/core/dashboard_chat/orchestration/conversation.py similarity index 98% rename from ddpui/core/dashboard_chat/graph/conversation.py rename to ddpui/core/dashboard_chat/orchestration/conversation.py index 9fdbb6a5a..2f6b46e8c 100644 --- a/ddpui/core/dashboard_chat/graph/conversation.py +++ b/ddpui/core/dashboard_chat/orchestration/conversation.py @@ -4,11 +4,11 @@ import re from typing import Any -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, ) -from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard from .source_identifiers import chart_id_from_source_identifier diff --git a/ddpui/core/dashboard_chat/orchestration/definition.py b/ddpui/core/dashboard_chat/orchestration/definition.py new file mode 100644 index 000000000..3dd569820 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/definition.py @@ -0,0 +1,85 @@ +"""Graph definition helpers for dashboard chat orchestration.""" + +from time import perf_counter + +from langgraph.graph import END, START, StateGraph + +from .state import DashboardChatRuntimeState + + +def _timed_node(node_name, handler): + """Wrap one graph node so per-node duration is persisted on state.""" + + def wrapped(state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: + started_at = perf_counter() + next_state = handler(state) + elapsed_ms = round((perf_counter() - started_at) * 1000, 2) + timing_breakdown = dict(next_state.get("timing_breakdown") or {}) + graph_nodes_ms = 
dict(timing_breakdown.get("graph_nodes_ms") or {}) + graph_nodes_ms[node_name] = elapsed_ms + timing_breakdown["graph_nodes_ms"] = graph_nodes_ms + next_state["timing_breakdown"] = timing_breakdown + return next_state + + return wrapped + + +def build_dashboard_chat_graph(runtime): + """Build the explicit prototype-aligned intent graph.""" + graph = StateGraph(DashboardChatRuntimeState) + graph.add_node("load_context", _timed_node("load_context", runtime._node_load_context)) + graph.add_node("route_intent", _timed_node("route_intent", runtime._node_route_intent)) + graph.add_node( + "handle_small_talk", + _timed_node("handle_small_talk", runtime._node_handle_small_talk), + ) + graph.add_node( + "handle_irrelevant", + _timed_node("handle_irrelevant", runtime._node_handle_irrelevant), + ) + graph.add_node( + "handle_needs_clarification", + _timed_node("handle_needs_clarification", runtime._node_handle_needs_clarification), + ) + graph.add_node( + "handle_query_with_sql", + _timed_node("handle_query_with_sql", runtime._node_handle_query_with_sql), + ) + graph.add_node( + "handle_query_without_sql", + _timed_node("handle_query_without_sql", runtime._node_handle_query_without_sql), + ) + graph.add_node( + "handle_follow_up_sql", + _timed_node("handle_follow_up_sql", runtime._node_handle_follow_up_sql), + ) + graph.add_node( + "handle_follow_up_context", + _timed_node("handle_follow_up_context", runtime._node_handle_follow_up_context), + ) + graph.add_node("finalize", _timed_node("finalize", runtime._node_finalize_response)) + + graph.add_edge(START, "load_context") + graph.add_edge("load_context", "route_intent") + graph.add_conditional_edges( + "route_intent", + runtime._route_after_intent, + { + "small_talk": "handle_small_talk", + "irrelevant": "handle_irrelevant", + "needs_clarification": "handle_needs_clarification", + "query_with_sql": "handle_query_with_sql", + "query_without_sql": "handle_query_without_sql", + "follow_up_sql": "handle_follow_up_sql", + 
"follow_up_context": "handle_follow_up_context", + }, + ) + graph.add_edge("handle_small_talk", "finalize") + graph.add_edge("handle_irrelevant", "finalize") + graph.add_edge("handle_needs_clarification", "finalize") + graph.add_edge("handle_query_with_sql", "finalize") + graph.add_edge("handle_query_without_sql", "finalize") + graph.add_edge("handle_follow_up_sql", "finalize") + graph.add_edge("handle_follow_up_context", "finalize") + graph.add_edge("finalize", END) + return graph.compile() diff --git a/ddpui/core/dashboard_chat/graph/message_stack.py b/ddpui/core/dashboard_chat/orchestration/message_stack.py similarity index 96% rename from ddpui/core/dashboard_chat/graph/message_stack.py rename to ddpui/core/dashboard_chat/orchestration/message_stack.py index 232b73bc8..a832fb965 100644 --- a/ddpui/core/dashboard_chat/graph/message_stack.py +++ b/ddpui/core/dashboard_chat/orchestration/message_stack.py @@ -3,7 +3,7 @@ from collections.abc import Sequence from typing import Any -from ddpui.core.dashboard_chat.runtime_types import DashboardChatConversationMessage +from ddpui.core.dashboard_chat.contracts import DashboardChatConversationMessage from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey from .state import DashboardChatRuntimeState diff --git a/ddpui/core/dashboard_chat/graph/nodes.py b/ddpui/core/dashboard_chat/orchestration/nodes.py similarity index 87% rename from ddpui/core/dashboard_chat/graph/nodes.py rename to ddpui/core/dashboard_chat/orchestration/nodes.py index 04225c8f3..6897ce3db 100644 --- a/ddpui/core/dashboard_chat/graph/nodes.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes.py @@ -1,7 +1,7 @@ """LangGraph node handlers for dashboard chat.""" -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import ( DashboardChatCitation, 
DashboardChatIntent, DashboardChatResponse, @@ -153,6 +153,24 @@ def _run_intent_tool_loop( allowlist=allowlist, ) state["tool_calls"] = execution_result["tool_calls"] + existing_timing_breakdown = dict(state.get("timing_breakdown") or {}) + execution_timing_breakdown = dict(execution_result.get("timing_breakdown") or {}) + merged_timing_breakdown = dict(existing_timing_breakdown) + if "graph_nodes_ms" in existing_timing_breakdown or "graph_nodes_ms" in execution_timing_breakdown: + merged_timing_breakdown["graph_nodes_ms"] = { + **dict(existing_timing_breakdown.get("graph_nodes_ms") or {}), + **dict(execution_timing_breakdown.get("graph_nodes_ms") or {}), + } + if "tool_calls_ms" in existing_timing_breakdown or "tool_calls_ms" in execution_timing_breakdown: + merged_timing_breakdown["tool_calls_ms"] = list( + execution_timing_breakdown.get("tool_calls_ms") + or existing_timing_breakdown.get("tool_calls_ms") + or [] + ) + for key, value in execution_timing_breakdown.items(): + if key not in {"graph_nodes_ms", "tool_calls_ms"}: + merged_timing_breakdown[key] = value + state["timing_breakdown"] = merged_timing_breakdown state["sql"] = execution_result["sql"] state["sql_validation"] = execution_result["sql_validation"] state["sql_results"] = execution_result["sql_results"] diff --git a/ddpui/core/dashboard_chat/graph/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py similarity index 68% rename from ddpui/core/dashboard_chat/graph/orchestrator.py rename to ddpui/core/dashboard_chat/orchestration/orchestrator.py index 7651fcea2..74d882924 100644 --- a/ddpui/core/dashboard_chat/graph/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -2,18 +2,20 @@ from collections.abc import Callable, Sequence from functools import lru_cache +from time import perf_counter from typing import Any from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig -from ddpui.core.dashboard_chat.llm_client 
import DashboardChatLlmClient -from ddpui.core.dashboard_chat.openai_llm_client import OpenAIDashboardChatLlmClient -from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore -from ddpui.core.dashboard_chat.warehouse_tools import DashboardChatWarehouseTools +from ddpui.core.dashboard_chat.agents.interface import DashboardChatLlmClient +from ddpui.core.dashboard_chat.agents.openai import OpenAIDashboardChatLlmClient +from ddpui.core.dashboard_chat.contracts import DashboardChatResponse +from ddpui.core.dashboard_chat.vector.store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools from ddpui.models.org import Org from .bindings import bind_dashboard_chat_runtime_methods from .definition import build_dashboard_chat_graph -from .state import DashboardChatRuntimeState, GREETING_PATTERN +from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN from .tool_specifications import DASHBOARD_CHAT_TOOL_SPECIFICATIONS @@ -67,11 +69,32 @@ def run( "vector_collection_name": vector_collection_name, "user_query": user_query, "conversation_history": self._normalize_conversation_history(conversation_history), + "timing_breakdown": { + "graph_nodes_ms": {}, + "tool_calls_ms": [], + }, "warnings": [], "usage": {}, } + runtime_started_at = perf_counter() final_state = self.graph.invoke(initial_state) - return final_state["response"] + runtime_total_ms = round((perf_counter() - runtime_started_at) * 1000, 2) + response = final_state["response"] + timing_breakdown = dict(final_state.get("timing_breakdown") or {}) + timing_breakdown["runtime_total_ms"] = runtime_total_ms + response_metadata = dict(response.metadata) + response_metadata["timing_breakdown"] = timing_breakdown + return DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=response.citations, + warnings=response.warnings, + sql=response.sql, + 
sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response_metadata, + ) @lru_cache(maxsize=1) def get_dashboard_chat_runtime() -> DashboardChatRuntime: @@ -83,6 +106,6 @@ def get_dashboard_chat_runtime() -> DashboardChatRuntime: __all__ = [ "DashboardChatRuntime", "DashboardChatRuntimeState", - "GREETING_PATTERN", + "SMALL_TALK_FAST_PATH_PATTERN", "get_dashboard_chat_runtime", ] diff --git a/ddpui/core/dashboard_chat/graph/presentation.py b/ddpui/core/dashboard_chat/orchestration/presentation.py similarity index 92% rename from ddpui/core/dashboard_chat/graph/presentation.py rename to ddpui/core/dashboard_chat/orchestration/presentation.py index 0e2920d66..a1c0b9a8c 100644 --- a/ddpui/core/dashboard_chat/graph/presentation.py +++ b/ddpui/core/dashboard_chat/orchestration/presentation.py @@ -4,13 +4,13 @@ import logging from typing import Any -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.contracts import ( DashboardChatIntent, DashboardChatIntentDecision, DashboardChatRetrievedDocument, ) -from .state import DashboardChatRuntimeState, GREETING_PATTERN +from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN logger = logging.getLogger(__name__) @@ -36,9 +36,12 @@ def _summarize_tool_call( tool_name: str, args: dict[str, Any], result: dict[str, Any], + duration_ms: float | None = None, ) -> dict[str, Any]: """Persist a compact execution trace for one tool call.""" entry: dict[str, Any] = {"name": tool_name, "args": args} + if duration_ms is not None: + entry["duration_ms"] = duration_ms if tool_name == "retrieve_docs": entry["count"] = result.get("count", 0) entry["doc_ids"] = [doc.get("doc_id") for doc in result.get("docs", [])[:6]] @@ -190,19 +193,29 @@ def _compose_small_talk_response(self, user_query: str) -> str: def _build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | None: - """Handle obvious greetings and thanks without an llm 
round trip.""" - if not GREETING_PATTERN.match(user_query.strip()): + """Handle obvious greetings, thanks, and basic capability prompts without an llm round trip.""" + if not SMALL_TALK_FAST_PATH_PATTERN.match(user_query.strip()): return None return DashboardChatIntentDecision( intent=DashboardChatIntent.SMALL_TALK, confidence=1.0, - reason="Obvious greeting or thanks", + reason="Obvious small-talk fast path", ) def _build_fast_path_small_talk_response(user_query: str) -> str: - """Keep greeting replies instant and deterministic.""" + """Keep basic small-talk replies instant and deterministic.""" normalized_query = user_query.strip().lower() + if "what can you do" in normalized_query: + return ( + "I can explain this dashboard, describe charts and metrics, look up dbt context, " + "and answer data questions with safe read-only SQL against this dashboard's data." + ) + if "who are you" in normalized_query: + return ( + "I'm the dashboard chat assistant for this dashboard. I can explain the charts, " + "data, dbt context, and answer questions about the data behind it." + ) if "thank" in normalized_query: return "You're welcome. Ask me anything about this dashboard or its data." 
if "good morning" in normalized_query: diff --git a/ddpui/core/dashboard_chat/graph/retrieval.py b/ddpui/core/dashboard_chat/orchestration/retrieval.py similarity index 98% rename from ddpui/core/dashboard_chat/graph/retrieval.py rename to ddpui/core/dashboard_chat/orchestration/retrieval.py index 8ac453911..f6b89a675 100644 --- a/ddpui/core/dashboard_chat/graph/retrieval.py +++ b/ddpui/core/dashboard_chat/orchestration/retrieval.py @@ -3,15 +3,15 @@ from collections.abc import Sequence from typing import Any -from ddpui.core.dashboard_chat.allowlist import ( +from ddpui.core.dashboard_chat.context.allowlist import ( DashboardChatAllowlist, build_dashboard_chat_table_name, ) -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.contracts import ( DashboardChatCitation, DashboardChatRetrievedDocument, ) -from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType from .source_identifiers import ( chart_id_from_source_identifier, diff --git a/ddpui/core/dashboard_chat/graph/session_snapshot.py b/ddpui/core/dashboard_chat/orchestration/session_snapshot.py similarity index 95% rename from ddpui/core/dashboard_chat/graph/session_snapshot.py rename to ddpui/core/dashboard_chat/orchestration/session_snapshot.py index 43c18c809..4be9b435d 100644 --- a/ddpui/core/dashboard_chat/graph/session_snapshot.py +++ b/ddpui/core/dashboard_chat/orchestration/session_snapshot.py @@ -4,9 +4,9 @@ from django.core.cache import cache -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlistBuilder -from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet -from ddpui.core.dashboard_chat.session_cache import ( +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlistBuilder +from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.sessions.cache import ( 
DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, build_dashboard_chat_session_snapshot_cache_key, deserialize_allowlist, diff --git a/ddpui/core/dashboard_chat/graph/source_identifiers.py b/ddpui/core/dashboard_chat/orchestration/source_identifiers.py similarity index 100% rename from ddpui/core/dashboard_chat/graph/source_identifiers.py rename to ddpui/core/dashboard_chat/orchestration/source_identifiers.py diff --git a/ddpui/core/dashboard_chat/graph/sql_execution.py b/ddpui/core/dashboard_chat/orchestration/sql_execution.py similarity index 98% rename from ddpui/core/dashboard_chat/graph/sql_execution.py rename to ddpui/core/dashboard_chat/orchestration/sql_execution.py index 421d4ae84..7006800ff 100644 --- a/ddpui/core/dashboard_chat/graph/sql_execution.py +++ b/ddpui/core/dashboard_chat/orchestration/sql_execution.py @@ -7,9 +7,9 @@ from django.core.serializers.json import DjangoJSONEncoder -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.runtime_types import DashboardChatIntent -from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent +from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard from .state import DashboardChatRuntimeState diff --git a/ddpui/core/dashboard_chat/graph/sql_parsing.py b/ddpui/core/dashboard_chat/orchestration/sql_parsing.py similarity index 97% rename from ddpui/core/dashboard_chat/graph/sql_parsing.py rename to ddpui/core/dashboard_chat/orchestration/sql_parsing.py index ae86300da..d09d8fe01 100644 --- a/ddpui/core/dashboard_chat/graph/sql_parsing.py +++ b/ddpui/core/dashboard_chat/orchestration/sql_parsing.py @@ -4,9 +4,9 @@ import re from typing import Any -from ddpui.core.dashboard_chat.allowlist import normalize_dashboard_chat_table_name -from ddpui.core.dashboard_chat.runtime_types import 
DashboardChatSchemaSnippet -from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.context.allowlist import normalize_dashboard_chat_table_name +from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard def _primary_table_name(sql: str) -> str | None: diff --git a/ddpui/core/dashboard_chat/graph/state.py b/ddpui/core/dashboard_chat/orchestration/state.py similarity index 84% rename from ddpui/core/dashboard_chat/graph/state.py rename to ddpui/core/dashboard_chat/orchestration/state.py index a507ddabb..e81971314 100644 --- a/ddpui/core/dashboard_chat/graph/state.py +++ b/ddpui/core/dashboard_chat/orchestration/state.py @@ -3,8 +3,8 @@ from typing import Any, TypedDict import re -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import ( DashboardChatCitation, DashboardChatConversationContext, DashboardChatConversationMessage, @@ -16,8 +16,8 @@ ) from ddpui.models.org import Org -GREETING_PATTERN = re.compile( - r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you)\b[\s!.?]*$", +SMALL_TALK_FAST_PATH_PATTERN = re.compile( + r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you|what\s+can\s+you\s+do|who\s+are\s+you)\b[\s!.?]*$", re.IGNORECASE, ) @@ -42,6 +42,7 @@ class DashboardChatRuntimeState(TypedDict, total=False): retrieved_documents: list[DashboardChatRetrievedDocument] citations: list[DashboardChatCitation] tool_calls: list[dict[str, Any]] + timing_breakdown: dict[str, Any] sql: str | None sql_validation: DashboardChatSqlValidationResult | None sql_results: list[dict[str, Any]] | None diff --git a/ddpui/core/dashboard_chat/graph/tool_handlers.py 
b/ddpui/core/dashboard_chat/orchestration/tool_handlers.py similarity index 98% rename from ddpui/core/dashboard_chat/graph/tool_handlers.py rename to ddpui/core/dashboard_chat/orchestration/tool_handlers.py index 64bb854ad..d82cbb731 100644 --- a/ddpui/core/dashboard_chat/graph/tool_handlers.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_handlers.py @@ -4,9 +4,9 @@ import logging from typing import Any -from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard -from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.warehouse_tools import DashboardChatWarehouseTools +from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools from ddpui.models.org import Org from .state import DashboardChatRuntimeState diff --git a/ddpui/core/dashboard_chat/graph/tool_loop.py b/ddpui/core/dashboard_chat/orchestration/tool_loop.py similarity index 84% rename from ddpui/core/dashboard_chat/graph/tool_loop.py rename to ddpui/core/dashboard_chat/orchestration/tool_loop.py index c7bd619cd..7ccf3f14b 100644 --- a/ddpui/core/dashboard_chat/graph/tool_loop.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_loop.py @@ -2,11 +2,12 @@ import json import logging +from time import perf_counter from typing import Any from django.core.serializers.json import DjangoJSONEncoder -from ddpui.core.dashboard_chat.warehouse_tools import DashboardChatWarehouseToolsError +from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseToolsError from .state import DashboardChatRuntimeState @@ -34,7 +35,11 @@ def _execute_tool_loop( "last_sql": None, "last_sql_results": None, "last_sql_validation": None, + "timing_breakdown": { + "tool_calls_ms": list((state.get("timing_breakdown") or {}).get("tool_calls_ms") or []), + }, } + 
tool_loop_started_at = perf_counter() self._seed_distinct_cache_from_previous_sql(state, execution_context) intent_decision = state["intent_decision"] @@ -80,6 +85,7 @@ def _execute_tool_loop( ), execution_context=execution_context, max_turns_reached=False, + tool_loop_started_at=tool_loop_started_at, ) for tool_call in tool_calls: @@ -90,17 +96,27 @@ def _execute_tool_loop( args = json.loads(raw_args) except json.JSONDecodeError: args = {} + tool_started_at = perf_counter() result = self._execute_tool_call( tool_name=str(tool_call.get("name") or ""), args=args, state=state, execution_context=execution_context, ) + tool_duration_ms = round((perf_counter() - tool_started_at) * 1000, 2) + tool_name = str(tool_call.get("name") or "") + execution_context["timing_breakdown"]["tool_calls_ms"].append( + { + "name": tool_name, + "duration_ms": tool_duration_ms, + } + ) execution_context["tool_calls"].append( self._summarize_tool_call( - tool_name=str(tool_call.get("name") or ""), + tool_name=tool_name, args=args, result=result, + duration_ms=tool_duration_ms, ) ) messages.append( @@ -113,11 +129,12 @@ def _execute_tool_loop( ), } ) - if str(tool_call.get("name") or "") == "run_sql_query" and result.get("success"): + if tool_name == "run_sql_query" and result.get("success"): return self._build_tool_loop_result( answer_text="", execution_context=execution_context, max_turns_reached=False, + tool_loop_started_at=tool_loop_started_at, ) return self._build_tool_loop_result( @@ -127,6 +144,7 @@ def _execute_tool_loop( ), execution_context=execution_context, max_turns_reached=True, + tool_loop_started_at=tool_loop_started_at, ) @@ -173,15 +191,19 @@ def _build_tool_loop_result( answer_text: str, execution_context: dict[str, Any], max_turns_reached: bool, + tool_loop_started_at: float, ) -> dict[str, Any]: """Normalize tool-loop state into one runtime response payload.""" if max_turns_reached: execution_context["tool_calls"].append({"name": "max_turns_reached"}) warnings = 
list(dict.fromkeys(execution_context["warnings"])) + timing_breakdown = dict(execution_context.get("timing_breakdown") or {}) + timing_breakdown["tool_loop_ms"] = round((perf_counter() - tool_loop_started_at) * 1000, 2) return { "answer_text": answer_text.strip(), "retrieved_documents": execution_context["retrieved_documents"], "tool_calls": execution_context["tool_calls"], + "timing_breakdown": timing_breakdown, "sql": execution_context["last_sql"], "sql_validation": execution_context["last_sql_validation"], "sql_results": execution_context["last_sql_results"], diff --git a/ddpui/core/dashboard_chat/graph/tool_specifications.py b/ddpui/core/dashboard_chat/orchestration/tool_specifications.py similarity index 100% rename from ddpui/core/dashboard_chat/graph/tool_specifications.py rename to ddpui/core/dashboard_chat/orchestration/tool_specifications.py diff --git a/ddpui/core/dashboard_chat/prompt_cache.py b/ddpui/core/dashboard_chat/prompt_cache.py deleted file mode 100644 index b27bdd544..000000000 --- a/ddpui/core/dashboard_chat/prompt_cache.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Cache helpers for dashboard chat prompt templates.""" - -DASHBOARD_CHAT_PROMPT_CACHE_TTL_SECONDS = 24 * 60 * 60 - - -def build_dashboard_chat_prompt_cache_key(prompt_key: str) -> str: - """Return the cache key used for one dashboard chat prompt template.""" - return f"dashboard_chat_prompt:{prompt_key}" diff --git a/ddpui/core/dashboard_chat/runtime_types.py b/ddpui/core/dashboard_chat/runtime_types.py deleted file mode 100644 index 6eec55e2f..000000000 --- a/ddpui/core/dashboard_chat/runtime_types.py +++ /dev/null @@ -1,146 +0,0 @@ -"""Typed runtime contracts for dashboard chat orchestration.""" - -from dataclasses import asdict, dataclass, field -from enum import Enum -import json -from typing import Any - -from django.core.serializers.json import DjangoJSONEncoder - - -class DashboardChatIntent(str, Enum): - """Prototype-aligned top-level intents for dashboard chat.""" - - 
QUERY_WITH_SQL = "query_with_sql" - QUERY_WITHOUT_SQL = "query_without_sql" - FOLLOW_UP_SQL = "follow_up_sql" - FOLLOW_UP_CONTEXT = "follow_up_context" - NEEDS_CLARIFICATION = "needs_clarification" - SMALL_TALK = "small_talk" - IRRELEVANT = "irrelevant" - - -@dataclass(frozen=True) -class DashboardChatConversationMessage: - """Single prior conversation message.""" - - role: str - content: str - payload: dict[str, Any] = field(default_factory=dict) - - -@dataclass(frozen=True) -class DashboardChatConversationContext: - """Reusable context extracted from prior assistant turns.""" - - last_sql_query: str | None = None - last_tables_used: list[str] = field(default_factory=list) - last_chart_ids: list[str] = field(default_factory=list) - last_metrics: list[str] = field(default_factory=list) - last_dimensions: list[str] = field(default_factory=list) - last_filters: list[str] = field(default_factory=list) - last_response_type: str | None = None - last_answer_text: str | None = None - last_intent: str | None = None - - -@dataclass(frozen=True) -class DashboardChatFollowUpContext: - """Prototype-style follow-up metadata returned by the router.""" - - is_follow_up: bool - follow_up_type: str | None = None - reusable_elements: dict[str, Any] = field(default_factory=dict) - modification_instruction: str | None = None - - -@dataclass(frozen=True) -class DashboardChatIntentDecision: - """Intent-routing outcome.""" - - intent: DashboardChatIntent - confidence: float - reason: str - missing_info: list[str] = field(default_factory=list) - force_tool_usage: bool = False - clarification_question: str | None = None - follow_up_context: DashboardChatFollowUpContext = field( - default_factory=lambda: DashboardChatFollowUpContext(is_follow_up=False) - ) - - -@dataclass(frozen=True) -class DashboardChatRetrievedDocument: - """Retrieved document returned from the vector store.""" - - document_id: str - source_type: str - source_identifier: str - content: str - dashboard_id: int | None = 
None - distance: float | None = None - - -@dataclass(frozen=True) -class DashboardChatSchemaSnippet: - """Schema description for a warehouse table.""" - - table_name: str - columns: list[dict[str, Any]] - - -@dataclass(frozen=True) -class DashboardChatSqlValidationResult: - """Outcome of SQL guard validation.""" - - is_valid: bool - sanitized_sql: str | None - tables: list[str] = field(default_factory=list) - warnings: list[str] = field(default_factory=list) - errors: list[str] = field(default_factory=list) - - -@dataclass(frozen=True) -class DashboardChatCitation: - """Citation attached to a chat response.""" - - source_type: str - source_identifier: str - title: str - snippet: str - dashboard_id: int | None = None - table_name: str | None = None - - def to_dict(self) -> dict[str, Any]: - """Return a serializable citation payload.""" - return asdict(self) - - -@dataclass(frozen=True) -class DashboardChatResponse: - """Final runtime response returned by the LangGraph runner.""" - - answer_text: str - intent: DashboardChatIntent - citations: list[DashboardChatCitation] = field(default_factory=list) - warnings: list[str] = field(default_factory=list) - sql: str | None = None - sql_results: list[dict[str, Any]] | None = None - usage: dict[str, Any] = field(default_factory=dict) - tool_calls: list[dict[str, Any]] = field(default_factory=list) - metadata: dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict[str, Any]: - """Return a serializable payload.""" - payload = { - "answer_text": self.answer_text, - "intent": self.intent.value, - "citations": [citation.to_dict() for citation in self.citations], - "warnings": self.warnings, - "sql": self.sql, - "sql_results": self.sql_results, - "usage": self.usage, - "tool_calls": self.tool_calls, - "metadata": self.metadata, - } - return json.loads(json.dumps(payload, cls=DjangoJSONEncoder)) diff --git a/ddpui/core/dashboard_chat/sessions/__init__.py b/ddpui/core/dashboard_chat/sessions/__init__.py new file 
mode 100644 index 000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/session_cache.py b/ddpui/core/dashboard_chat/sessions/cache.py similarity index 96% rename from ddpui/core/dashboard_chat/session_cache.py rename to ddpui/core/dashboard_chat/sessions/cache.py index d96216b20..ea9300e26 100644 --- a/ddpui/core/dashboard_chat/session_cache.py +++ b/ddpui/core/dashboard_chat/sessions/cache.py @@ -2,8 +2,8 @@ from typing import Any -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS = 24 * 60 * 60 diff --git a/ddpui/core/dashboard_chat/session_service.py b/ddpui/core/dashboard_chat/sessions/service.py similarity index 91% rename from ddpui/core/dashboard_chat/session_service.py rename to ddpui/core/dashboard_chat/sessions/service.py index 3a5807fc3..dac05688a 100644 --- a/ddpui/core/dashboard_chat/session_service.py +++ b/ddpui/core/dashboard_chat/sessions/service.py @@ -9,8 +9,8 @@ from django.utils import timezone from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig -from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name -from ddpui.core.dashboard_chat.runtime_types import DashboardChatConversationMessage +from ddpui.core.dashboard_chat.vector.documents import build_dashboard_chat_collection_name +from ddpui.core.dashboard_chat.contracts import DashboardChatConversationMessage from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import ( DashboardChatMessage, @@ -110,6 +110,8 @@ def create_dashboard_chat_assistant_message( session: DashboardChatSession, content: str, payload: dict | None, + response_latency_ms: int | None = None, + timing_breakdown: dict | None = None, ) -> 
DashboardChatMessage: """Persist one assistant message and advance the session timestamp.""" return _create_dashboard_chat_message( @@ -118,6 +120,8 @@ def create_dashboard_chat_assistant_message( content=content, client_message_id=None, payload=payload, + response_latency_ms=response_latency_ms, + timing_breakdown=timing_breakdown, ).message @@ -147,6 +151,8 @@ def serialize_dashboard_chat_message(message: DashboardChatMessage) -> dict: "role": message.role, "content": message.content, "payload": message.payload or {}, + "response_latency_ms": message.response_latency_ms, + "timing_breakdown": message.timing_breakdown or {}, "created_at": message.created_at.isoformat(), } @@ -174,6 +180,8 @@ def _create_dashboard_chat_message( content: str, client_message_id: str | None, payload: dict | None, + response_latency_ms: int | None = None, + timing_breakdown: dict | None = None, ) -> DashboardChatMessageCreateResult: """Create a session-scoped chat message with a stable next sequence number.""" created = False @@ -204,6 +212,8 @@ def _create_dashboard_chat_message( content=content, client_message_id=client_message_id, payload=payload, + response_latency_ms=response_latency_ms, + timing_breakdown=timing_breakdown, ) created = True except IntegrityError: diff --git a/ddpui/core/dashboard_chat/vector/__init__.py b/ddpui/core/dashboard_chat/vector/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/vector_document_builder.py b/ddpui/core/dashboard_chat/vector/builder.py similarity index 99% rename from ddpui/core/dashboard_chat/vector_document_builder.py rename to ddpui/core/dashboard_chat/vector/builder.py index 7d13dd87b..04a64c29a 100644 --- a/ddpui/core/dashboard_chat/vector_document_builder.py +++ b/ddpui/core/dashboard_chat/vector/builder.py @@ -6,8 +6,8 @@ from django.utils import timezone from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.dbt_docs import 
DashboardChatDbtDocsArtifacts -from ddpui.core.dashboard_chat.vector_documents import ( +from ddpui.core.dashboard_chat.context.dbt_docs import DashboardChatDbtDocsArtifacts +from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, DashboardChatVectorDocument, ) diff --git a/ddpui/core/dashboard_chat/vector_building.py b/ddpui/core/dashboard_chat/vector/building.py similarity index 95% rename from ddpui/core/dashboard_chat/vector_building.py rename to ddpui/core/dashboard_chat/vector/building.py index 239c41bd6..2f6c8f63e 100644 --- a/ddpui/core/dashboard_chat/vector_building.py +++ b/ddpui/core/dashboard_chat/vector/building.py @@ -6,14 +6,14 @@ from django.utils import timezone -from ddpui.core.dashboard_chat.dbt_docs import ( +from ddpui.core.dashboard_chat.context.dbt_docs import ( DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.vector_document_builder import DashboardChatVectorDocumentBuilder -from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.vector.builder import DashboardChatVectorDocumentBuilder +from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector.store import ChromaDashboardChatVectorStore from ddpui.models.dashboard_chat import DashboardChatSession from ddpui.models.org import Org diff --git a/ddpui/core/dashboard_chat/vector_documents.py b/ddpui/core/dashboard_chat/vector/documents.py similarity index 100% rename from ddpui/core/dashboard_chat/vector_documents.py rename to ddpui/core/dashboard_chat/vector/documents.py diff --git a/ddpui/core/dashboard_chat/embeddings.py b/ddpui/core/dashboard_chat/vector/embeddings.py similarity index 100% rename from ddpui/core/dashboard_chat/embeddings.py 
rename to ddpui/core/dashboard_chat/vector/embeddings.py diff --git a/ddpui/core/dashboard_chat/vector_store.py b/ddpui/core/dashboard_chat/vector/store.py similarity index 98% rename from ddpui/core/dashboard_chat/vector_store.py rename to ddpui/core/dashboard_chat/vector/store.py index 11d76e69b..148fa4139 100644 --- a/ddpui/core/dashboard_chat/vector_store.py +++ b/ddpui/core/dashboard_chat/vector/store.py @@ -5,12 +5,12 @@ from chromadb import ClientAPI -from ddpui.core.dashboard_chat.embeddings import ( +from ddpui.core.dashboard_chat.vector.embeddings import ( DashboardChatEmbeddingProvider, OpenAIEmbeddingProvider, ) from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig -from ddpui.core.dashboard_chat.vector_documents import ( +from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, DashboardChatVectorDocument, build_dashboard_chat_collection_base_name, diff --git a/ddpui/core/dashboard_chat/warehouse/__init__.py b/ddpui/core/dashboard_chat/warehouse/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/sql_guard.py b/ddpui/core/dashboard_chat/warehouse/sql_guard.py similarity index 81% rename from ddpui/core/dashboard_chat/sql_guard.py rename to ddpui/core/dashboard_chat/warehouse/sql_guard.py index 40fbffc9d..9681b3b96 100644 --- a/ddpui/core/dashboard_chat/sql_guard.py +++ b/ddpui/core/dashboard_chat/warehouse/sql_guard.py @@ -4,8 +4,8 @@ import sqlparse -from ddpui.core.dashboard_chat.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.runtime_types import DashboardChatSqlValidationResult +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import DashboardChatSqlValidationResult FORBIDDEN_SQL_KEYWORDS = { "INTO", @@ -32,13 +32,6 @@ r"\bMAX\s*\(", ) -PII_PATTERNS = [ - r"\b(name|phone|email|address|national_id|id_number)\b", - 
r"\b(contact|mobile|telephone|personal|identification)\b", - r"\b(firstname|lastname|full_name|participant_name|survivor_name)\b", -] - - class DashboardChatSqlGuard: """Validate SQL before it reaches the warehouse.""" @@ -96,15 +89,6 @@ def validate(self, sql: str) -> DashboardChatSqlValidationResult: if re.search(r"\bSELECT\s+\*", sql_upper): warnings.append("SELECT * detected. Prefer explicit column lists.") - if self._selects_row_level_pii(sanitized_sql): - errors.append( - "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." - ) - - for pii_pattern in PII_PATTERNS: - if re.search(pii_pattern, sanitized_sql, re.IGNORECASE): - warnings.append(f"Query may touch PII-like columns matching {pii_pattern}.") - tables = self._extract_table_names(sanitized_sql) for table_name in tables: if not self.allowlist.is_allowed(table_name): @@ -150,26 +134,6 @@ def _extract_table_names(cls, sql: str) -> list[str]: return list(dict.fromkeys(tables)) - @classmethod - def _selects_row_level_pii(cls, sql: str) -> bool: - """Detect row-level sensitive fields in the outer SELECT list.""" - select_clause = cls._extract_outer_select_clause(sql) - if not select_clause: - return False - - for expression in cls._split_select_expressions(select_clause): - normalized_expression = expression.strip() - if not normalized_expression: - continue - if cls._contains_aggregate(normalized_expression): - continue - if any( - re.search(pii_pattern, normalized_expression, re.IGNORECASE) - for pii_pattern in PII_PATTERNS - ): - return True - return False - @classmethod def _contains_select_into_clause(cls, sql: str) -> bool: """Detect SELECT ... 
INTO before the outer FROM clause.""" @@ -251,7 +215,7 @@ def _split_select_expressions(select_clause: str) -> list[str]: @staticmethod def _contains_aggregate(expression: str) -> bool: - """Treat aggregate projections as safe even if they mention sensitive columns.""" + """Return whether one SELECT expression uses an aggregate function.""" return any( re.search(pattern, expression, re.IGNORECASE) for pattern in AGGREGATE_FUNCTION_PATTERNS ) diff --git a/ddpui/core/dashboard_chat/warehouse_tools.py b/ddpui/core/dashboard_chat/warehouse/tools.py similarity index 99% rename from ddpui/core/dashboard_chat/warehouse_tools.py rename to ddpui/core/dashboard_chat/warehouse/tools.py index 005aeb41b..abde1bd56 100644 --- a/ddpui/core/dashboard_chat/warehouse_tools.py +++ b/ddpui/core/dashboard_chat/warehouse/tools.py @@ -5,7 +5,7 @@ import re from typing import Any -from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse from ddpui.utils import secretsmanager from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory diff --git a/ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py b/ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py new file mode 100644 index 000000000..41e42dd09 --- /dev/null +++ b/ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2 on 2026-03-26 05:38 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0157_dashboardchatprompttemplate_final_answer_composition"), + ] + + operations = [ + migrations.AddField( + model_name="dashboardchatmessage", + name="response_latency_ms", + field=models.PositiveIntegerField(blank=True, null=True), + ), + migrations.AddField( + model_name="dashboardchatmessage", + 
name="timing_breakdown", + field=models.JSONField(blank=True, null=True), + ), + ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index cf7265976..f30be157f 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -1,14 +1,12 @@ import uuid from enum import Enum -from django.core.cache import cache from django.db import models from django.utils import timezone from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org from ddpui.models.org_user import OrgUser -from ddpui.core.dashboard_chat.prompt_cache import build_dashboard_chat_prompt_cache_key class DashboardChatMessageRole(str, Enum): @@ -62,17 +60,6 @@ class DashboardChatPromptTemplate(models.Model): class Meta: ordering = ["key"] - def save(self, *args, **kwargs): - """Persist the prompt template and invalidate its runtime cache entry.""" - super().save(*args, **kwargs) - cache.delete(build_dashboard_chat_prompt_cache_key(self.key)) - - def delete(self, *args, **kwargs): - """Delete the prompt template and invalidate its runtime cache entry.""" - cache_key = build_dashboard_chat_prompt_cache_key(self.key) - super().delete(*args, **kwargs) - cache.delete(cache_key) - class OrgAIContext(models.Model): """Organization-level markdown context used by dashboard chat.""" @@ -143,6 +130,8 @@ class DashboardChatMessage(models.Model): content = models.TextField(blank=True, default="") client_message_id = models.CharField(max_length=100, null=True, blank=True) payload = models.JSONField(null=True, blank=True) + response_latency_ms = models.PositiveIntegerField(null=True, blank=True) + timing_breakdown = models.JSONField(null=True, blank=True) created_at = models.DateTimeField(default=timezone.now) class Meta: diff --git a/ddpui/tests/core/dashboard_chat/test_llm_client.py b/ddpui/tests/core/dashboard_chat/test_llm_client.py index bd5ee5c5d..bd6b36ddf 100644 --- a/ddpui/tests/core/dashboard_chat/test_llm_client.py +++ 
b/ddpui/tests/core/dashboard_chat/test_llm_client.py @@ -2,9 +2,9 @@ import json -import ddpui.core.dashboard_chat.openai_llm_client as llm_client_module -from ddpui.core.dashboard_chat.openai_llm_client import OpenAIDashboardChatLlmClient -from ddpui.core.dashboard_chat.runtime_types import ( +import ddpui.core.dashboard_chat.agents.openai as llm_client_module +from ddpui.core.dashboard_chat.agents.openai import OpenAIDashboardChatLlmClient +from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatIntent, DashboardChatIntentDecision, diff --git a/ddpui/tests/core/dashboard_chat/test_prompt_store.py b/ddpui/tests/core/dashboard_chat/test_prompt_store.py index 88c0b6deb..10dcabb9e 100644 --- a/ddpui/tests/core/dashboard_chat/test_prompt_store.py +++ b/ddpui/tests/core/dashboard_chat/test_prompt_store.py @@ -1,9 +1,8 @@ -"""Tests for dashboard chat prompt template storage and caching.""" +"""Tests for dashboard chat prompt template lookup.""" import pytest -from django.core.cache import cache -from ddpui.core.dashboard_chat.prompt_store import ( +from ddpui.core.dashboard_chat.agents.prompt_store import ( DEFAULT_DASHBOARD_CHAT_PROMPTS, DashboardChatPromptStore, ) @@ -15,13 +14,6 @@ pytestmark = pytest.mark.django_db -@pytest.fixture(autouse=True) -def clear_cache(): - cache.clear() - yield - cache.clear() - - def test_prompt_store_returns_default_when_no_db_override_exists(): """Missing prompt rows should fall back to the built-in default prompt text.""" store = DashboardChatPromptStore() @@ -39,8 +31,8 @@ def test_prompt_store_returns_default_when_no_db_override_exists(): ) -def test_prompt_store_uses_db_override_and_invalidates_cache_on_save(): - """Saving a prompt template should invalidate the cached prompt immediately.""" +def test_prompt_store_uses_db_override_after_save(): + """Saving a prompt template should update the prompt returned at runtime.""" prompt_template = DashboardChatPromptTemplate.objects.get( 
key=DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM, ) @@ -57,7 +49,7 @@ def test_prompt_store_uses_db_override_and_invalidates_cache_on_save(): def test_prompt_store_falls_back_to_default_after_delete(): - """Deleting a prompt template should invalidate the cache and restore the default prompt.""" + """Deleting a prompt template should restore the default prompt text.""" prompt_template = DashboardChatPromptTemplate.objects.get( key=DashboardChatPromptTemplateKey.SMALL_TALK_CAPABILITIES, ) diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index 9e7bfa46b..64f2b3f81 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -7,13 +7,13 @@ from django.core.cache import cache from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.allowlist import ( +from ddpui.core.dashboard_chat.context.allowlist import ( DashboardChatAllowlist, DashboardChatAllowlistBuilder, ) from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig -from ddpui.core.dashboard_chat.graph.orchestrator import DashboardChatRuntime -from ddpui.core.dashboard_chat.runtime_types import ( +from ddpui.core.dashboard_chat.orchestration.orchestrator import DashboardChatRuntime +from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, DashboardChatFollowUpContext, @@ -22,9 +22,9 @@ DashboardChatRetrievedDocument, DashboardChatResponse, ) -from ddpui.core.dashboard_chat.sql_guard import DashboardChatSqlGuard -from ddpui.core.dashboard_chat.vector_documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.vector_store import DashboardChatVectorQueryResult +from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector.store 
import DashboardChatVectorQueryResult from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org from ddpui.models.org_user import OrgUser @@ -243,7 +243,7 @@ def execute_sql(self, sql): @staticmethod def _schema_snippet(table_name, columns): - from ddpui.core.dashboard_chat.runtime_types import DashboardChatSchemaSnippet + from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet return DashboardChatSchemaSnippet(table_name=table_name, columns=columns) @@ -542,48 +542,28 @@ def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): raise AssertionError("Follow-up dimension guard LLM exceeded expected turns") -class PiiToolLoopLlm(PrototypeLlmBase): - """LLM stub that needs a safe failure response after SQL guard rejection.""" +class SmallTalkLlm(PrototypeLlmBase): + """LLM stub for prototype-style small talk.""" def classify_intent(self, *args, **kwargs): return DashboardChatIntentDecision( - intent=DashboardChatIntent.QUERY_WITH_SQL, - confidence=0.9, - reason="Needs data analysis", - force_tool_usage=True, + intent=DashboardChatIntent.SMALL_TALK, + confidence=0.97, + reason="Greeting or pleasantry", ) def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): - if self.turn == 0: - self.turn += 1 - return { - "content": "", - "tool_calls": [ - { - "id": "call-1", - "name": "run_sql_query", - "args": {"sql": "SELECT email FROM analytics.program_reach LIMIT 25"}, - } - ], - } - - tool_messages = [message for message in messages if message["role"] == "tool"] - assert any("aggregate the results or rephrase" in message["content"] for message in tool_messages) - return { - "content": "I couldn't answer that safely. 
Please aggregate the results or rephrase.", - "tool_calls": [], - } + raise AssertionError("Small talk should not enter the tool loop") -class SmallTalkLlm(PrototypeLlmBase): - """LLM stub for prototype-style small talk.""" +class FastPathOnlySmallTalkLlm(PrototypeLlmBase): + """LLM stub that fails if the runtime does not short-circuit obvious small talk.""" def classify_intent(self, *args, **kwargs): - return DashboardChatIntentDecision( - intent=DashboardChatIntent.SMALL_TALK, - confidence=0.97, - reason="Greeting or pleasantry", - ) + raise AssertionError("Fast-path small talk should skip LLM classification") + + def compose_small_talk(self, user_query): + raise AssertionError("Fast-path small talk should use deterministic response") def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): raise AssertionError("Small talk should not enter the tool loop") @@ -1144,6 +1124,48 @@ def test_small_talk_turn_returns_without_citations(primary_dashboard): assert response.citations == [] assert response.warnings == [] assert response.metadata["allowlisted_tables"] == ["analytics.program_reach"] + assert response.metadata["timing_breakdown"]["runtime_total_ms"] >= 0 + assert "load_context" in response.metadata["timing_breakdown"]["graph_nodes_ms"] + assert "route_intent" in response.metadata["timing_breakdown"]["graph_nodes_ms"] + assert "handle_small_talk" in response.metadata["timing_breakdown"]["graph_nodes_ms"] + assert "finalize" in response.metadata["timing_breakdown"]["graph_nodes_ms"] + + +@pytest.mark.parametrize( + ("user_query", "expected_text"), + [ + ( + "what can you do?", + "I can explain this dashboard", + ), + ( + "who are you?", + "I'm the dashboard chat assistant", + ), + ], +) +def test_small_talk_fast_path_handles_capability_prompts( + primary_dashboard, + user_query, + expected_text, +): + """Obvious capability/identity prompts should short-circuit before LLM classification.""" + runtime = DashboardChatRuntime( + 
vector_store=FakeVectorStore([]), + llm_client=FastPathOnlySmallTalkLlm(), + ) + + response = runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query=user_query, + ) + + assert response.intent == DashboardChatIntent.SMALL_TALK + assert expected_text in response.answer_text + assert response.citations == [] + assert response.warnings == [] + assert response.metadata["timing_breakdown"]["runtime_total_ms"] >= 0 def test_runtime_query_without_sql_returns_dashboard_scoped_citations( @@ -1677,30 +1699,6 @@ def test_follow_up_dimension_validation_accepts_structural_granularity_change(pr assert validation is None -def test_runtime_rejects_row_level_pii_queries_before_execution(org, primary_dashboard): - """Unsafe PII SQL should be rejected by the SQL guard before warehouse execution.""" - fake_warehouse = FakeWarehouseTools() - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=PiiToolLoopLlm(), - warehouse_tools_factory=lambda org: fake_warehouse, - ) - - response = runtime.run( - org=org, - dashboard_id=primary_dashboard.id, - user_query="List email addresses for this dashboard", - ) - - assert fake_warehouse.executed_sql == [] - assert response.sql is None - assert response.sql_results is None - assert "aggregate the results or rephrase" in response.answer_text - assert response.metadata["sql_guard_errors"] == [ - "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." 
- ] - - def test_runtime_skips_disabled_source_types_during_retrieval(org, primary_dashboard): """Disabled source types should not be queried by the retrieve_docs tool.""" vector_store = FakeVectorStore( @@ -1904,25 +1902,6 @@ def test_sql_guard_enforces_single_statement_allowlist_and_limit(): assert any("No LIMIT clause found" in warning for warning in allowed_query.warnings) -def test_sql_guard_rejects_row_level_pii_queries(): - """SQL guard should reject row-level projections of sensitive fields.""" - allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) - guard = DashboardChatSqlGuard(allowlist=allowlist, max_rows=200) - - pii_query = guard.validate( - "SELECT email, COUNT(*) AS beneficiary_count " - "FROM analytics.program_reach " - "GROUP BY email " - "LIMIT 50" - ) - - assert pii_query.is_valid is False - assert pii_query.sanitized_sql is None - assert pii_query.errors == [ - "Queries returning row-level sensitive data are not allowed. Please aggregate the results or rephrase." - ] - - def test_sql_guard_rejects_select_into_queries(): """SQL guard should reject SELECT ... 
INTO statements.""" allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index a595146d4..e1eaca7f4 100644 --- a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -8,13 +8,13 @@ from django.contrib.auth.models import User from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.session_service import ( +from ddpui.core.dashboard_chat.sessions.service import ( DashboardChatSessionError, create_dashboard_chat_user_message, create_dashboard_chat_user_message_with_status, get_or_create_dashboard_chat_session, ) -from ddpui.core.dashboard_chat.vector_documents import build_dashboard_chat_collection_name +from ddpui.core.dashboard_chat.vector.documents import build_dashboard_chat_collection_name from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org import Org, OrgDbt diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 44ff220a5..064b4f172 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -11,8 +11,8 @@ run_dashboard_chat_turn, schedule_dashboard_chat_context_builds, ) -from ddpui.core.dashboard_chat.vector_building import DashboardChatVectorBuildResult -from ddpui.core.dashboard_chat.runtime_types import DashboardChatIntent, DashboardChatResponse +from ddpui.core.dashboard_chat.vector.building import DashboardChatVectorBuildResult +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse from ddpui.models.org import Org, OrgDbt from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession @@ -187,6 +187,12 @@ def 
test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( warnings=["Example warning"], sql="SELECT 1", sql_results=[{"value": 1}], + metadata={ + "timing_breakdown": { + "runtime_total_ms": 123.4, + "graph_nodes_ms": {"load_context": 10.0}, + } + }, ) get_runtime.return_value = runtime @@ -196,6 +202,12 @@ def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( assert assistant_message.sequence_number == 2 assert assistant_message.content == "Funding dropped because donor inflows slowed this quarter." assert assistant_message.payload["sql"] == "SELECT 1" + assert assistant_message.response_latency_ms is not None + assert assistant_message.response_latency_ms >= 0 + assert assistant_message.timing_breakdown == { + "runtime_total_ms": 123.4, + "graph_nodes_ms": {"load_context": 10.0}, + } assert result["status"] == "completed" publish_event.assert_called_once() diff --git a/ddpui/tests/core/dashboard_chat/test_vector_building.py b/ddpui/tests/core/dashboard_chat/test_vector_building.py index a766ad4e6..a2a646aca 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_building.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_building.py @@ -10,17 +10,17 @@ from django.utils import timezone from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.dbt_docs import ( +from ddpui.core.dashboard_chat.context.dbt_docs import ( DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.vector_building import DashboardChatVectorBuildService -from ddpui.core.dashboard_chat.vector_documents import ( +from ddpui.core.dashboard_chat.vector.building import DashboardChatVectorBuildService +from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, build_dashboard_chat_collection_name, ) -from ddpui.core.dashboard_chat.vector_store import DashboardChatStoredDocument +from 
ddpui.core.dashboard_chat.vector.store import DashboardChatStoredDocument from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpprefect import DBTCLIPROFILE from ddpui.models.dashboard import Dashboard @@ -250,7 +250,7 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdb (target_dir / "catalog.json").write_text(json.dumps(catalog_json), encoding="utf-8") with patch( - "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.gather_dbt_project_params", + "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.gather_dbt_project_params", return_value=DbtProjectParams( dbt_binary="/mock/dbt", dbt_env_dir="/mock/env", @@ -260,13 +260,13 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdb org_project_dir=str(project_dir.parent), ), ), patch( - "ddpui.core.dashboard_chat.dbt_docs.prefect_service.get_dbt_cli_profile_block", + "ddpui.core.dashboard_chat.context.dbt_docs.prefect_service.get_dbt_cli_profile_block", return_value={"profile": {"dashchat": {"outputs": {"dev": {"type": "postgres"}}}}}, ), patch( - "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.run_dbt_command", + "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.run_dbt_command", return_value=Mock(stdout="ok", returncode=0), ) as mock_run_dbt, patch( - "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.get_dbt_project_dir", + "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.get_dbt_project_dir", return_value=str(project_dir), ): artifacts = generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) @@ -311,7 +311,7 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_genera mock_git_manager = Mock() with patch( - "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.gather_dbt_project_params", + "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.gather_dbt_project_params", return_value=DbtProjectParams( dbt_binary="/mock/dbt", dbt_env_dir="/mock/env", @@ -321,19 +321,19 @@ 
def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_genera org_project_dir=str(project_dir.parent), ), ), patch( - "ddpui.core.dashboard_chat.dbt_docs.prefect_service.get_dbt_cli_profile_block", + "ddpui.core.dashboard_chat.context.dbt_docs.prefect_service.get_dbt_cli_profile_block", return_value={"profile": {"dashchat": {"outputs": {"dev": {"type": "postgres"}}}}}, ), patch( - "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.run_dbt_command", + "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.run_dbt_command", return_value=Mock(stdout="ok", returncode=0), ), patch( - "ddpui.core.dashboard_chat.dbt_docs.DbtProjectManager.get_dbt_project_dir", + "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.get_dbt_project_dir", return_value=str(project_dir), ), patch( - "ddpui.core.dashboard_chat.dbt_docs.secretsmanager.retrieve_github_pat", + "ddpui.core.dashboard_chat.context.dbt_docs.secretsmanager.retrieve_github_pat", return_value="actual-pat", ) as mock_retrieve_pat, patch( - "ddpui.core.dashboard_chat.dbt_docs.GitManager", + "ddpui.core.dashboard_chat.context.dbt_docs.GitManager", return_value=mock_git_manager, ) as mock_git_manager_class: generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py b/ddpui/tests/core/dashboard_chat/test_vector_store.py index b97c58bed..b555bf47a 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -4,12 +4,12 @@ from unittest.mock import patch from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig -from ddpui.core.dashboard_chat.vector_documents import ( +from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, DashboardChatVectorDocument, build_dashboard_chat_collection_name, ) -from ddpui.core.dashboard_chat.vector_store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.vector.store 
import ChromaDashboardChatVectorStore class FakeEmbeddingProvider: diff --git a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py index 6b8a82991..bb30e2c0d 100644 --- a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py +++ b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py @@ -6,7 +6,7 @@ import pytest -from ddpui.core.dashboard_chat.warehouse_tools import ( +from ddpui.core.dashboard_chat.warehouse.tools import ( DashboardChatWarehouseTools, DashboardChatWarehouseToolsError, ) @@ -25,7 +25,7 @@ def _build_bigquery_tools(): def test_quote_bigquery_table_ref_uses_project_id_from_credentials(): """BigQuery table refs should use project_id from stored credentials, not dataset location.""" with patch( - "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", return_value={"project_id": "analytics-project"}, ): tools = _build_bigquery_tools() @@ -37,7 +37,7 @@ def test_quote_bigquery_table_ref_uses_project_id_from_credentials(): def test_quote_bigquery_table_ref_reads_nested_project_id_from_credentials_json(): """credentials_json payloads should still provide the BigQuery project id.""" with patch( - "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", return_value={"credentials_json": json.dumps({"project_id": "analytics-project"})}, ): tools = _build_bigquery_tools() @@ -49,7 +49,7 @@ def test_quote_bigquery_table_ref_reads_nested_project_id_from_credentials_json( def test_quote_bigquery_table_ref_requires_project_id(): """A missing project id should fail explicitly.""" with patch( - "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + 
"ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", return_value={"dataset_location": "asia-south1"}, ): tools = _build_bigquery_tools() @@ -62,7 +62,7 @@ def test_quote_bigquery_table_ref_requires_project_id(): def test_quote_bigquery_table_ref_rejects_unsafe_identifier_components(): """BigQuery table refs should reject unsafe project/schema/table identifier text.""" with patch( - "ddpui.core.dashboard_chat.warehouse_tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", return_value={"project_id": "analytics-project"}, ): tools = _build_bigquery_tools() diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index f88fd6408..c7cee02eb 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -8,7 +8,7 @@ build_dashboard_chat_event, dashboard_chat_group_name, ) -from ddpui.core.dashboard_chat.session_service import ( +from ddpui.core.dashboard_chat.sessions.service import ( DashboardChatSessionError, create_dashboard_chat_user_message_with_status, find_dashboard_chat_assistant_reply, From 278757413aa0cd6554d05590ec776a3c01bfec48 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Thu, 26 Mar 2026 21:01:12 +0530 Subject: [PATCH 18/49] first refactor --- .../dashboard_chat/{README.md => CLAUDE.md} | 0 .../dashboard_chat/orchestration/bindings.py | 241 ---------- .../orchestration/conversation.py | 61 ++- .../orchestration/definition.py | 85 ---- .../orchestration/message_stack.py | 56 +-- .../dashboard_chat/orchestration/nodes.py | 259 ----------- .../orchestration/nodes/__init__.py | 0 .../orchestration/nodes/finalize.py | 60 +++ .../orchestration/nodes/handle_data_query.py | 84 ++++ .../orchestration/nodes/handle_follow_up.py | 84 ++++ .../orchestration/nodes/handle_irrelevant.py | 23 + .../nodes/handle_needs_clarification.py | 25 ++ 
.../orchestration/nodes/handle_small_talk.py | 24 + .../orchestration/nodes/helpers.py | 33 ++ .../orchestration/nodes/load_context.py | 18 + .../orchestration/nodes/route_intent.py | 27 ++ .../orchestration/orchestrator.py | 144 +++++- .../orchestration/presentation.py | 120 +++-- .../dashboard_chat/orchestration/retrieval.py | 99 ++-- .../orchestration/session_snapshot.py | 19 +- .../orchestration/sql_execution.py | 306 ++++--------- .../orchestration/sql_parsing.py | 164 ++++--- .../orchestration/tool_handlers.py | 425 +++++++++++------- .../dashboard_chat/orchestration/tool_loop.py | 95 ++-- 24 files changed, 1175 insertions(+), 1277 deletions(-) rename ddpui/core/dashboard_chat/{README.md => CLAUDE.md} (100%) delete mode 100644 ddpui/core/dashboard_chat/orchestration/bindings.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/definition.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/nodes.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/__init__.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/finalize.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/helpers.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/load_context.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py diff --git a/ddpui/core/dashboard_chat/README.md b/ddpui/core/dashboard_chat/CLAUDE.md similarity index 100% rename from ddpui/core/dashboard_chat/README.md rename to ddpui/core/dashboard_chat/CLAUDE.md diff --git 
a/ddpui/core/dashboard_chat/orchestration/bindings.py b/ddpui/core/dashboard_chat/orchestration/bindings.py deleted file mode 100644 index c25380270..000000000 --- a/ddpui/core/dashboard_chat/orchestration/bindings.py +++ /dev/null @@ -1,241 +0,0 @@ -"""Method wiring for the dashboard chat runtime class.""" - -from . import conversation as conversation_methods -from . import message_stack as message_methods -from . import nodes as node_methods -from . import presentation as presentation_methods -from . import retrieval as retrieval_methods -from . import session_snapshot as snapshot_methods -from . import source_identifiers as source_identifier_methods -from . import sql_execution as sql_execution_methods -from . import sql_parsing as sql_parsing_methods -from . import tool_handlers as tool_handler_methods -from . import tool_loop as tool_loop_methods - - -def bind_dashboard_chat_runtime_methods(runtime_cls) -> None: - """Attach graph helper modules onto the runtime class.""" - runtime_cls._node_load_context = node_methods._node_load_context - runtime_cls._node_route_intent = node_methods._node_route_intent - runtime_cls._node_handle_small_talk = node_methods._node_handle_small_talk - runtime_cls._node_handle_irrelevant = node_methods._node_handle_irrelevant - runtime_cls._node_handle_needs_clarification = node_methods._node_handle_needs_clarification - runtime_cls._node_handle_query_with_sql = node_methods._node_handle_query_with_sql - runtime_cls._node_handle_query_without_sql = node_methods._node_handle_query_without_sql - runtime_cls._node_handle_follow_up_sql = node_methods._node_handle_follow_up_sql - runtime_cls._node_handle_follow_up_context = node_methods._node_handle_follow_up_context - runtime_cls._run_intent_tool_loop = node_methods._run_intent_tool_loop - runtime_cls._node_finalize_response = node_methods._node_finalize_response - runtime_cls._route_after_intent = staticmethod(node_methods._route_after_intent) - - runtime_cls._build_new_query_messages 
= message_methods._build_new_query_messages - runtime_cls._build_follow_up_messages = message_methods._build_follow_up_messages - runtime_cls._normalize_conversation_history = staticmethod( - message_methods._normalize_conversation_history - ) - - runtime_cls._execute_tool_loop = tool_loop_methods._execute_tool_loop - runtime_cls._execute_tool_call = tool_loop_methods._execute_tool_call - runtime_cls._build_tool_loop_result = tool_loop_methods._build_tool_loop_result - - runtime_cls._handle_retrieve_docs_tool = tool_handler_methods._handle_retrieve_docs_tool - runtime_cls._handle_get_schema_snippets_tool = ( - tool_handler_methods._handle_get_schema_snippets_tool - ) - runtime_cls._handle_search_dbt_models_tool = ( - tool_handler_methods._handle_search_dbt_models_tool - ) - runtime_cls._handle_get_dbt_model_info_tool = ( - tool_handler_methods._handle_get_dbt_model_info_tool - ) - runtime_cls._handle_get_distinct_values_tool = ( - tool_handler_methods._handle_get_distinct_values_tool - ) - runtime_cls._handle_list_tables_by_keyword_tool = ( - tool_handler_methods._handle_list_tables_by_keyword_tool - ) - runtime_cls._handle_check_table_row_count_tool = ( - tool_handler_methods._handle_check_table_row_count_tool - ) - runtime_cls._get_turn_warehouse_tools = tool_handler_methods._get_turn_warehouse_tools - runtime_cls._get_cached_schema_snippets = tool_handler_methods._get_cached_schema_snippets - runtime_cls._seed_distinct_cache_from_previous_sql = ( - tool_handler_methods._seed_distinct_cache_from_previous_sql - ) - runtime_cls._dbt_resources_by_unique_id = staticmethod( - tool_handler_methods._dbt_resources_by_unique_id - ) - runtime_cls._get_cached_query_embedding = tool_handler_methods._get_cached_query_embedding - - runtime_cls._extract_conversation_context = classmethod( - conversation_methods._extract_conversation_context - ) - runtime_cls._extract_chart_ids_from_payload = staticmethod( - conversation_methods._extract_chart_ids_from_payload - ) - 
runtime_cls._build_follow_up_context_prompt = classmethod( - conversation_methods._build_follow_up_context_prompt - ) - runtime_cls._detect_sql_modification_type = staticmethod( - conversation_methods._detect_sql_modification_type - ) - runtime_cls._extract_requested_follow_up_dimension = staticmethod( - conversation_methods._extract_requested_follow_up_dimension - ) - runtime_cls._extract_metrics_from_sql = staticmethod( - conversation_methods._extract_metrics_from_sql - ) - runtime_cls._extract_dimensions_from_sql = staticmethod( - conversation_methods._extract_dimensions_from_sql - ) - runtime_cls._extract_filters_from_sql = staticmethod( - conversation_methods._extract_filters_from_sql - ) - - runtime_cls._retrieve_vector_documents = retrieval_methods._retrieve_vector_documents - runtime_cls._filter_allowlisted_dbt_results = staticmethod( - retrieval_methods._filter_allowlisted_dbt_results - ) - runtime_cls._dedupe_retrieved_documents = staticmethod( - retrieval_methods._dedupe_retrieved_documents - ) - runtime_cls._build_citations = retrieval_methods._build_citations - runtime_cls._citation_title = staticmethod(retrieval_methods._citation_title) - runtime_cls._compact_snippet = staticmethod(retrieval_methods._compact_snippet) - runtime_cls._build_tool_document_payload = retrieval_methods._build_tool_document_payload - runtime_cls._build_chart_tool_metadata = classmethod( - retrieval_methods._build_chart_tool_metadata - ) - runtime_cls._prototype_doc_type = staticmethod(retrieval_methods._prototype_doc_type) - runtime_cls._chart_metric_columns = classmethod( - retrieval_methods._chart_metric_columns - ) - runtime_cls._chart_dimension_columns = classmethod( - retrieval_methods._chart_dimension_columns - ) - runtime_cls._chart_time_column = classmethod(retrieval_methods._chart_time_column) - runtime_cls._looks_like_time_dimension = staticmethod( - retrieval_methods._looks_like_time_dimension - ) - runtime_cls._chart_id_from_source_identifier = staticmethod( - 
source_identifier_methods.chart_id_from_source_identifier - ) - runtime_cls._unique_id_from_source_identifier = staticmethod( - source_identifier_methods.unique_id_from_source_identifier - ) - - runtime_cls._load_session_snapshot = snapshot_methods._load_session_snapshot - runtime_cls._build_session_snapshot = snapshot_methods._build_session_snapshot - runtime_cls._persist_session_schema_cache = snapshot_methods._persist_session_schema_cache - runtime_cls._persist_session_distinct_cache = snapshot_methods._persist_session_distinct_cache - - runtime_cls._validate_sql_allowlist = sql_execution_methods._validate_sql_allowlist - runtime_cls._run_sql_with_distinct_guard = ( - sql_execution_methods._run_sql_with_distinct_guard - ) - runtime_cls._missing_columns_in_primary_table = ( - sql_execution_methods._missing_columns_in_primary_table - ) - runtime_cls._structured_sql_execution_error = ( - sql_execution_methods._structured_sql_execution_error - ) - runtime_cls._validate_follow_up_dimension_usage = ( - sql_execution_methods._validate_follow_up_dimension_usage - ) - runtime_cls._missing_distinct = sql_execution_methods._missing_distinct - runtime_cls._normalize_distinct_value = staticmethod( - sql_execution_methods._normalize_distinct_value - ) - runtime_cls._has_validated_distinct_value = classmethod( - sql_execution_methods._has_validated_distinct_value - ) - runtime_cls._is_text_type = staticmethod(sql_execution_methods._is_text_type) - runtime_cls._record_validated_distinct_values = ( - sql_execution_methods._record_validated_distinct_values - ) - runtime_cls._record_validated_filters_from_sql = ( - sql_execution_methods._record_validated_filters_from_sql - ) - - runtime_cls._primary_table_name = staticmethod(sql_parsing_methods._primary_table_name) - runtime_cls._table_references = classmethod(sql_parsing_methods._table_references) - runtime_cls._resolve_table_qualifier = classmethod( - sql_parsing_methods._resolve_table_qualifier - ) - runtime_cls._table_columns 
= staticmethod(sql_parsing_methods._table_columns) - runtime_cls._tables_with_column = classmethod(sql_parsing_methods._tables_with_column) - runtime_cls._resolve_identifier_table = classmethod( - sql_parsing_methods._resolve_identifier_table - ) - runtime_cls._referenced_sql_identifier_refs = classmethod( - sql_parsing_methods._referenced_sql_identifier_refs - ) - runtime_cls._select_aliases = staticmethod(sql_parsing_methods._select_aliases) - runtime_cls._extract_identifier_refs_from_sql_segment = staticmethod( - sql_parsing_methods._extract_identifier_refs_from_sql_segment - ) - runtime_cls._best_table_for_missing_columns = staticmethod( - sql_parsing_methods._best_table_for_missing_columns - ) - runtime_cls._extract_text_filter_values = staticmethod( - sql_parsing_methods._extract_text_filter_values - ) - runtime_cls._find_tables_with_column = staticmethod( - sql_parsing_methods._find_tables_with_column - ) - runtime_cls._structural_dimensions_from_sql = classmethod( - sql_parsing_methods._structural_dimensions_from_sql - ) - runtime_cls._normalize_dimension_name = staticmethod( - sql_parsing_methods._normalize_dimension_name - ) - - runtime_cls._serialize_tool_result = staticmethod( - presentation_methods._serialize_tool_result - ) - runtime_cls._summarize_tool_call = presentation_methods._summarize_tool_call - runtime_cls._max_turns_message = presentation_methods._max_turns_message - runtime_cls._compose_final_answer_text = presentation_methods._compose_final_answer_text - runtime_cls._determine_response_format = staticmethod( - presentation_methods._determine_response_format - ) - runtime_cls._sql_result_columns = staticmethod( - presentation_methods._sql_result_columns - ) - runtime_cls._build_usage_summary = presentation_methods._build_usage_summary - runtime_cls._compose_small_talk_response = ( - presentation_methods._compose_small_talk_response - ) - runtime_cls._build_fast_path_intent = staticmethod( - presentation_methods._build_fast_path_intent - ) - 
runtime_cls._build_fast_path_small_talk_response = staticmethod( - presentation_methods._build_fast_path_small_talk_response - ) - runtime_cls._clarification_fallback = staticmethod( - presentation_methods._clarification_fallback - ) - runtime_cls._fallback_answer_text = staticmethod( - presentation_methods._fallback_answer_text - ) - runtime_cls._single_row_summary = staticmethod( - presentation_methods._single_row_summary - ) - runtime_cls._humanize_column_name = staticmethod( - presentation_methods._humanize_column_name - ) - runtime_cls._normalize_sql_results_for_answer = classmethod( - presentation_methods._normalize_sql_results_for_answer - ) - runtime_cls._normalize_sql_value_for_answer = classmethod( - presentation_methods._normalize_sql_value_for_answer - ) - runtime_cls._format_numeric_answer_value = classmethod( - presentation_methods._format_numeric_answer_value - ) - runtime_cls._parse_numeric_string = staticmethod( - presentation_methods._parse_numeric_string - ) - runtime_cls._looks_like_rate_metric = staticmethod( - presentation_methods._looks_like_rate_metric - ) diff --git a/ddpui/core/dashboard_chat/orchestration/conversation.py b/ddpui/core/dashboard_chat/orchestration/conversation.py index 2f6b46e8c..49bd20002 100644 --- a/ddpui/core/dashboard_chat/orchestration/conversation.py +++ b/ddpui/core/dashboard_chat/orchestration/conversation.py @@ -13,11 +13,10 @@ from .source_identifiers import chart_id_from_source_identifier -def _extract_conversation_context( - cls, +def extract_conversation_context( conversation_history: Sequence[DashboardChatConversationMessage], ) -> DashboardChatConversationContext: - """Extract reusable conversation context like the prototype conversation manager.""" + """Extract reusable conversation context from message history.""" context = DashboardChatConversationContext() recent_history = list(conversation_history)[-10:] @@ -29,7 +28,7 @@ def _extract_conversation_context( sql = payload.get("sql") metadata = 
payload.get("metadata") or {} citations = payload.get("citations") or [] - chart_ids = cls._extract_chart_ids_from_payload(payload) + chart_ids = extract_chart_ids_from_payload(payload) if chart_ids and context.last_sql_query and not context.last_chart_ids: context = DashboardChatConversationContext( @@ -63,9 +62,9 @@ def _extract_conversation_context( last_sql_query=str(sql), last_tables_used=list(dict.fromkeys(tables)), last_chart_ids=chart_ids, - last_metrics=cls._extract_metrics_from_sql(str(sql)), - last_dimensions=cls._extract_dimensions_from_sql(str(sql)), - last_filters=cls._extract_filters_from_sql(str(sql)), + last_metrics=extract_metrics_from_sql(str(sql)), + last_dimensions=extract_dimensions_from_sql(str(sql)), + last_filters=extract_filters_from_sql(str(sql)), last_response_type="sql_result", last_answer_text=message.content, last_intent=str(payload.get("intent") or ""), @@ -85,8 +84,8 @@ def _extract_conversation_context( return context -def _extract_chart_ids_from_payload(payload: dict[str, Any]) -> list[str]: - """Extract chart ids from persisted metadata/citations like the prototype chat history.""" +def extract_chart_ids_from_payload(payload: dict[str, Any]) -> list[str]: + """Extract chart ids from persisted metadata/citations.""" metadata = payload.get("metadata") or {} chart_ids = [str(chart_id) for chart_id in metadata.get("chart_ids_used") or [] if chart_id] if chart_ids: @@ -101,12 +100,11 @@ def _extract_chart_ids_from_payload(payload: dict[str, Any]) -> list[str]: return list(dict.fromkeys(extracted_chart_ids)) -def _build_follow_up_context_prompt( - cls, +def build_follow_up_context_prompt( conversation_context: DashboardChatConversationContext, user_query: str, ) -> str: - """Build the prototype follow-up context prompt.""" + """Build the follow-up context prompt injected into the message stack.""" return "\n".join( [ "PREVIOUS QUERY CONTEXT:", @@ -123,8 +121,8 @@ def _build_follow_up_context_prompt( ) -def 
_detect_sql_modification_type(user_query: str) -> str: - """Detect the same coarse follow-up modification categories as the prototype.""" +def detect_sql_modification_type(user_query: str) -> str: + """Detect the coarse follow-up modification category from the user's phrasing.""" lowered_query = user_query.lower() if any(keyword in lowered_query for keyword in ["by", "split by", "break down", "group by"]): return "add_dimension" @@ -143,8 +141,8 @@ def _detect_sql_modification_type(user_query: str) -> str: return "general_modification" -def _extract_requested_follow_up_dimension(text: str) -> str | None: - """Extract the requested follow-up dimension and normalize natural-language spaces.""" +def extract_requested_follow_up_dimension(text: str) -> str | None: + """Extract the requested follow-up dimension from the user's instruction.""" normalized_text = text.strip().lower() patterns = [ r"split\s+by\s+([a-zA-Z_][a-zA-Z0-9_\s]*)", @@ -168,8 +166,8 @@ def _extract_requested_follow_up_dimension(text: str) -> str | None: return None -def _extract_metrics_from_sql(sql: str) -> list[str]: - """Extract aggregate expressions from the previous SQL for follow-up prompts.""" +def extract_metrics_from_sql(sql: str) -> list[str]: + """Extract aggregate expressions from SQL for follow-up prompts.""" select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) if not select_clause: return [] @@ -183,8 +181,8 @@ def _extract_metrics_from_sql(sql: str) -> list[str]: return metrics[:5] -def _extract_dimensions_from_sql(sql: str) -> list[str]: - """Extract GROUP BY dimensions from the previous SQL.""" +def extract_dimensions_from_sql(sql: str) -> list[str]: + """Extract GROUP BY dimensions from SQL.""" match = re.search( r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", sql, @@ -199,8 +197,8 @@ def _extract_dimensions_from_sql(sql: str) -> list[str]: ][:5] -def _extract_filters_from_sql(sql: str) -> list[str]: - """Extract WHERE-clause filters from the previous SQL.""" 
+def extract_filters_from_sql(sql: str) -> list[str]: + """Extract WHERE-clause filters from SQL.""" match = re.search( r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", sql, @@ -221,3 +219,22 @@ def _extract_filters_from_sql(sql: str) -> list[str]: else: filters.append(str(filter_match)) return filters[:5] + + +def normalize_conversation_history( + conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] | None, +) -> list[DashboardChatConversationMessage]: + """Normalize stored history into the typed runtime message format.""" + normalized_messages: list[DashboardChatConversationMessage] = [] + for item in conversation_history or []: + if isinstance(item, DashboardChatConversationMessage): + normalized_messages.append(item) + continue + normalized_messages.append( + DashboardChatConversationMessage( + role=str(item.get("role") or "user"), + content=str(item.get("content") or ""), + payload=item.get("payload") or {}, + ) + ) + return normalized_messages diff --git a/ddpui/core/dashboard_chat/orchestration/definition.py b/ddpui/core/dashboard_chat/orchestration/definition.py deleted file mode 100644 index 3dd569820..000000000 --- a/ddpui/core/dashboard_chat/orchestration/definition.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Graph definition helpers for dashboard chat orchestration.""" - -from time import perf_counter - -from langgraph.graph import END, START, StateGraph - -from .state import DashboardChatRuntimeState - - -def _timed_node(node_name, handler): - """Wrap one graph node so per-node duration is persisted on state.""" - - def wrapped(state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - started_at = perf_counter() - next_state = handler(state) - elapsed_ms = round((perf_counter() - started_at) * 1000, 2) - timing_breakdown = dict(next_state.get("timing_breakdown") or {}) - graph_nodes_ms = dict(timing_breakdown.get("graph_nodes_ms") or {}) - graph_nodes_ms[node_name] = elapsed_ms - timing_breakdown["graph_nodes_ms"] = 
graph_nodes_ms - next_state["timing_breakdown"] = timing_breakdown - return next_state - - return wrapped - - -def build_dashboard_chat_graph(runtime): - """Build the explicit prototype-aligned intent graph.""" - graph = StateGraph(DashboardChatRuntimeState) - graph.add_node("load_context", _timed_node("load_context", runtime._node_load_context)) - graph.add_node("route_intent", _timed_node("route_intent", runtime._node_route_intent)) - graph.add_node( - "handle_small_talk", - _timed_node("handle_small_talk", runtime._node_handle_small_talk), - ) - graph.add_node( - "handle_irrelevant", - _timed_node("handle_irrelevant", runtime._node_handle_irrelevant), - ) - graph.add_node( - "handle_needs_clarification", - _timed_node("handle_needs_clarification", runtime._node_handle_needs_clarification), - ) - graph.add_node( - "handle_query_with_sql", - _timed_node("handle_query_with_sql", runtime._node_handle_query_with_sql), - ) - graph.add_node( - "handle_query_without_sql", - _timed_node("handle_query_without_sql", runtime._node_handle_query_without_sql), - ) - graph.add_node( - "handle_follow_up_sql", - _timed_node("handle_follow_up_sql", runtime._node_handle_follow_up_sql), - ) - graph.add_node( - "handle_follow_up_context", - _timed_node("handle_follow_up_context", runtime._node_handle_follow_up_context), - ) - graph.add_node("finalize", _timed_node("finalize", runtime._node_finalize_response)) - - graph.add_edge(START, "load_context") - graph.add_edge("load_context", "route_intent") - graph.add_conditional_edges( - "route_intent", - runtime._route_after_intent, - { - "small_talk": "handle_small_talk", - "irrelevant": "handle_irrelevant", - "needs_clarification": "handle_needs_clarification", - "query_with_sql": "handle_query_with_sql", - "query_without_sql": "handle_query_without_sql", - "follow_up_sql": "handle_follow_up_sql", - "follow_up_context": "handle_follow_up_context", - }, - ) - graph.add_edge("handle_small_talk", "finalize") - 
graph.add_edge("handle_irrelevant", "finalize") - graph.add_edge("handle_needs_clarification", "finalize") - graph.add_edge("handle_query_with_sql", "finalize") - graph.add_edge("handle_query_without_sql", "finalize") - graph.add_edge("handle_follow_up_sql", "finalize") - graph.add_edge("handle_follow_up_context", "finalize") - graph.add_edge("finalize", END) - return graph.compile() diff --git a/ddpui/core/dashboard_chat/orchestration/message_stack.py b/ddpui/core/dashboard_chat/orchestration/message_stack.py index a832fb965..af452910d 100644 --- a/ddpui/core/dashboard_chat/orchestration/message_stack.py +++ b/ddpui/core/dashboard_chat/orchestration/message_stack.py @@ -1,48 +1,37 @@ """Prompt-message stack helpers for dashboard chat graph execution.""" -from collections.abc import Sequence from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatConversationMessage from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey +from .conversation import build_follow_up_context_prompt, detect_sql_modification_type from .state import DashboardChatRuntimeState -def _build_new_query_messages( - self, +def build_new_query_messages( + llm_client, state: DashboardChatRuntimeState, ) -> list[dict[str, Any]]: - """Build the prototype new-query message stack.""" - system_prompt = self.llm_client.get_prompt( - DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM - ) + """Build the new-query message stack.""" + system_prompt = llm_client.get_prompt(DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM) return [ - { - "role": "system", - "content": system_prompt, - }, + {"role": "system", "content": system_prompt}, {"role": "user", "content": state["user_query"]}, ] -def _build_follow_up_messages( - self, +def build_follow_up_messages( + llm_client, state: DashboardChatRuntimeState, ) -> list[dict[str, Any]]: - """Build the prototype follow-up message stack.""" - modification_type = self._detect_sql_modification_type(state["user_query"]) - 
system_prompt = self.llm_client.get_prompt( - DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM - ) + """Build the follow-up message stack.""" + modification_type = detect_sql_modification_type(state["user_query"]) + system_prompt = llm_client.get_prompt(DashboardChatPromptTemplateKey.FOLLOW_UP_SYSTEM) return [ + {"role": "system", "content": system_prompt}, { "role": "system", - "content": system_prompt, - }, - { - "role": "system", - "content": self._build_follow_up_context_prompt( + "content": build_follow_up_context_prompt( state["conversation_context"], state["user_query"], ), @@ -50,22 +39,3 @@ def _build_follow_up_messages( {"role": "system", "content": f"MODIFICATION_TYPE: {modification_type}"}, {"role": "user", "content": state["user_query"]}, ] - - -def _normalize_conversation_history( - conversation_history: Sequence[DashboardChatConversationMessage | dict[str, Any]] | None, -) -> list[DashboardChatConversationMessage]: - """Normalize stored history into the typed runtime message format.""" - normalized_messages: list[DashboardChatConversationMessage] = [] - for item in conversation_history or []: - if isinstance(item, DashboardChatConversationMessage): - normalized_messages.append(item) - continue - normalized_messages.append( - DashboardChatConversationMessage( - role=str(item.get("role") or "user"), - content=str(item.get("content") or ""), - payload=item.get("payload") or {}, - ) - ) - return normalized_messages diff --git a/ddpui/core/dashboard_chat/orchestration/nodes.py b/ddpui/core/dashboard_chat/orchestration/nodes.py deleted file mode 100644 index 6897ce3db..000000000 --- a/ddpui/core/dashboard_chat/orchestration/nodes.py +++ /dev/null @@ -1,259 +0,0 @@ -"""LangGraph node handlers for dashboard chat.""" - -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatCitation, - DashboardChatIntent, - DashboardChatResponse, -) - -from .state import 
DashboardChatRuntimeState - - -def _node_load_context(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Load or reuse the session-stable dashboard context snapshot.""" - snapshot = self._load_session_snapshot(state) - state["dashboard_export"] = snapshot["dashboard_export"] - state["dbt_index"] = snapshot["dbt_index"] - state["allowlist"] = snapshot["allowlist"] - state["session_schema_cache"] = snapshot["schema_cache"] - state["session_distinct_cache"] = snapshot["distinct_cache"] - return state - - -def _node_route_intent(self, state: DashboardChatRuntimeState) -> DashboardChatRuntimeState: - """Use the prototype router prompt for all non-trivial routing.""" - conversation_context = self._extract_conversation_context(state["conversation_history"]) - fast_path_intent = self._build_fast_path_intent(state["user_query"]) - if fast_path_intent is not None: - state["conversation_context"] = conversation_context - state["intent_decision"] = fast_path_intent - state["small_talk_response"] = self._build_fast_path_small_talk_response( - state["user_query"] - ) - return state - intent_decision = self.llm_client.classify_intent( - user_query=state["user_query"], - conversation_context=conversation_context, - ) - state["conversation_context"] = conversation_context - state["intent_decision"] = intent_decision - return state - - -def _node_handle_small_talk( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Handle simple social turns without any tool use.""" - state["response"] = DashboardChatResponse( - answer_text=state.get("small_talk_response") - or self._compose_small_talk_response(state["user_query"]), - intent=DashboardChatIntent.SMALL_TALK, - usage=self._build_usage_summary(), - ) - return state - - -def _node_handle_irrelevant( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Handle questions outside dashboard chat scope.""" - state["response"] = DashboardChatResponse( - 
answer_text=( - "I can only answer questions about this dashboard, its charts, and the data behind them." - ), - intent=DashboardChatIntent.IRRELEVANT, - usage=self._build_usage_summary(), - ) - return state - - -def _node_handle_needs_clarification( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Ask for clarification when the router says the query is underspecified.""" - intent_decision = state["intent_decision"] - state["response"] = DashboardChatResponse( - answer_text=( - intent_decision.clarification_question - or self._clarification_fallback(intent_decision.missing_info) - ), - intent=DashboardChatIntent.NEEDS_CLARIFICATION, - usage=self._build_usage_summary(), - ) - return state - - -def _node_handle_query_with_sql( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Run the prototype new-query tool loop for SQL-routed questions.""" - return self._run_intent_tool_loop(state, max_turns=15, follow_up=False) - - -def _node_handle_query_without_sql( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Run the prototype new-query tool loop for context-only questions.""" - return self._run_intent_tool_loop(state, max_turns=15, follow_up=False) - - -def _node_handle_follow_up_sql( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Run the prototype follow-up loop for SQL-modifying turns.""" - return self._run_intent_tool_loop(state, max_turns=6, follow_up=True) - - -def _node_handle_follow_up_context( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Run the prototype follow-up loop for explanatory follow-ups.""" - return self._run_intent_tool_loop(state, max_turns=6, follow_up=True) - - -def _run_intent_tool_loop( - self, - state: DashboardChatRuntimeState, - *, - max_turns: int, - follow_up: bool, -) -> DashboardChatRuntimeState: - """Execute one prototype-style tool loop and store the response on state.""" - 
allowlist = state["allowlist"] - - query_embedding = self._get_cached_query_embedding( - state["user_query"], - embedding_cache={}, - ) - - messages = ( - self._build_follow_up_messages(state) - if follow_up - else self._build_new_query_messages(state) - ) - execution_result = self._execute_tool_loop( - state=state, - messages=messages, - max_turns=max_turns, - initial_embedding_cache={state["user_query"]: query_embedding}, - ) - - state["retrieved_documents"] = execution_result["retrieved_documents"] - state["citations"] = self._build_citations( - retrieved_documents=execution_result["retrieved_documents"], - dashboard_export=state["dashboard_export"], - allowlist=allowlist, - ) - state["tool_calls"] = execution_result["tool_calls"] - existing_timing_breakdown = dict(state.get("timing_breakdown") or {}) - execution_timing_breakdown = dict(execution_result.get("timing_breakdown") or {}) - merged_timing_breakdown = dict(existing_timing_breakdown) - if "graph_nodes_ms" in existing_timing_breakdown or "graph_nodes_ms" in execution_timing_breakdown: - merged_timing_breakdown["graph_nodes_ms"] = { - **dict(existing_timing_breakdown.get("graph_nodes_ms") or {}), - **dict(execution_timing_breakdown.get("graph_nodes_ms") or {}), - } - if "tool_calls_ms" in existing_timing_breakdown or "tool_calls_ms" in execution_timing_breakdown: - merged_timing_breakdown["tool_calls_ms"] = list( - execution_timing_breakdown.get("tool_calls_ms") - or existing_timing_breakdown.get("tool_calls_ms") - or [] - ) - for key, value in execution_timing_breakdown.items(): - if key not in {"graph_nodes_ms", "tool_calls_ms"}: - merged_timing_breakdown[key] = value - state["timing_breakdown"] = merged_timing_breakdown - state["sql"] = execution_result["sql"] - state["sql_validation"] = execution_result["sql_validation"] - state["sql_results"] = execution_result["sql_results"] - state["warnings"] = execution_result["warnings"] - response_format = self._determine_response_format( - 
user_query=state["user_query"], - sql_results=execution_result["sql_results"], - ) - state["response"] = DashboardChatResponse( - answer_text=self._compose_final_answer_text( - state, - execution_result, - response_format=response_format, - ), - intent=state["intent_decision"].intent, - citations=state["citations"], - warnings=execution_result["warnings"], - sql=execution_result["sql"], - sql_results=execution_result["sql_results"], - usage=self._build_usage_summary(), - tool_calls=execution_result["tool_calls"], - metadata={ - "response_format": response_format, - "table_columns": self._sql_result_columns(execution_result["sql_results"]), - }, - ) - return state - - -def _node_finalize_response( - self, - state: DashboardChatRuntimeState, -) -> DashboardChatRuntimeState: - """Attach warehouse citations and metadata to the finished response.""" - response = state["response"] - citations = list(response.citations) - sql_validation = state.get("sql_validation") - if ( - sql_validation is not None - and sql_validation.is_valid - and sql_validation.sanitized_sql is not None - ): - citations.extend( - DashboardChatCitation( - source_type="warehouse_table", - source_identifier=table_name, - title=f"Warehouse table: {table_name}", - snippet=f"SQL executed against {table_name}.", - table_name=table_name, - ) - for table_name in sql_validation.tables - if table_name - ) - - allowlist = state.get("allowlist") or DashboardChatAllowlist() - response_metadata = dict(response.metadata) - response_metadata.update( - { - "dashboard_id": state["dashboard_id"], - "retrieved_document_ids": [ - document.document_id for document in state.get("retrieved_documents") or [] - ], - "allowlisted_tables": sorted(allowlist.allowed_tables), - "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], - "intent_reason": state["intent_decision"].reason, - "missing_info": state["intent_decision"].missing_info, - "follow_up_type": 
state["intent_decision"].follow_up_context.follow_up_type, - } - ) - state["response"] = DashboardChatResponse( - answer_text=response.answer_text, - intent=response.intent, - citations=list(dict.fromkeys(citations)), - warnings=response.warnings, - sql=response.sql, - sql_results=response.sql_results, - usage=response.usage, - tool_calls=response.tool_calls, - metadata=response_metadata, - ) - return state - - -def _route_after_intent(state: DashboardChatRuntimeState) -> str: - """Route to one explicit handler per prototype intent.""" - return state["intent_decision"].intent.value diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/__init__.py b/ddpui/core/dashboard_chat/orchestration/nodes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py new file mode 100644 index 000000000..ea35eb01b --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -0,0 +1,60 @@ +"""Finalize node for dashboard chat graph.""" + +from typing import Any + +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import DashboardChatCitation, DashboardChatResponse + +from ..state import DashboardChatRuntimeState + + +def finalize_node(state: DashboardChatRuntimeState) -> dict[str, Any]: + """Attach warehouse citations and metadata to the finished response.""" + response = state["response"] + citations = list(response.citations) + sql_validation = state.get("sql_validation") + if ( + sql_validation is not None + and sql_validation.is_valid + and sql_validation.sanitized_sql is not None + ): + citations.extend( + DashboardChatCitation( + source_type="warehouse_table", + source_identifier=table_name, + title=f"Warehouse table: {table_name}", + snippet=f"SQL executed against {table_name}.", + table_name=table_name, + ) + for table_name in sql_validation.tables + 
if table_name + ) + + allowlist = state.get("allowlist") or DashboardChatAllowlist() + response_metadata = dict(response.metadata) + response_metadata.update( + { + "dashboard_id": state["dashboard_id"], + "retrieved_document_ids": [ + document.document_id for document in state.get("retrieved_documents") or [] + ], + "allowlisted_tables": sorted(allowlist.allowed_tables), + "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], + "intent_reason": state["intent_decision"].reason, + "missing_info": state["intent_decision"].missing_info, + "follow_up_type": state["intent_decision"].follow_up_context.follow_up_type, + } + ) + return { + "response": DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=list(dict.fromkeys(citations)), + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response_metadata, + ) + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py new file mode 100644 index 000000000..2d2616e7a --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py @@ -0,0 +1,84 @@ +"""Handle-data-query node for dashboard chat graph (covers query_with_sql and query_without_sql).""" + +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatResponse + +from ..message_stack import build_new_query_messages +from ..presentation import ( + build_usage_summary, + compose_final_answer_text, + determine_response_format, + sql_result_columns, +) +from ..retrieval import build_citations, get_cached_query_embedding +from ..state import DashboardChatRuntimeState +from ..tool_loop import execute_tool_loop +from .helpers import merge_tool_loop_timing + + +def handle_data_query_node( + state: DashboardChatRuntimeState, + llm_client, + vector_store, + 
warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, +) -> dict[str, Any]: + """Execute the new-query tool loop for SQL and context-only questions.""" + allowlist = state["allowlist"] + query_embedding = get_cached_query_embedding( + vector_store, state["user_query"], embedding_cache={} + ) + messages = build_new_query_messages(llm_client, state) + + execution_result = execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + tool_specifications, + state=state, + messages=messages, + max_turns=15, + initial_embedding_cache={state["user_query"]: query_embedding}, + ) + + citations = build_citations( + retrieved_documents=execution_result["retrieved_documents"], + dashboard_export=state["dashboard_export"], + allowlist=allowlist, + ) + response_format = determine_response_format( + user_query=state["user_query"], + sql_results=execution_result["sql_results"], + ) + + return { + "retrieved_documents": execution_result["retrieved_documents"], + "citations": citations, + "tool_calls": execution_result["tool_calls"], + "sql": execution_result["sql"], + "sql_validation": execution_result["sql_validation"], + "sql_results": execution_result["sql_results"], + "warnings": execution_result["warnings"], + "timing_breakdown": merge_tool_loop_timing(state, execution_result), + "response": DashboardChatResponse( + answer_text=compose_final_answer_text( + llm_client, state, execution_result, response_format=response_format + ), + intent=state["intent_decision"].intent, + citations=citations, + warnings=execution_result["warnings"], + sql=execution_result["sql"], + sql_results=execution_result["sql_results"], + usage=build_usage_summary(llm_client, vector_store), + tool_calls=execution_result["tool_calls"], + metadata={ + "response_format": response_format, + "table_columns": sql_result_columns(execution_result["sql_results"]), + }, + ), + } diff --git 
a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py new file mode 100644 index 000000000..e81edba89 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py @@ -0,0 +1,84 @@ +"""Handle-follow-up node for dashboard chat graph (covers follow_up_sql and follow_up_context).""" + +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatResponse + +from ..message_stack import build_follow_up_messages +from ..presentation import ( + build_usage_summary, + compose_final_answer_text, + determine_response_format, + sql_result_columns, +) +from ..retrieval import build_citations, get_cached_query_embedding +from ..state import DashboardChatRuntimeState +from ..tool_loop import execute_tool_loop +from .helpers import merge_tool_loop_timing + + +def handle_follow_up_node( + state: DashboardChatRuntimeState, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, +) -> dict[str, Any]: + """Execute the follow-up tool loop for SQL-modifying and explanatory follow-up questions.""" + allowlist = state["allowlist"] + query_embedding = get_cached_query_embedding( + vector_store, state["user_query"], embedding_cache={} + ) + messages = build_follow_up_messages(llm_client, state) + + execution_result = execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + tool_specifications, + state=state, + messages=messages, + max_turns=6, + initial_embedding_cache={state["user_query"]: query_embedding}, + ) + + citations = build_citations( + retrieved_documents=execution_result["retrieved_documents"], + dashboard_export=state["dashboard_export"], + allowlist=allowlist, + ) + response_format = determine_response_format( + user_query=state["user_query"], + sql_results=execution_result["sql_results"], + ) + + return { + "retrieved_documents": 
execution_result["retrieved_documents"], + "citations": citations, + "tool_calls": execution_result["tool_calls"], + "sql": execution_result["sql"], + "sql_validation": execution_result["sql_validation"], + "sql_results": execution_result["sql_results"], + "warnings": execution_result["warnings"], + "timing_breakdown": merge_tool_loop_timing(state, execution_result), + "response": DashboardChatResponse( + answer_text=compose_final_answer_text( + llm_client, state, execution_result, response_format=response_format + ), + intent=state["intent_decision"].intent, + citations=citations, + warnings=execution_result["warnings"], + sql=execution_result["sql"], + sql_results=execution_result["sql_results"], + usage=build_usage_summary(llm_client, vector_store), + tool_calls=execution_result["tool_calls"], + metadata={ + "response_format": response_format, + "table_columns": sql_result_columns(execution_result["sql_results"]), + }, + ), + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py new file mode 100644 index 000000000..c7296f927 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py @@ -0,0 +1,23 @@ +"""Handle-irrelevant node for dashboard chat graph.""" + +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse + +from ..presentation import build_usage_summary +from ..state import DashboardChatRuntimeState + + +def handle_irrelevant_node( + state: DashboardChatRuntimeState, llm_client, vector_store +) -> dict[str, Any]: + """Handle questions outside dashboard chat scope.""" + return { + "response": DashboardChatResponse( + answer_text=( + "I can only answer questions about this dashboard, its charts, and the data behind them." 
+ ), + intent=DashboardChatIntent.IRRELEVANT, + usage=build_usage_summary(llm_client, vector_store), + ) + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py new file mode 100644 index 000000000..1be10fa3c --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py @@ -0,0 +1,25 @@ +"""Handle-needs-clarification node for dashboard chat graph.""" + +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse + +from ..presentation import build_usage_summary, clarification_fallback +from ..state import DashboardChatRuntimeState + + +def handle_needs_clarification_node( + state: DashboardChatRuntimeState, llm_client, vector_store +) -> dict[str, Any]: + """Ask for clarification when the router says the query is underspecified.""" + intent_decision = state["intent_decision"] + return { + "response": DashboardChatResponse( + answer_text=( + intent_decision.clarification_question + or clarification_fallback(intent_decision.missing_info) + ), + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + usage=build_usage_summary(llm_client, vector_store), + ) + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py new file mode 100644 index 000000000..807222647 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py @@ -0,0 +1,24 @@ +"""Handle-small-talk node for dashboard chat graph.""" + +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse + +from ..presentation import build_usage_summary, compose_small_talk_response +from ..state import DashboardChatRuntimeState + + +def handle_small_talk_node( + state: DashboardChatRuntimeState, llm_client, vector_store +) -> dict[str, Any]: + 
"""Handle simple social turns without any tool use.""" + return { + "response": DashboardChatResponse( + answer_text=( + state.get("small_talk_response") + or compose_small_talk_response(llm_client, state["user_query"]) + ), + intent=DashboardChatIntent.SMALL_TALK, + usage=build_usage_summary(llm_client, vector_store), + ) + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py b/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py new file mode 100644 index 000000000..f236d5477 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py @@ -0,0 +1,33 @@ +"""Shared helpers for dashboard chat graph nodes.""" + +from typing import Any + +from ..state import DashboardChatRuntimeState + + +def route_after_intent(state: DashboardChatRuntimeState) -> str: + """Route to one explicit handler per prototype intent.""" + return state["intent_decision"].intent.value + + +def merge_tool_loop_timing( + state: DashboardChatRuntimeState, + execution_result: dict[str, Any], +) -> dict[str, Any]: + """Merge timing from the tool loop into the state's existing timing breakdown.""" + existing = dict(state.get("timing_breakdown") or {}) + from_loop = dict(execution_result.get("timing_breakdown") or {}) + merged = dict(existing) + if "graph_nodes_ms" in existing or "graph_nodes_ms" in from_loop: + merged["graph_nodes_ms"] = { + **dict(existing.get("graph_nodes_ms") or {}), + **dict(from_loop.get("graph_nodes_ms") or {}), + } + if "tool_calls_ms" in existing or "tool_calls_ms" in from_loop: + merged["tool_calls_ms"] = list( + from_loop.get("tool_calls_ms") or existing.get("tool_calls_ms") or [] + ) + for key, value in from_loop.items(): + if key not in {"graph_nodes_ms", "tool_calls_ms"}: + merged[key] = value + return merged diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py new file mode 100644 index 000000000..6e5c98a3f --- /dev/null +++ 
b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py @@ -0,0 +1,18 @@ +"""Load-context node for dashboard chat graph.""" + +from typing import Any + +from ..session_snapshot import load_session_snapshot +from ..state import DashboardChatRuntimeState + + +def load_context_node(state: DashboardChatRuntimeState) -> dict[str, Any]: + """Load or reuse the session-stable dashboard context snapshot.""" + snapshot = load_session_snapshot(state) + return { + "dashboard_export": snapshot["dashboard_export"], + "dbt_index": snapshot["dbt_index"], + "allowlist": snapshot["allowlist"], + "session_schema_cache": snapshot["schema_cache"], + "session_distinct_cache": snapshot["distinct_cache"], + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py new file mode 100644 index 000000000..09bc128c3 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py @@ -0,0 +1,27 @@ +"""Route-intent node for dashboard chat graph.""" + +from typing import Any + +from ..conversation import extract_conversation_context +from ..presentation import build_fast_path_intent, build_fast_path_small_talk_response +from ..state import DashboardChatRuntimeState + + +def route_intent_node(state: DashboardChatRuntimeState, llm_client) -> dict[str, Any]: + """Use the prototype router prompt for all non-trivial routing.""" + conversation_context = extract_conversation_context(state["conversation_history"]) + fast_path_intent = build_fast_path_intent(state["user_query"]) + if fast_path_intent is not None: + return { + "conversation_context": conversation_context, + "intent_decision": fast_path_intent, + "small_talk_response": build_fast_path_small_talk_response(state["user_query"]), + } + intent_decision = llm_client.classify_intent( + user_query=state["user_query"], + conversation_context=conversation_context, + ) + return { + "conversation_context": conversation_context, + 
"intent_decision": intent_decision, + } diff --git a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index 74d882924..f65d6c512 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -5,6 +5,8 @@ from time import perf_counter from typing import Any +from langgraph.graph import END, START, StateGraph + from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig from ddpui.core.dashboard_chat.agents.interface import DashboardChatLlmClient from ddpui.core.dashboard_chat.agents.openai import OpenAIDashboardChatLlmClient @@ -13,17 +15,128 @@ from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools from ddpui.models.org import Org -from .bindings import bind_dashboard_chat_runtime_methods -from .definition import build_dashboard_chat_graph +from .conversation import normalize_conversation_history +from .nodes.finalize import finalize_node +from .nodes.handle_data_query import handle_data_query_node +from .nodes.handle_follow_up import handle_follow_up_node +from .nodes.handle_irrelevant import handle_irrelevant_node +from .nodes.handle_needs_clarification import handle_needs_clarification_node +from .nodes.handle_small_talk import handle_small_talk_node +from .nodes.helpers import route_after_intent +from .nodes.load_context import load_context_node +from .nodes.route_intent import route_intent_node from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN from .tool_specifications import DASHBOARD_CHAT_TOOL_SPECIFICATIONS +def _timed_node(node_name: str, handler): + """Wrap one graph node so per-node duration is recorded in timing_breakdown.""" + + def wrapped(state: DashboardChatRuntimeState) -> dict: + started_at = perf_counter() + updates = handler(state) + elapsed_ms = round((perf_counter() - started_at) * 1000, 2) + existing_timing = 
dict(state.get("timing_breakdown") or {}) + new_timing = dict(updates.get("timing_breakdown") or existing_timing) + graph_nodes_ms = dict(new_timing.get("graph_nodes_ms") or {}) + graph_nodes_ms[node_name] = elapsed_ms + new_timing["graph_nodes_ms"] = graph_nodes_ms + updates["timing_breakdown"] = new_timing + return updates + + return wrapped + + +def _build_graph( + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, +): + """Build the intent-routing graph with all deps injected via closures.""" + + def _load_context(state): + return load_context_node(state) + + def _route_intent(state): + return route_intent_node(state, llm_client) + + def _handle_small_talk(state): + return handle_small_talk_node(state, llm_client, vector_store) + + def _handle_irrelevant(state): + return handle_irrelevant_node(state, llm_client, vector_store) + + def _handle_needs_clarification(state): + return handle_needs_clarification_node(state, llm_client, vector_store) + + def _handle_data_query(state): + return handle_data_query_node( + state, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, + ) + + def _handle_follow_up(state): + return handle_follow_up_node( + state, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, + ) + + def _finalize(state): + return finalize_node(state) + + graph = StateGraph(DashboardChatRuntimeState) + graph.add_node("load_context", _timed_node("load_context", _load_context)) + graph.add_node("route_intent", _timed_node("route_intent", _route_intent)) + graph.add_node("handle_small_talk", _timed_node("handle_small_talk", _handle_small_talk)) + graph.add_node("handle_irrelevant", _timed_node("handle_irrelevant", _handle_irrelevant)) + graph.add_node( + "handle_needs_clarification", + _timed_node("handle_needs_clarification", _handle_needs_clarification), + ) + 
graph.add_node("handle_data_query", _timed_node("handle_data_query", _handle_data_query)) + graph.add_node("handle_follow_up", _timed_node("handle_follow_up", _handle_follow_up)) + graph.add_node("finalize", _timed_node("finalize", _finalize)) + + graph.add_edge(START, "load_context") + graph.add_edge("load_context", "route_intent") + graph.add_conditional_edges( + "route_intent", + route_after_intent, + { + "small_talk": "handle_small_talk", + "irrelevant": "handle_irrelevant", + "needs_clarification": "handle_needs_clarification", + "query_with_sql": "handle_data_query", + "query_without_sql": "handle_data_query", + "follow_up_sql": "handle_follow_up", + "follow_up_context": "handle_follow_up", + }, + ) + graph.add_edge("handle_small_talk", "finalize") + graph.add_edge("handle_irrelevant", "finalize") + graph.add_edge("handle_needs_clarification", "finalize") + graph.add_edge("handle_data_query", "finalize") + graph.add_edge("handle_follow_up", "finalize") + graph.add_edge("finalize", END) + return graph.compile() + + class DashboardChatRuntime: """Run dashboard chat turns with the prototype's explicit intent routing and tool loop.""" - TOOL_SPECIFICATIONS = DASHBOARD_CHAT_TOOL_SPECIFICATIONS - def __init__( self, vector_store: ChromaDashboardChatVectorStore | None = None, @@ -46,7 +159,14 @@ def __init__( max_rows=self.runtime_config.max_query_rows, ) ) - self.graph = build_dashboard_chat_graph(self) + self.graph = _build_graph( + self.llm_client, + self.vector_store, + self.warehouse_tools_factory, + self.runtime_config, + self.source_config, + DASHBOARD_CHAT_TOOL_SPECIFICATIONS, + ) def run( self, @@ -56,7 +176,7 @@ def run( session_id: str | None = None, vector_collection_name: str | None = None, conversation_history: Sequence[dict[str, Any]] | None = None, - ): + ) -> DashboardChatResponse: """Run one dashboard chat turn.""" if hasattr(self.llm_client, "reset_usage"): self.llm_client.reset_usage() @@ -68,7 +188,7 @@ def run( "session_id": session_id, 
"vector_collection_name": vector_collection_name, "user_query": user_query, - "conversation_history": self._normalize_conversation_history(conversation_history), + "conversation_history": normalize_conversation_history(conversation_history), "timing_breakdown": { "graph_nodes_ms": {}, "tool_calls_ms": [], @@ -96,16 +216,8 @@ def run( metadata=response_metadata, ) + @lru_cache(maxsize=1) def get_dashboard_chat_runtime() -> DashboardChatRuntime: """Return the shared dashboard chat runtime used by live chat turns.""" return DashboardChatRuntime() - -bind_dashboard_chat_runtime_methods(DashboardChatRuntime) - -__all__ = [ - "DashboardChatRuntime", - "DashboardChatRuntimeState", - "SMALL_TALK_FAST_PATH_PATTERN", - "get_dashboard_chat_runtime", -] diff --git a/ddpui/core/dashboard_chat/orchestration/presentation.py b/ddpui/core/dashboard_chat/orchestration/presentation.py index a1c0b9a8c..38247b972 100644 --- a/ddpui/core/dashboard_chat/orchestration/presentation.py +++ b/ddpui/core/dashboard_chat/orchestration/presentation.py @@ -2,6 +2,7 @@ from collections.abc import Sequence import logging +import re from typing import Any from ddpui.core.dashboard_chat.contracts import ( @@ -15,7 +16,7 @@ logger = logging.getLogger(__name__) -def _serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: +def serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: """Trim large tool payloads before feeding them back into the model.""" serialized = dict(result) docs = serialized.get("docs") @@ -30,8 +31,7 @@ def _serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: return serialized -def _summarize_tool_call( - self, +def summarize_tool_call( *, tool_name: str, args: dict[str, Any], @@ -73,12 +73,11 @@ def _summarize_tool_call( return entry -def _max_turns_message( - self, +def max_turns_message( user_query: str, retrieved_documents: Sequence[DashboardChatRetrievedDocument], ) -> str: - """Return a bounded fallback when the prototype tool loop exhausts its 
budget.""" + """Return a bounded fallback when the tool loop exhausts its budget.""" if retrieved_documents: return ( "I found relevant dashboard context, but I couldn't complete the analysis safely. " @@ -90,21 +89,19 @@ def _max_turns_message( ) -def _compose_final_answer_text( - self, +def compose_final_answer_text( + llm_client, state: DashboardChatRuntimeState, execution_result: dict[str, Any], *, response_format: str, ) -> str: """Compose one final markdown answer for all non-trivial routes.""" - normalized_sql_results = self._normalize_sql_results_for_answer( - execution_result.get("sql_results") - ) + normalized_sql_results = normalize_sql_results_for_answer(execution_result.get("sql_results")) draft_answer = (execution_result.get("answer_text") or "").strip() or None - if hasattr(self.llm_client, "compose_final_answer"): + if hasattr(llm_client, "compose_final_answer"): try: - answer_text = self.llm_client.compose_final_answer( + answer_text = llm_client.compose_final_answer( user_query=state["user_query"], intent=state["intent_decision"].intent, response_format=response_format, @@ -118,7 +115,7 @@ def _compose_final_answer_text( return answer_text except Exception: logger.exception("Dashboard chat final answer composition failed") - return self._fallback_answer_text( + return fallback_answer_text( execution_result.get("retrieved_documents") or [], normalized_sql_results, response_format=response_format, @@ -126,7 +123,7 @@ def _compose_final_answer_text( ) -def _determine_response_format( +def determine_response_format( *, user_query: str, sql_results: list[dict[str, Any]] | None, @@ -158,7 +155,7 @@ def _determine_response_format( return "text" -def _sql_result_columns(sql_results: list[dict[str, Any]] | None) -> list[str]: +def sql_result_columns(sql_results: list[dict[str, Any]] | None) -> list[str]: """Return table columns for frontend rendering metadata.""" if not sql_results: return [] @@ -168,32 +165,32 @@ def _sql_result_columns(sql_results: 
list[dict[str, Any]] | None) -> list[str]: return list(first_row.keys()) -def _build_usage_summary(self) -> dict[str, Any]: - """Collect per-turn usage from the llm client and embedding provider when supported.""" +def build_usage_summary(llm_client, vector_store) -> dict[str, Any]: + """Collect per-turn usage from the llm client and embedding provider.""" usage: dict[str, Any] = {} - if hasattr(self.llm_client, "usage_summary"): - llm_usage = self.llm_client.usage_summary() + if hasattr(llm_client, "usage_summary"): + llm_usage = llm_client.usage_summary() if llm_usage: usage["llm"] = llm_usage - if hasattr(self.vector_store, "usage_summary"): - embedding_usage = self.vector_store.usage_summary() + if hasattr(vector_store, "usage_summary"): + embedding_usage = vector_store.usage_summary() if embedding_usage: usage["embeddings"] = embedding_usage return usage -def _compose_small_talk_response(self, user_query: str) -> str: - """Generate the prototype small-talk response or fall back to a fixed helper.""" - if hasattr(self.llm_client, "compose_small_talk"): +def compose_small_talk_response(llm_client, user_query: str) -> str: + """Generate the small-talk response or fall back to a fixed helper.""" + if hasattr(llm_client, "compose_small_talk"): try: - return self.llm_client.compose_small_talk(user_query) + return llm_client.compose_small_talk(user_query) except Exception: logger.exception("Dashboard chat small-talk generation failed") return "Hi! I can help with your program data and metrics. What would you like to know?" 
-def _build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | None: - """Handle obvious greetings, thanks, and basic capability prompts without an llm round trip.""" +def build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | None: + """Handle obvious greetings without an LLM round trip.""" if not SMALL_TALK_FAST_PATH_PATTERN.match(user_query.strip()): return None return DashboardChatIntentDecision( @@ -203,7 +200,7 @@ def _build_fast_path_intent(user_query: str) -> DashboardChatIntentDecision | No ) -def _build_fast_path_small_talk_response(user_query: str) -> str: +def build_fast_path_small_talk_response(user_query: str) -> str: """Keep basic small-talk replies instant and deterministic.""" normalized_query = user_query.strip().lower() if "what can you do" in normalized_query: @@ -227,8 +224,8 @@ def _build_fast_path_small_talk_response(user_query: str) -> str: return "Hi. Ask me anything about this dashboard or the data behind it." -def _clarification_fallback(missing_info: Sequence[str]) -> str: - """Mirror the prototype's specific clarification nudges when the router omits a question.""" +def clarification_fallback(missing_info: Sequence[str]) -> str: + """Return a specific clarification nudge when the router omits a question.""" missing = {item.lower() for item in missing_info} prompts: list[str] = [] if "metric" in missing: @@ -242,7 +239,7 @@ def _clarification_fallback(missing_info: Sequence[str]) -> str: return "Could you clarify " + ", ".join(prompts) + "?" 
-def _fallback_answer_text( +def fallback_answer_text( retrieved_documents: Sequence[DashboardChatRetrievedDocument], sql_results: list[dict[str, Any]] | None, *, @@ -250,72 +247,66 @@ def _fallback_answer_text( draft_answer: str | None = None, ) -> str: """Fallback response when the model returns no final text.""" + from .retrieval import compact_snippet + if draft_answer: return draft_answer if sql_results is not None: if not sql_results: return "I didn't find any matching rows for that question." if response_format in {"text_with_table", "table"}: - return f"I found {len(sql_results)} matching rows. See the table below for the breakdown." + return ( + f"I found {len(sql_results)} matching rows. See the table below for the breakdown." + ) if len(sql_results) == 1: - return _single_row_summary(sql_results[0]) + return single_row_summary(sql_results[0]) return f"I found {len(sql_results)} matching rows." if retrieved_documents: - return _compact_snippet(retrieved_documents[0].content) + return compact_snippet(retrieved_documents[0].content) return "I couldn't find enough context to answer that." 
-def _single_row_summary(row: dict[str, Any]) -> str: +def single_row_summary(row: dict[str, Any]) -> str: """Return a readable fallback when one structured row is available.""" - parts = [ - f"{_humanize_column_name(column)}: {value}" - for column, value in row.items() - ] + parts = [f"{humanize_column_name(col)}: {value}" for col, value in row.items()] return "; ".join(parts) -def _humanize_column_name(column_name: str) -> str: +def humanize_column_name(column_name: str) -> str: """Convert snake_case warehouse columns into human labels.""" return str(column_name).replace("_", " ").strip().title() -def _normalize_sql_results_for_answer( - cls, +def normalize_sql_results_for_answer( sql_results: list[dict[str, Any]] | None, ) -> list[dict[str, Any]] | None: - """Normalize SQL results into llm-friendly values for final answer writing.""" + """Normalize SQL results into LLM-friendly values for final answer writing.""" if sql_results is None: return None - normalized_rows: list[dict[str, Any]] = [] - for row in sql_results: - normalized_row: dict[str, Any] = {} - for column_name, value in row.items(): - normalized_row[column_name] = cls._normalize_sql_value_for_answer( - column_name, - value, - ) - normalized_rows.append(normalized_row) - return normalized_rows + return [ + {col: normalize_sql_value_for_answer(col, val) for col, val in row.items()} + for row in sql_results + ] -def _normalize_sql_value_for_answer(cls, column_name: str, value: Any) -> Any: +def normalize_sql_value_for_answer(column_name: str, value: Any) -> Any: """Format warehouse values into user-friendly forms for answer composition.""" if value is None: return None if isinstance(value, bool): return value if isinstance(value, (int, float)): - return cls._format_numeric_answer_value(column_name, value) + return format_numeric_answer_value(column_name, value) text_value = str(value) - numeric_value = cls._parse_numeric_string(text_value) + numeric_value = parse_numeric_string(text_value) if 
numeric_value is None: return text_value - return cls._format_numeric_answer_value(column_name, numeric_value) + return format_numeric_answer_value(column_name, numeric_value) -def _format_numeric_answer_value(cls, column_name: str, value: float | int) -> str | int | float: +def format_numeric_answer_value(column_name: str, value: float | int) -> str | int | float: """Format numeric values for answer composition.""" - if cls._looks_like_rate_metric(column_name) and 0 <= float(value) <= 1: + if looks_like_rate_metric(column_name) and 0 <= float(value) <= 1: percentage_value = f"{float(value) * 100:.1f}".rstrip("0").rstrip(".") return f"{percentage_value}%" rounded_value = round(float(value), 2) @@ -324,13 +315,11 @@ def _format_numeric_answer_value(cls, column_name: str, value: float | int) -> s return f"{rounded_value:.2f}".rstrip("0").rstrip(".") -def _parse_numeric_string(value: str) -> float | None: +def parse_numeric_string(value: str) -> float | None: """Parse decimal-like strings emitted by DjangoJSONEncoder.""" normalized_value = value.strip() if not normalized_value: return None - import re - if not re.fullmatch(r"-?\d+(?:\.\d+)?(?:E-?\d+)?", normalized_value, flags=re.IGNORECASE): return None try: @@ -339,13 +328,10 @@ def _parse_numeric_string(value: str) -> float | None: return None -def _looks_like_rate_metric(column_name: str) -> bool: +def looks_like_rate_metric(column_name: str) -> bool: """Return whether a metric name likely represents a percentage/rate.""" normalized_column = column_name.lower() return any( token in normalized_column for token in ["rate", "ratio", "percentage", "percent", "share", "pct"] ) - - -from .retrieval import _compact_snippet diff --git a/ddpui/core/dashboard_chat/orchestration/retrieval.py b/ddpui/core/dashboard_chat/orchestration/retrieval.py index f6b89a675..f1ba55448 100644 --- a/ddpui/core/dashboard_chat/orchestration/retrieval.py +++ b/ddpui/core/dashboard_chat/orchestration/retrieval.py @@ -19,8 +19,9 @@ ) -def 
_retrieve_vector_documents( - self, +def retrieve_vector_documents( + vector_store, + runtime_config, *, org, collection_name: str | None, @@ -33,10 +34,10 @@ def _retrieve_vector_documents( if not source_types: return [] - results = self.vector_store.query( + results = vector_store.query( org.id, query_text=query_text, - n_results=self.runtime_config.retrieval_limit, + n_results=runtime_config.retrieval_limit, source_types=source_types, dashboard_id=dashboard_id, query_embedding=query_embedding, @@ -55,27 +56,26 @@ def _retrieve_vector_documents( ] -def _filter_allowlisted_dbt_results( +def filter_allowlisted_dbt_results( results: Sequence[DashboardChatRetrievedDocument], allowlist: DashboardChatAllowlist, ) -> list[DashboardChatRetrievedDocument]: """Keep only dbt docs that belong to the dashboard lineage.""" filtered_results: list[DashboardChatRetrievedDocument] = [] for result in results: - unique_id = _unique_id_from_source_identifier(result.source_identifier) + unique_id = unique_id_from_source_identifier(result.source_identifier) if allowlist.is_unique_id_allowed(unique_id): filtered_results.append(result) return filtered_results -def _dedupe_retrieved_documents( +def dedupe_retrieved_documents( results: Sequence[DashboardChatRetrievedDocument], ) -> list[DashboardChatRetrievedDocument]: """Deduplicate retrieved documents while preserving better-ranked items.""" - scored_results: list[tuple[float, DashboardChatRetrievedDocument]] = [] - for result in results: - scored_results.append((result.distance if result.distance is not None else 999.0, result)) - + scored_results = [ + (result.distance if result.distance is not None else 999.0, result) for result in results + ] merged_results: list[DashboardChatRetrievedDocument] = [] seen_document_ids: set[str] = set() for _, result in sorted(scored_results, key=lambda item: item[0]): @@ -86,8 +86,7 @@ def _dedupe_retrieved_documents( return merged_results -def _build_citations( - self, +def build_citations( *, 
retrieved_documents: Sequence[DashboardChatRetrievedDocument], dashboard_export: dict[str, Any], @@ -112,13 +111,13 @@ def _build_citations( DashboardChatCitation( source_type=document.source_type, source_identifier=document.source_identifier, - title=self._citation_title( + title=citation_title( document=document, dashboard_title=dashboard_title, chart_lookup=chart_lookup, table_name=table_name, ), - snippet=_compact_snippet(document.content), + snippet=compact_snippet(document.content), dashboard_id=document.dashboard_id, table_name=table_name, ) @@ -126,7 +125,7 @@ def _build_citations( return citations -def _citation_title( +def citation_title( *, document: DashboardChatRetrievedDocument, dashboard_title: str, @@ -150,23 +149,22 @@ def _citation_title( return document.source_identifier -def _compact_snippet(content: str, max_length: int = 220) -> str: - """Collapse whitespace and trim long snippets for citations and suggestions.""" +def compact_snippet(content: str, max_length: int = 220) -> str: + """Collapse whitespace and trim long snippets for citations.""" normalized = " ".join(content.split()) if len(normalized) <= max_length: return normalized return normalized[: max_length - 3].rstrip() + "..." 
-def _build_tool_document_payload( - self, +def build_tool_document_payload( document: DashboardChatRetrievedDocument, allowlist: DashboardChatAllowlist, dashboard_export: dict[str, Any], ) -> dict[str, Any]: - """Convert a runtime retrieval result into the prototype tool payload shape.""" + """Convert a runtime retrieval result into the tool payload shape.""" metadata: dict[str, Any] = { - "type": self._prototype_doc_type(document.source_type), + "type": prototype_doc_type(document.source_type), "source_type": document.source_type, "source_identifier": document.source_identifier, } @@ -174,9 +172,9 @@ def _build_tool_document_payload( if chart_id is not None: metadata["chart_id"] = chart_id metadata["dashboard_id"] = document.dashboard_id - chart_metadata = self._build_chart_tool_metadata(chart_id, dashboard_export) - if chart_metadata: - metadata.update(chart_metadata) + chart_meta = build_chart_tool_metadata(chart_id, dashboard_export) + if chart_meta: + metadata.update(chart_meta) unique_id = unique_id_from_source_identifier(document.source_identifier) if unique_id: metadata["dbt_unique_id"] = unique_id @@ -189,18 +187,13 @@ def _build_tool_document_payload( } -def _build_chart_tool_metadata( - cls, +def build_chart_tool_metadata( chart_id: int, dashboard_export: dict[str, Any], ) -> dict[str, Any]: """Return structured chart metadata that nudges the tool loop toward exact chart fields.""" chart = next( - ( - candidate - for candidate in (dashboard_export.get("charts") or []) - if candidate.get("id") == chart_id - ), + (c for c in (dashboard_export.get("charts") or []) if c.get("id") == chart_id), None, ) if chart is None: @@ -210,25 +203,25 @@ def _build_chart_tool_metadata( chart.get("schema_name"), chart.get("table_name"), ) - metric_columns = cls._chart_metric_columns(chart) - dimension_columns = cls._chart_dimension_columns(chart) - time_column = cls._chart_time_column(chart, dimension_columns) + metric_cols = chart_metric_columns(chart) + dimension_cols = 
chart_dimension_columns(chart) + time_col = chart_time_column(chart, dimension_cols) payload: dict[str, Any] = { "chart_title": str(chart.get("title") or ""), "chart_type": str(chart.get("chart_type") or ""), } if preferred_table: payload["preferred_table"] = preferred_table - if metric_columns: - payload["metric_columns"] = metric_columns - if dimension_columns: - payload["dimension_columns"] = dimension_columns - if time_column: - payload["time_column"] = time_column + if metric_cols: + payload["metric_columns"] = metric_cols + if dimension_cols: + payload["dimension_columns"] = dimension_cols + if time_col: + payload["time_column"] = time_col return payload -def _prototype_doc_type(source_type: str) -> str: +def prototype_doc_type(source_type: str) -> str: """Map Dalgo source types into the prototype doc-type vocabulary.""" if source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: return "chart" @@ -240,7 +233,7 @@ def _prototype_doc_type(source_type: str) -> str: return "context" -def _chart_metric_columns(cls, chart: dict[str, Any]) -> list[str]: +def chart_metric_columns(chart: dict[str, Any]) -> list[str]: """Extract the most likely metric columns from one chart export payload.""" extra_config = chart.get("extra_config") or {} metrics: list[str] = [] @@ -268,7 +261,7 @@ def _chart_metric_columns(cls, chart: dict[str, Any]) -> list[str]: return list(dict.fromkeys(metrics)) -def _chart_dimension_columns(cls, chart: dict[str, Any]) -> list[str]: +def chart_dimension_columns(chart: dict[str, Any]) -> list[str]: """Extract dimension-like fields from one chart export payload.""" extra_config = chart.get("extra_config") or {} dimensions: list[str] = [] @@ -282,8 +275,7 @@ def _chart_dimension_columns(cls, chart: dict[str, Any]) -> list[str]: return list(dict.fromkeys(dimensions)) -def _chart_time_column( - cls, +def chart_time_column( chart: dict[str, Any], dimension_columns: Sequence[str], ) -> str | None: @@ -294,15 +286,26 @@ def _chart_time_column( if 
isinstance(value, str) and value.strip(): return value.strip() for dimension in dimension_columns: - if cls._looks_like_time_dimension(dimension): + if looks_like_time_dimension(dimension): return dimension return None -def _looks_like_time_dimension(column_name: str) -> bool: +def looks_like_time_dimension(column_name: str) -> bool: """Return whether a dimension name probably represents time bucketing.""" normalized_column = column_name.lower() return any( token in normalized_column for token in ["date", "day", "week", "month", "quarter", "year", "time"] ) + + +def get_cached_query_embedding( + vector_store, + query_text: str, + embedding_cache: dict[str, list[float]], +) -> list[float]: + """Cache embeddings per query string during one turn.""" + if query_text not in embedding_cache: + embedding_cache[query_text] = vector_store.embed_query(query_text) + return embedding_cache[query_text] diff --git a/ddpui/core/dashboard_chat/orchestration/session_snapshot.py b/ddpui/core/dashboard_chat/orchestration/session_snapshot.py index 4be9b435d..19609b704 100644 --- a/ddpui/core/dashboard_chat/orchestration/session_snapshot.py +++ b/ddpui/core/dashboard_chat/orchestration/session_snapshot.py @@ -21,11 +21,11 @@ from .state import DashboardChatRuntimeState -def _load_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: +def load_session_snapshot(state: DashboardChatRuntimeState) -> dict[str, Any]: """Return the current session's frozen dashboard context snapshot.""" session_id = state.get("session_id") if not session_id: - return self._build_session_snapshot(state) + return build_session_snapshot(state) cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) cached_snapshot = cache.get(cache_key) @@ -38,7 +38,7 @@ def _load_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, "distinct_cache": deserialize_distinct_cache(cached_snapshot.get("distinct_cache")), } - snapshot = self._build_session_snapshot(state) + 
snapshot = build_session_snapshot(state) cache.set( cache_key, { @@ -53,7 +53,7 @@ def _load_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, return snapshot -def _build_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, Any]: +def build_session_snapshot(state: DashboardChatRuntimeState) -> dict[str, Any]: """Build one session-stable snapshot of dashboard-specific runtime context.""" dashboard_export = DashboardService.export_dashboard_context( state["dashboard_id"], @@ -66,18 +66,14 @@ def _build_session_snapshot(self, state: DashboardChatRuntimeState) -> dict[str, ) return { "dashboard_export": dashboard_export, - "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index( - manifest_json, - allowlist, - ), + "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index(manifest_json, allowlist), "allowlist": allowlist, "schema_cache": {}, "distinct_cache": set(), } -def _persist_session_schema_cache( - self, +def persist_session_schema_cache( state: DashboardChatRuntimeState, schema_cache: dict[str, DashboardChatSchemaSnippet], ) -> None: @@ -96,8 +92,7 @@ def _persist_session_schema_cache( state["session_schema_cache"] = dict(schema_cache) -def _persist_session_distinct_cache( - self, +def persist_session_distinct_cache( state: DashboardChatRuntimeState, distinct_cache: set[tuple[str, str, str]], ) -> None: diff --git a/ddpui/core/dashboard_chat/orchestration/sql_execution.py b/ddpui/core/dashboard_chat/orchestration/sql_execution.py index 7006800ff..62b08e479 100644 --- a/ddpui/core/dashboard_chat/orchestration/sql_execution.py +++ b/ddpui/core/dashboard_chat/orchestration/sql_execution.py @@ -1,6 +1,5 @@ """SQL execution and guardrail helpers for dashboard chat graph execution.""" -from collections.abc import Sequence import json import re from typing import Any @@ -11,11 +10,32 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent from ddpui.core.dashboard_chat.warehouse.sql_guard import 
DashboardChatSqlGuard +from .conversation import extract_requested_follow_up_dimension +from .sql_parsing import ( + table_references, + resolve_identifier_table, + tables_with_column, + extract_text_filter_values, + find_tables_with_column, + primary_table_name, + referenced_sql_identifier_refs, + resolve_table_qualifier, + best_table_for_missing_columns, + structural_dimensions_from_sql, + normalize_dimension_name, +) from .state import DashboardChatRuntimeState +from .tool_handlers import ( + get_turn_warehouse_tools, + get_cached_schema_snippets, + has_validated_distinct_value, + is_text_type, + record_validated_distinct_values, + record_validated_filters_from_sql, +) -def _validate_sql_allowlist( - self, +def validate_sql_allowlist( sql: str, allowlist: DashboardChatAllowlist, ) -> dict[str, Any]: @@ -37,8 +57,9 @@ def _validate_sql_allowlist( return {"valid": True, "invalid_tables": [], "message": ""} -def _run_sql_with_distinct_guard( - self, +def run_sql_with_distinct_guard( + warehouse_tools_factory, + runtime_config, args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -48,7 +69,7 @@ def _run_sql_with_distinct_guard( if not sql: return {"error": "sql_missing", "message": "SQL is required"} - allowlist_validation = self._validate_sql_allowlist(sql, state["allowlist"]) + allowlist_validation = validate_sql_allowlist(sql, state["allowlist"]) if not allowlist_validation["valid"]: return { "error": "table_not_allowed", @@ -56,14 +77,18 @@ def _run_sql_with_distinct_guard( "message": allowlist_validation["message"], } - follow_up_dimension_validation = self._validate_follow_up_dimension_usage( + follow_up_dimension_validation = validate_follow_up_dimension_usage( + warehouse_tools_factory, sql=sql, state=state, execution_context=execution_context, ) if follow_up_dimension_validation is not None: return follow_up_dimension_validation - missing_distinct = self._missing_distinct(sql, state, execution_context) + + 
missing_distinct = check_missing_distinct( + warehouse_tools_factory, sql, state, execution_context + ) if missing_distinct: return { "error": "must_fetch_distinct_values", @@ -75,7 +100,7 @@ def _run_sql_with_distinct_guard( validation = DashboardChatSqlGuard( allowlist=state["allowlist"], - max_rows=self.runtime_config.max_query_rows, + max_rows=runtime_config.max_query_rows, ).validate(sql) execution_context["last_sql_validation"] = validation if not validation.is_valid or not validation.sanitized_sql: @@ -85,7 +110,8 @@ def _run_sql_with_distinct_guard( "warnings": validation.warnings, } - missing_columns = self._missing_columns_in_primary_table( + missing_columns = missing_columns_in_primary_table( + warehouse_tools_factory, sql=validation.sanitized_sql, state=state, execution_context=execution_context, @@ -95,14 +121,14 @@ def _run_sql_with_distinct_guard( execution_context["last_sql"] = validation.sanitized_sql try: - rows = self._get_turn_warehouse_tools( + rows = get_turn_warehouse_tools( + warehouse_tools_factory, execution_context, state["org"], - ).execute_sql( - validation.sanitized_sql - ) + ).execute_sql(validation.sanitized_sql) except Exception as error: - structured_error = self._structured_sql_execution_error( + structured_error = structured_sql_execution_error( + warehouse_tools_factory, sql=validation.sanitized_sql, error=error, state=state, @@ -118,7 +144,7 @@ def _run_sql_with_distinct_guard( serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder)) execution_context["last_sql_results"] = serialized_rows - self._record_validated_filters_from_sql( + record_validated_filters_from_sql( state=state, execution_context=execution_context, sql=validation.sanitized_sql, @@ -133,38 +159,37 @@ def _run_sql_with_distinct_guard( } -def _missing_columns_in_primary_table( - self, +def missing_columns_in_primary_table( + warehouse_tools_factory, *, sql: str, state: DashboardChatRuntimeState, execution_context: dict[str, Any], ) -> dict[str, Any] 
| None: """Return a corrective tool error when SQL references columns absent from the referenced query tables.""" - table_references = self._table_references(sql) + table_refs = table_references(sql) referenced_tables = [ - reference["table_name"] - for reference in table_references - if reference.get("table_name") + reference["table_name"] for reference in table_refs if reference.get("table_name") ] if not referenced_tables: return None - schema_cache = self._get_cached_schema_snippets( + schema_cache = get_cached_schema_snippets( + warehouse_tools_factory, state, execution_context, tables=referenced_tables, ) - all_schema_cache = self._get_cached_schema_snippets(state, execution_context) + all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) missing_columns_by_table: dict[str, set[str]] = {} candidate_tables_by_column: dict[str, list[str]] = {} tables_in_query = list(dict.fromkeys(referenced_tables)) - for qualifier, column_name in self._referenced_sql_identifier_refs(sql): - resolved_table = self._resolve_identifier_table( + for qualifier, column_name in referenced_sql_identifier_refs(sql): + resolved_table = resolve_identifier_table( qualifier=qualifier, column_name=column_name, - table_references=table_references, + table_refs=table_refs, schema_cache=schema_cache, ) if resolved_table is not None: @@ -172,46 +197,33 @@ def _missing_columns_in_primary_table( if qualifier is not None: target_table = ( - self._resolve_table_qualifier(qualifier, table_references) - or self._primary_table_name(sql) + resolve_table_qualifier(qualifier, table_refs) + or primary_table_name(sql) or tables_in_query[0] ) else: - matching_tables = self._tables_with_column( - column_name, - tables_in_query, - schema_cache, - ) + matching_tables = tables_with_column(column_name, tables_in_query, schema_cache) if len(matching_tables) > 1: continue - target_table = self._primary_table_name(sql) or tables_in_query[0] + target_table = 
primary_table_name(sql) or tables_in_query[0] missing_columns_by_table.setdefault(target_table, set()).add(column_name) - candidate_tables_by_column[column_name] = self._find_tables_with_column( + candidate_tables_by_column[column_name] = find_tables_with_column( column_name, all_schema_cache, ) missing_columns = sorted( - { - column_name - for columns in missing_columns_by_table.values() - for column_name in columns - } + {column_name for columns in missing_columns_by_table.values() for column_name in columns} ) if not missing_columns: return None - primary_table = self._primary_table_name(sql) or tables_in_query[0] + primary = primary_table_name(sql) or tables_in_query[0] target_table = ( - next(iter(missing_columns_by_table)) - if len(missing_columns_by_table) == 1 - else primary_table - ) - best_table = self._best_table_for_missing_columns( - missing_columns, - all_schema_cache, + next(iter(missing_columns_by_table)) if len(missing_columns_by_table) == 1 else primary ) + best_table = best_table_for_missing_columns(missing_columns, all_schema_cache) message = ( f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. " "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." 
@@ -233,8 +245,8 @@ def _missing_columns_in_primary_table( return result -def _structured_sql_execution_error( - self, +def structured_sql_execution_error( + warehouse_tools_factory, *, sql: str, error: Exception, @@ -250,11 +262,11 @@ def _structured_sql_execution_error( ) if missing_column_match: missing_column = missing_column_match.group(1).lower() - schema_cache = self._get_cached_schema_snippets(state, execution_context) - candidate_tables = self._find_tables_with_column(missing_column, schema_cache) + schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) + candidate_tables = find_tables_with_column(missing_column, schema_cache) return { "error": "column_not_in_table", - "table": self._primary_table_name(sql), + "table": primary_table_name(sql), "column": missing_column, "missing_columns": [missing_column], "candidates": candidate_tables, @@ -269,8 +281,8 @@ def _structured_sql_execution_error( return None -def _validate_follow_up_dimension_usage( - self, +def validate_follow_up_dimension_usage( + warehouse_tools_factory, *, sql: str, state: DashboardChatRuntimeState, @@ -283,25 +295,25 @@ def _validate_follow_up_dimension_usage( if intent_decision.follow_up_context.follow_up_type != "add_dimension": return None - requested_dimension = self._extract_requested_follow_up_dimension( + requested_dimension = extract_requested_follow_up_dimension( intent_decision.follow_up_context.modification_instruction or state["user_query"] ) if not requested_dimension: return None previous_sql = state["conversation_context"].last_sql_query or "" - current_dimensions = self._structural_dimensions_from_sql(sql) - previous_dimensions = self._structural_dimensions_from_sql(previous_sql) - normalized_requested_dimension = self._normalize_dimension_name(requested_dimension) + current_dimensions = structural_dimensions_from_sql(sql) + previous_dimensions = structural_dimensions_from_sql(previous_sql) + normalized_requested_dimension = 
normalize_dimension_name(requested_dimension) if ( normalized_requested_dimension in current_dimensions and normalized_requested_dimension not in previous_dimensions ): return None - candidate_tables = self._find_tables_with_column( + candidate_tables = find_tables_with_column( requested_dimension, - self._get_cached_schema_snippets(state, execution_context), + get_cached_schema_snippets(warehouse_tools_factory, state, execution_context), ) return { "error": "requested_dimension_missing", @@ -316,8 +328,8 @@ def _validate_follow_up_dimension_usage( } -def _missing_distinct( - self, +def check_missing_distinct( + warehouse_tools_factory, sql: str, state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -331,59 +343,51 @@ def _missing_distinct( if not where_match: return [] - table_references = self._table_references(sql) + table_refs = table_references(sql) query_tables = [ - reference["table_name"] - for reference in table_references - if reference.get("table_name") + reference["table_name"] for reference in table_refs if reference.get("table_name") ] if not query_tables: return [] - primary_table = self._primary_table_name(sql) or query_tables[0] + primary = primary_table_name(sql) or query_tables[0] - full_schema_cache = self._get_cached_schema_snippets( + full_schema_cache = get_cached_schema_snippets( + warehouse_tools_factory, state, execution_context, tables=query_tables, ) - all_schema_cache = self._get_cached_schema_snippets(state, execution_context) + all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) column_types = { table_name: { - str(column.get("name") or "").lower(): str( - column.get("data_type") or column.get("type") or "" - ).lower() + str(column.get("name") or "") + .lower(): str(column.get("data_type") or column.get("type") or "") + .lower() for column in getattr(snippet, "columns", []) } for table_name, snippet in full_schema_cache.items() } missing: list[dict[str, Any]] = [] - for 
qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): + for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): normalized_column = column_name.lower() - resolved_table = self._resolve_identifier_table( + resolved_table = resolve_identifier_table( qualifier=qualifier, column_name=normalized_column, - table_references=table_references, + table_refs=table_refs, schema_cache=full_schema_cache, ) if resolved_table is None and qualifier is None: - matching_tables = self._tables_with_column( - normalized_column, - query_tables, - full_schema_cache, - ) + matching_tables = tables_with_column(normalized_column, query_tables, full_schema_cache) if len(matching_tables) > 1: continue if resolved_table is None: - candidate_tables = self._find_tables_with_column( - normalized_column, - all_schema_cache, - ) + candidate_tables = find_tables_with_column(normalized_column, all_schema_cache) if qualifier is None and candidate_tables: continue missing.append( { - "table": primary_table, + "table": primary, "column": column_name, "error": "column_not_in_table", "candidates": candidate_tables, @@ -393,127 +397,13 @@ def _missing_distinct( data_type = column_types.get(resolved_table, {}).get(normalized_column, "") if not data_type: continue - if not self._is_text_type(data_type): + if not is_text_type(data_type): continue - if ( - not self._has_validated_distinct_value( - execution_context["distinct_cache"], - table_name=resolved_table, - column_name=normalized_column, - value=value, - ) + if not has_validated_distinct_value( + execution_context["distinct_cache"], + table_name=resolved_table, + column_name=normalized_column, + value=value, ): - missing.append( - {"table": resolved_table, "column": column_name, "value": value} - ) + missing.append({"table": resolved_table, "column": column_name, "value": value}) return missing - - -def _normalize_distinct_value(value: Any) -> str: - """Normalize one distinct value for 
exact cache lookups.""" - return str(value).strip().lower() - - -def _has_validated_distinct_value( - cls, - distinct_cache: set[tuple[Any, ...]], - *, - table_name: str, - column_name: str, - value: Any, -) -> bool: - """Return whether this exact text filter value was already validated in-session.""" - normalized_value = cls._normalize_distinct_value(value) - normalized_column = column_name.lower() - normalized_table = table_name.lower() - return ( - (normalized_table, normalized_column, normalized_value) in distinct_cache - or ("*", normalized_column, normalized_value) in distinct_cache - or (normalized_table, normalized_column) in distinct_cache - or ("*", normalized_column) in distinct_cache - ) - - -def _is_text_type(data_type: str) -> bool: - """Treat common string-like warehouse types as requiring distinct-value lookup.""" - return any(text_token in data_type for text_token in ["char", "text", "string", "varchar"]) - - -def _record_validated_distinct_values( - self, - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - table_name: str, - column_name: str, - values: Sequence[Any], -) -> None: - """Persist exact validated filter values for the current session.""" - normalized_table = table_name.lower() - normalized_column = column_name.lower() - distinct_cache = execution_context["distinct_cache"] - for value in values: - normalized_value = self._normalize_distinct_value(value) - distinct_cache.add((normalized_table, normalized_column, normalized_value)) - distinct_cache.add(("*", normalized_column, normalized_value)) - self._persist_session_distinct_cache(state, distinct_cache) - - -def _record_validated_filters_from_sql( - self, - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - sql: str, -) -> None: - """Seed exact validated filter values from a successful SQL statement.""" - table_references = self._table_references(sql) - if not table_references: - return - where_match = re.search( - 
r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return - - query_tables = [ - reference["table_name"] - for reference in table_references - if reference.get("table_name") - ] - schema_cache = dict(execution_context.get("schema_cache") or {}) - values_by_target: dict[tuple[str, str], list[str]] = {} - for qualifier, column_name, value in self._extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = self._resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_references=table_references, - schema_cache=schema_cache, - ) - if resolved_table is None and qualifier is None: - if schema_cache: - matching_tables = self._tables_with_column( - normalized_column, - query_tables, - schema_cache, - ) - if len(matching_tables) == 1: - resolved_table = matching_tables[0] - elif len(query_tables) == 1: - resolved_table = query_tables[0] - values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) - - if not values_by_target: - return - - for (table_name, column_name), values in values_by_target.items(): - self._record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name=table_name, - column_name=column_name, - values=values, - ) diff --git a/ddpui/core/dashboard_chat/orchestration/sql_parsing.py b/ddpui/core/dashboard_chat/orchestration/sql_parsing.py index d09d8fe01..de8dfcf4e 100644 --- a/ddpui/core/dashboard_chat/orchestration/sql_parsing.py +++ b/ddpui/core/dashboard_chat/orchestration/sql_parsing.py @@ -9,7 +9,7 @@ from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard -def _primary_table_name(sql: str) -> str | None: +def primary_table_name(sql: str) -> str | None: """Return the primary FROM table for single-query correction logic.""" table_match = re.search(r"\bFROM\s+([`\"]?)([\w\.]+)\1", sql, re.IGNORECASE) if 
not table_match: @@ -17,7 +17,7 @@ def _primary_table_name(sql: str) -> str | None: return normalize_dashboard_chat_table_name(table_match.group(2)) -def _table_references(cls, sql: str) -> list[dict[str, str | None]]: +def table_references(sql: str) -> list[dict[str, str | None]]: """Return normalized FROM/JOIN table references and aliases from one SQL statement.""" references: list[dict[str, str | None]] = [] for match in re.finditer( @@ -39,16 +39,15 @@ def _table_references(cls, sql: str) -> list[dict[str, str | None]]: return references -def _resolve_table_qualifier( - cls, +def resolve_table_qualifier( qualifier: str, - table_references: Sequence[dict[str, str | None]], + table_refs: Sequence[dict[str, str | None]], ) -> str | None: """Resolve a qualifier like `f` or `analytics_table` to one query table.""" normalized_qualifier = qualifier.lower().strip().strip('`"') matches = [ str(reference["table_name"]) - for reference in table_references + for reference in table_refs if normalized_qualifier in { str(reference.get("alias") or ""), @@ -62,16 +61,14 @@ def _resolve_table_qualifier( return None -def _table_columns(snippet: DashboardChatSchemaSnippet | Any) -> set[str]: +def table_columns(snippet: DashboardChatSchemaSnippet | Any) -> set[str]: """Return the normalized column names available on one schema snippet.""" return { - str(column.get("name") or "").lower() - for column in getattr(snippet, "columns", []) or [] + str(column.get("name") or "").lower() for column in getattr(snippet, "columns", []) or [] } -def _tables_with_column( - cls, +def tables_with_column( column_name: str, table_names: Sequence[str], schema_cache: dict[str, Any], @@ -81,78 +78,36 @@ def _tables_with_column( return [ table_name for table_name in table_names - if normalized_column_name in cls._table_columns(schema_cache.get(table_name)) + if normalized_column_name in table_columns(schema_cache.get(table_name)) ] -def _resolve_identifier_table( - cls, +def resolve_identifier_table( *, 
qualifier: str | None, column_name: str, - table_references: Sequence[dict[str, str | None]], + table_refs: Sequence[dict[str, str | None]], schema_cache: dict[str, Any], ) -> str | None: """Resolve one referenced column to a concrete query table when it is unambiguous.""" if qualifier is not None: - resolved_table = cls._resolve_table_qualifier(qualifier, table_references) + resolved_table = resolve_table_qualifier(qualifier, table_refs) if not resolved_table: return None - if column_name.lower() in cls._table_columns(schema_cache.get(resolved_table)): + if column_name.lower() in table_columns(schema_cache.get(resolved_table)): return resolved_table return None query_tables = [ - str(reference["table_name"]) - for reference in table_references - if reference.get("table_name") + str(reference["table_name"]) for reference in table_refs if reference.get("table_name") ] - matching_tables = cls._tables_with_column(column_name, query_tables, schema_cache) + matching_tables = tables_with_column(column_name, query_tables, schema_cache) if len(matching_tables) == 1: return matching_tables[0] return None -def _referenced_sql_identifier_refs(cls, sql: str) -> list[tuple[str | None, str]]: - """Extract likely physical identifier references from the outer SQL.""" - table_aliases = { - alias.lower() - for alias in re.findall( - r"\b(?:FROM|JOIN)\s+[`\"]?[\w\.]+[`\"]?(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", - sql, - flags=re.IGNORECASE, - ) - if alias - } - select_aliases = cls._select_aliases(sql) - referenced_identifiers: list[tuple[str | None, str]] = [] - - select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) - if select_clause: - for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): - referenced_identifiers.extend( - cls._extract_identifier_refs_from_sql_segment(expression, table_aliases) - ) - - for pattern in [ - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", - 
r"\bORDER\s+BY\s+(.+?)(?:\bLIMIT\b|$)", - ]: - match = re.search(pattern, sql, flags=re.IGNORECASE | re.DOTALL) - if match: - referenced_identifiers.extend( - cls._extract_identifier_refs_from_sql_segment( - match.group(1), - table_aliases, - ignored_identifiers=select_aliases, - ) - ) - - return list(dict.fromkeys(referenced_identifiers)) - - -def _select_aliases(sql: str) -> set[str]: +def select_aliases(sql: str) -> set[str]: """Return aliases introduced by the outer SELECT clause.""" select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) if not select_clause: @@ -170,7 +125,7 @@ def _select_aliases(sql: str) -> set[str]: return aliases -def _extract_identifier_refs_from_sql_segment( +def extract_identifier_refs_from_sql_segment( segment: str, table_aliases: set[str], ignored_identifiers: set[str] | None = None, @@ -236,22 +191,59 @@ def _extract_identifier_refs_from_sql_segment( return identifiers -def _best_table_for_missing_columns( +def referenced_sql_identifier_refs(sql: str) -> list[tuple[str | None, str]]: + """Extract likely physical identifier references from the outer SQL.""" + table_aliases = { + alias.lower() + for alias in re.findall( + r"\b(?:FROM|JOIN)\s+[`\"]?[\w\.]+[`\"]?(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?", + sql, + flags=re.IGNORECASE, + ) + if alias + } + sql_select_aliases = select_aliases(sql) + referenced_identifiers: list[tuple[str | None, str]] = [] + + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) + if select_clause: + for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): + referenced_identifiers.extend( + extract_identifier_refs_from_sql_segment(expression, table_aliases) + ) + + for pattern in [ + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + r"\bGROUP\s+BY\s+(.+?)(?:\bORDER\b|\bLIMIT\b|$)", + r"\bORDER\s+BY\s+(.+?)(?:\bLIMIT\b|$)", + ]: + match = re.search(pattern, sql, flags=re.IGNORECASE | re.DOTALL) + if match: + referenced_identifiers.extend( 
+ extract_identifier_refs_from_sql_segment( + match.group(1), + table_aliases, + ignored_identifiers=sql_select_aliases, + ) + ) + + return list(dict.fromkeys(referenced_identifiers)) + + +def best_table_for_missing_columns( missing_columns: Sequence[str], schema_cache: dict[str, Any], ) -> str | None: """Return the first allowlisted table that covers all missing columns.""" wanted_columns = {column_name.lower() for column_name in missing_columns} for table_name, snippet in schema_cache.items(): - available_columns = { - str(column.get("name") or "").lower() for column in snippet.columns - } + available_columns = {str(column.get("name") or "").lower() for column in snippet.columns} if wanted_columns.issubset(available_columns): return table_name return None -def _extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str, str]]: +def extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str, str]]: """Extract quoted text filter values from one WHERE clause.""" extracted_values: list[tuple[str | None, str, str]] = [] for qualifier, column_name, value in re.findall( @@ -269,13 +261,11 @@ def _extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str qualifier = match.group(1) column_name = match.group(2) for value in re.findall(r"'([^']+)'", match.group(3)): - extracted_values.append( - (qualifier.lower() if qualifier else None, column_name, value) - ) + extracted_values.append((qualifier.lower() if qualifier else None, column_name, value)) return extracted_values -def _find_tables_with_column( +def find_tables_with_column( column_name: str, schema_cache: dict[str, Any], limit: int = 10, @@ -294,27 +284,24 @@ def _find_tables_with_column( return matches -def _structural_dimensions_from_sql(cls, sql: str) -> set[str]: +def structural_dimensions_from_sql(sql: str) -> set[str]: """Return normalized non-aggregate dimensions used by one SQL statement.""" + from .conversation import extract_dimensions_from_sql + if not 
sql: return set() dimensions: set[str] = set() - for dimension in cls._extract_dimensions_from_sql(sql): - identifier_refs = cls._extract_identifier_refs_from_sql_segment( - dimension, - table_aliases=set(), - ) + for dimension in extract_dimensions_from_sql(sql): + identifier_refs = extract_identifier_refs_from_sql_segment(dimension, table_aliases=set()) if identifier_refs: - dimensions.update( - cls._normalize_dimension_name(column_name) - for _, column_name in identifier_refs - ) + dimensions.update(normalize_dimension_name(col) for _, col in identifier_refs) continue - dimensions.add(cls._normalize_dimension_name(dimension)) + dimensions.add(normalize_dimension_name(dimension)) + select_clause = DashboardChatSqlGuard._extract_outer_select_clause(sql) if not select_clause: - return {dimension for dimension in dimensions if dimension} + return {d for d in dimensions if d} for expression in DashboardChatSqlGuard._split_select_expressions(select_clause): normalized_expression = expression.strip() @@ -322,16 +309,17 @@ def _structural_dimensions_from_sql(cls, sql: str) -> set[str]: normalized_expression ): continue - for _, column_name in cls._extract_identifier_refs_from_sql_segment( + for _, column_name in extract_identifier_refs_from_sql_segment( normalized_expression, table_aliases=set(), - ignored_identifiers=cls._select_aliases(sql), + ignored_identifiers=select_aliases(sql), ): - dimensions.add(cls._normalize_dimension_name(column_name)) - return {dimension for dimension in dimensions if dimension} + dimensions.add(normalize_dimension_name(column_name)) + + return {d for d in dimensions if d} -def _normalize_dimension_name(value: str) -> str: +def normalize_dimension_name(value: str) -> str: """Normalize dimension names from SQL expressions and natural-language follow-ups.""" normalized_value = value.strip().strip('`"').lower() normalized_value = normalized_value.split(".")[-1] diff --git a/ddpui/core/dashboard_chat/orchestration/tool_handlers.py 
b/ddpui/core/dashboard_chat/orchestration/tool_handlers.py index d82cbb731..b36ad8c2b 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_handlers.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_handlers.py @@ -1,7 +1,8 @@ -"""Tool handlers and turn-scoped tool helpers for dashboard chat.""" +"""Tool handlers and execution-context cache helpers for dashboard chat.""" from collections.abc import Sequence import logging +import re from typing import Any from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard @@ -9,13 +10,218 @@ from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools from ddpui.models.org import Org +from .retrieval import ( + retrieve_vector_documents, + filter_allowlisted_dbt_results, + dedupe_retrieved_documents, + build_tool_document_payload, + get_cached_query_embedding, +) +from .session_snapshot import persist_session_schema_cache, persist_session_distinct_cache +from .sql_parsing import ( + table_references as sql_table_references, + resolve_identifier_table, + tables_with_column, + extract_text_filter_values, + find_tables_with_column, +) from .state import DashboardChatRuntimeState logger = logging.getLogger(__name__) -def _handle_retrieve_docs_tool( - self, +# --------------------------------------------------------------------------- +# Warehouse tools (lazily initialized per-turn) +# --------------------------------------------------------------------------- + + +def get_turn_warehouse_tools( + warehouse_tools_factory, + execution_context: dict[str, Any], + org: Org, +) -> DashboardChatWarehouseTools: + """Build the warehouse tool helper lazily for the turn.""" + warehouse_tools = execution_context.get("warehouse_tools") + if warehouse_tools is None: + warehouse_tools = warehouse_tools_factory(org) + execution_context["warehouse_tools"] = warehouse_tools + return warehouse_tools + + +# --------------------------------------------------------------------------- +# Schema snippet 
cache +# --------------------------------------------------------------------------- + + +def get_cached_schema_snippets( + warehouse_tools_factory, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + tables: Sequence[str] | None = None, +) -> dict[str, Any]: + """Load and cache schema snippets for allowlisted tables.""" + requested_tables = [ + table_name.lower() + for table_name in ( + tables if tables is not None else state["allowlist"].prioritized_tables() + ) + if state["allowlist"].is_allowed(table_name) + ] + cache = execution_context["schema_cache"] + missing_tables = [table_name for table_name in requested_tables if table_name not in cache] + if missing_tables: + snippets = get_turn_warehouse_tools( + warehouse_tools_factory, + execution_context, + state["org"], + ).get_schema_snippets(missing_tables) + for table_name, snippet in snippets.items(): + cache[table_name.lower()] = snippet + if snippets: + persist_session_schema_cache(state, cache) + if tables is None: + return cache + return {table_name: cache[table_name] for table_name in requested_tables if table_name in cache} + + +# --------------------------------------------------------------------------- +# Distinct value cache helpers +# --------------------------------------------------------------------------- + + +def normalize_distinct_value(value: Any) -> str: + """Normalize one distinct value for exact cache lookups.""" + return str(value).strip().lower() + + +def has_validated_distinct_value( + distinct_cache: set[tuple[Any, ...]], + *, + table_name: str, + column_name: str, + value: Any, +) -> bool: + """Return whether this exact text filter value was already validated in-session.""" + normalized_value = normalize_distinct_value(value) + normalized_column = column_name.lower() + normalized_table = table_name.lower() + return ( + (normalized_table, normalized_column, normalized_value) in distinct_cache + or ("*", normalized_column, normalized_value) in distinct_cache + or 
(normalized_table, normalized_column) in distinct_cache + or ("*", normalized_column) in distinct_cache + ) + + +def is_text_type(data_type: str) -> bool: + """Treat common string-like warehouse types as requiring distinct-value lookup.""" + return any(token in data_type for token in ["char", "text", "string", "varchar"]) + + +def record_validated_distinct_values( + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + table_name: str, + column_name: str, + values: Sequence[Any], +) -> None: + """Persist exact validated filter values for the current session.""" + normalized_table = table_name.lower() + normalized_column = column_name.lower() + distinct_cache = execution_context["distinct_cache"] + for value in values: + normalized_value = normalize_distinct_value(value) + distinct_cache.add((normalized_table, normalized_column, normalized_value)) + distinct_cache.add(("*", normalized_column, normalized_value)) + persist_session_distinct_cache(state, distinct_cache) + + +def record_validated_filters_from_sql( + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + sql: str, +) -> None: + """Seed exact validated filter values from a successful SQL statement.""" + table_refs = sql_table_references(sql) + if not table_refs: + return + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return + + query_tables = [ + reference["table_name"] for reference in table_refs if reference.get("table_name") + ] + schema_cache = dict(execution_context.get("schema_cache") or {}) + values_by_target: dict[tuple[str, str], list[str]] = {} + for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_refs=table_refs, + schema_cache=schema_cache, + ) + if 
resolved_table is None and qualifier is None: + if schema_cache: + matching = tables_with_column(normalized_column, query_tables, schema_cache) + if len(matching) == 1: + resolved_table = matching[0] + elif len(query_tables) == 1: + resolved_table = query_tables[0] + values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) + + for (tbl, col), vals in values_by_target.items(): + record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name=tbl, + column_name=col, + values=vals, + ) + + +def seed_distinct_cache_from_previous_sql( + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> None: + """Treat text filters from the previous successful SQL as already validated for follow-ups.""" + previous_sql = state["conversation_context"].last_sql_query + if not previous_sql: + return + record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=previous_sql, + ) + + +# --------------------------------------------------------------------------- +# dbt index helper +# --------------------------------------------------------------------------- + + +def dbt_resources_by_unique_id(state: DashboardChatRuntimeState) -> dict[str, dict[str, Any]]: + """Return the allowlisted dbt index built at session start.""" + dbt_index = state.get("dbt_index") or {} + return dict(dbt_index.get("resources_by_unique_id") or {}) + + +# --------------------------------------------------------------------------- +# Tool handlers +# --------------------------------------------------------------------------- + + +def handle_retrieve_docs_tool( + vector_store, + source_config, + runtime_config, args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -28,74 +234,69 @@ def _handle_retrieve_docs_tool( for doc_type in (args.get("types") or ["chart", "dataset", "context", "dbt_model"]) ] retrieved_documents = [] + cached_embedding = 
get_cached_query_embedding( + vector_store, query, execution_context["embedding_cache"] + ) if "chart" in requested_types: retrieved_documents.extend( - self._retrieve_vector_documents( + retrieve_vector_documents( + vector_store, + runtime_config, org=state["org"], collection_name=state.get("vector_collection_name"), query_text=query, - source_types=self.source_config.filter_enabled( + source_types=source_config.filter_enabled( [DashboardChatSourceType.DASHBOARD_EXPORT] ), dashboard_id=state["dashboard_id"], - query_embedding=self._get_cached_query_embedding( - query, - execution_context["embedding_cache"], - ), + query_embedding=cached_embedding, ) ) if "context" in requested_types: retrieved_documents.extend( - self._retrieve_vector_documents( + retrieve_vector_documents( + vector_store, + runtime_config, org=state["org"], collection_name=state.get("vector_collection_name"), query_text=query, - source_types=self.source_config.filter_enabled( + source_types=source_config.filter_enabled( [DashboardChatSourceType.DASHBOARD_CONTEXT] ), dashboard_id=state["dashboard_id"], - query_embedding=self._get_cached_query_embedding( - query, - execution_context["embedding_cache"], - ), + query_embedding=cached_embedding, ) ) retrieved_documents.extend( - self._retrieve_vector_documents( + retrieve_vector_documents( + vector_store, + runtime_config, org=state["org"], collection_name=state.get("vector_collection_name"), query_text=query, - source_types=self.source_config.filter_enabled( - [DashboardChatSourceType.ORG_CONTEXT] - ), - query_embedding=self._get_cached_query_embedding( - query, - execution_context["embedding_cache"], - ), + source_types=source_config.filter_enabled([DashboardChatSourceType.ORG_CONTEXT]), + query_embedding=cached_embedding, ) ) if "dataset" in requested_types or "dbt_model" in requested_types: - dbt_results = self._retrieve_vector_documents( + dbt_results = retrieve_vector_documents( + vector_store, + runtime_config, org=state["org"], 
collection_name=state.get("vector_collection_name"), query_text=query, - source_types=self.source_config.filter_enabled( + source_types=source_config.filter_enabled( [ DashboardChatSourceType.DBT_MANIFEST, DashboardChatSourceType.DBT_CATALOG, ] ), - query_embedding=self._get_cached_query_embedding( - query, - execution_context["embedding_cache"], - ), - ) - retrieved_documents.extend( - self._filter_allowlisted_dbt_results(dbt_results, state["allowlist"]) + query_embedding=cached_embedding, ) + retrieved_documents.extend(filter_allowlisted_dbt_results(dbt_results, state["allowlist"])) - merged_results = self._dedupe_retrieved_documents(retrieved_documents)[:limit] + merged_results = dedupe_retrieved_documents(retrieved_documents)[:limit] for document in merged_results: if document.document_id in execution_context["retrieved_document_ids"]: continue @@ -103,18 +304,14 @@ def _handle_retrieve_docs_tool( execution_context["retrieved_documents"].append(document) docs = [ - self._build_tool_document_payload( - document, - state["allowlist"], - state["dashboard_export"], - ) + build_tool_document_payload(document, state["allowlist"], state["dashboard_export"]) for document in merged_results ] return {"docs": docs, "count": len(docs)} -def _handle_get_schema_snippets_tool( - self, +def handle_get_schema_snippets_tool( + warehouse_tools_factory, args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -122,12 +319,11 @@ def _handle_get_schema_snippets_tool( """Return schema snippets for allowlisted tables only.""" requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []] allowed_tables = [ - table_name - for table_name in requested_tables - if state["allowlist"].is_allowed(table_name) + table_name for table_name in requested_tables if state["allowlist"].is_allowed(table_name) ] filtered_tables = sorted(set(requested_tables) - set(allowed_tables)) - schema_cache = self._get_cached_schema_snippets( + 
schema_cache = get_cached_schema_snippets( + warehouse_tools_factory, state, execution_context, tables=allowed_tables, @@ -140,14 +336,13 @@ def _handle_get_schema_snippets_tool( response: dict[str, Any] = {"tables": tables_payload} if filtered_tables: response["filtered_tables"] = filtered_tables - response["filter_note"] = ( - f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard." - ) + response[ + "filter_note" + ] = f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard." return response -def _handle_search_dbt_models_tool( - self, +def handle_search_dbt_models_tool( args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -159,7 +354,7 @@ def _handle_search_dbt_models_tool( return {"models": [], "count": 0} results: list[dict[str, Any]] = [] - for node in self._dbt_resources_by_unique_id(state).values(): + for node in dbt_resources_by_unique_id(state).values(): table_name = node.get("table") haystacks = [ str(node.get("name") or ""), @@ -178,8 +373,7 @@ def _handle_search_dbt_models_tool( "database": str(node.get("database") or ""), "description": str(node.get("description") or ""), "columns": [ - str(column.get("name") or "") - for column in (node.get("columns") or []) + str(column.get("name") or "") for column in (node.get("columns") or []) ][:20], "table": table_name, } @@ -190,8 +384,7 @@ def _handle_search_dbt_models_tool( return {"models": results, "count": len(results)} -def _handle_get_dbt_model_info_tool( - self, +def handle_get_dbt_model_info_tool( args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -203,7 +396,7 @@ def _handle_get_dbt_model_info_tool( matched_unique_id: str | None = None matched_node: dict[str, Any] | None = None - for unique_id, node in self._dbt_resources_by_unique_id(state).items(): + for unique_id, node in dbt_resources_by_unique_id(state).items(): table_name 
= node.get("table") candidates = { str(node.get("name") or "").lower(), @@ -229,8 +422,8 @@ def _handle_get_dbt_model_info_tool( } -def _handle_get_distinct_values_tool( - self, +def handle_get_distinct_values_tool( + warehouse_tools_factory, args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -243,18 +436,16 @@ def _handle_get_distinct_values_tool( return { "error": "table_not_allowed", "table": table_name, - "message": ( - f"Table {table_name} is not accessible in the current dashboard context." - ), + "message": (f"Table {table_name} is not accessible in the current dashboard context."), } - schema_cache = self._get_cached_schema_snippets(state, execution_context) + schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) snippet = schema_cache.get(table_name) normalized_column_name = column_name.lower() if snippet is not None and normalized_column_name not in { str(column.get("name") or "").lower() for column in snippet.columns }: - candidates = self._find_tables_with_column(normalized_column_name, schema_cache) + candidates = find_tables_with_column(normalized_column_name, schema_cache) return { "error": "column_not_in_table", "table": table_name, @@ -266,7 +457,8 @@ def _handle_get_distinct_values_tool( ), } - values = self._get_turn_warehouse_tools( + values = get_turn_warehouse_tools( + warehouse_tools_factory, execution_context, state["org"], ).get_distinct_values( @@ -274,7 +466,7 @@ def _handle_get_distinct_values_tool( column_name=column_name, limit=limit, ) - self._record_validated_distinct_values( + record_validated_distinct_values( state=state, execution_context=execution_context, table_name=table_name, @@ -289,8 +481,8 @@ def _handle_get_distinct_values_tool( } -def _handle_list_tables_by_keyword_tool( - self, +def handle_list_tables_by_keyword_tool( + warehouse_tools_factory, args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ 
-317,7 +509,8 @@ def _handle_list_tables_by_keyword_tool( lookup_tables = direct_match_tables or allowlisted_tables if lookup_tables: try: - schema_cache = self._get_cached_schema_snippets( + schema_cache = get_cached_schema_snippets( + warehouse_tools_factory, state, execution_context, tables=lookup_tables, @@ -364,8 +557,8 @@ def _handle_list_tables_by_keyword_tool( } -def _handle_check_table_row_count_tool( - self, +def handle_check_table_row_count_tool( + warehouse_tools_factory, args: dict[str, Any], state: DashboardChatRuntimeState, execution_context: dict[str, Any], @@ -376,9 +569,7 @@ def _handle_check_table_row_count_tool( return { "error": "table_not_allowed", "table": table_name, - "message": ( - f"Table {table_name} is not accessible in the current dashboard context." - ), + "message": (f"Table {table_name} is not accessible in the current dashboard context."), } sql = f"SELECT COUNT(*) AS row_count FROM {table_name} LIMIT 1" @@ -389,96 +580,12 @@ def _handle_check_table_row_count_tool( if not validation.is_valid or not validation.sanitized_sql: return {"error": "sql_validation_failed", "issues": validation.errors} - rows = self._get_turn_warehouse_tools( + rows = get_turn_warehouse_tools( + warehouse_tools_factory, execution_context, state["org"], - ).execute_sql( - validation.sanitized_sql - ) + ).execute_sql(validation.sanitized_sql) row_count = 0 if rows: row_count = int(rows[0].get("row_count") or 0) return {"table": table_name, "row_count": row_count, "has_data": row_count > 0} - - -def _get_turn_warehouse_tools( - self, - execution_context: dict[str, Any], - org: Org, -) -> DashboardChatWarehouseTools: - """Build the warehouse tool helper lazily for the turn.""" - warehouse_tools = execution_context.get("warehouse_tools") - if warehouse_tools is None: - warehouse_tools = self.warehouse_tools_factory(org) - execution_context["warehouse_tools"] = warehouse_tools - return warehouse_tools - - -def _get_cached_schema_snippets( - self, - state: 
DashboardChatRuntimeState, - execution_context: dict[str, Any], - tables: Sequence[str] | None = None, -) -> dict[str, Any]: - """Load and cache schema snippets for allowlisted tables.""" - requested_tables = [ - table_name.lower() - for table_name in ( - tables if tables is not None else state["allowlist"].prioritized_tables() - ) - if state["allowlist"].is_allowed(table_name) - ] - cache = execution_context["schema_cache"] - missing_tables = [table_name for table_name in requested_tables if table_name not in cache] - if missing_tables: - snippets = self._get_turn_warehouse_tools( - execution_context, - state["org"], - ).get_schema_snippets(missing_tables) - for table_name, snippet in snippets.items(): - cache[table_name.lower()] = snippet - if snippets: - self._persist_session_schema_cache(state, cache) - if tables is None: - return cache - return { - table_name: cache[table_name] - for table_name in requested_tables - if table_name in cache - } - - -def _seed_distinct_cache_from_previous_sql( - self, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> None: - """Treat text filters from the previous successful SQL as already validated for follow-ups.""" - previous_sql = state["conversation_context"].last_sql_query - if not previous_sql: - return - - self._record_validated_filters_from_sql( - state=state, - execution_context=execution_context, - sql=previous_sql, - ) - - -def _dbt_resources_by_unique_id( - state: DashboardChatRuntimeState, -) -> dict[str, dict[str, Any]]: - """Return the allowlisted dbt index built at session start.""" - dbt_index = state.get("dbt_index") or {} - return dict(dbt_index.get("resources_by_unique_id") or {}) - - -def _get_cached_query_embedding( - self, - query_text: str, - embedding_cache: dict[str, list[float]], -) -> list[float]: - """Cache embeddings per query string during one turn.""" - if query_text not in embedding_cache: - embedding_cache[query_text] = self.vector_store.embed_query(query_text) - 
return embedding_cache[query_text] diff --git a/ddpui/core/dashboard_chat/orchestration/tool_loop.py b/ddpui/core/dashboard_chat/orchestration/tool_loop.py index 7ccf3f14b..0b2a88c81 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_loop.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_loop.py @@ -9,13 +9,35 @@ from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseToolsError +from .presentation import ( + serialize_tool_result, + summarize_tool_call, + max_turns_message, + fallback_answer_text, +) +from .sql_execution import run_sql_with_distinct_guard from .state import DashboardChatRuntimeState +from .tool_handlers import ( + handle_retrieve_docs_tool, + handle_get_schema_snippets_tool, + handle_search_dbt_models_tool, + handle_get_dbt_model_info_tool, + handle_get_distinct_values_tool, + handle_list_tables_by_keyword_tool, + handle_check_table_row_count_tool, + seed_distinct_cache_from_previous_sql, +) logger = logging.getLogger(__name__) -def _execute_tool_loop( - self, +def execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + tool_specifications, *, state: DashboardChatRuntimeState, messages: list[dict[str, Any]], @@ -40,14 +62,14 @@ def _execute_tool_loop( }, } tool_loop_started_at = perf_counter() - self._seed_distinct_cache_from_previous_sql(state, execution_context) + seed_distinct_cache_from_previous_sql(state, execution_context) intent_decision = state["intent_decision"] for turn_index in range(max_turns): tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" - ai_message = self.llm_client.run_tool_loop_turn( + ai_message = llm_client.run_tool_loop_turn( messages=messages, - tools=self.TOOL_SPECIFICATIONS, + tools=tool_specifications, tool_choice=tool_choice, operation=f"tool_loop_{intent_decision.intent.value}", ) @@ -75,10 +97,10 @@ def _execute_tool_loop( messages.append(assistant_record) if not tool_calls: - return 
self._build_tool_loop_result( + return build_tool_loop_result( answer_text=( (ai_message.get("content") or "").strip() - or self._fallback_answer_text( + or fallback_answer_text( execution_context["retrieved_documents"], execution_context["last_sql_results"], ) @@ -97,7 +119,11 @@ def _execute_tool_loop( except json.JSONDecodeError: args = {} tool_started_at = perf_counter() - result = self._execute_tool_call( + result = execute_tool_call( + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, tool_name=str(tool_call.get("name") or ""), args=args, state=state, @@ -106,13 +132,10 @@ def _execute_tool_loop( tool_duration_ms = round((perf_counter() - tool_started_at) * 1000, 2) tool_name = str(tool_call.get("name") or "") execution_context["timing_breakdown"]["tool_calls_ms"].append( - { - "name": tool_name, - "duration_ms": tool_duration_ms, - } + {"name": tool_name, "duration_ms": tool_duration_ms} ) execution_context["tool_calls"].append( - self._summarize_tool_call( + summarize_tool_call( tool_name=tool_name, args=args, result=result, @@ -124,21 +147,21 @@ def _execute_tool_loop( "role": "tool", "tool_call_id": tool_call.get("id"), "content": json.dumps( - self._serialize_tool_result(result), + serialize_tool_result(result), cls=DjangoJSONEncoder, ), } ) if tool_name == "run_sql_query" and result.get("success"): - return self._build_tool_loop_result( + return build_tool_loop_result( answer_text="", execution_context=execution_context, max_turns_reached=False, tool_loop_started_at=tool_loop_started_at, ) - return self._build_tool_loop_result( - answer_text=self._max_turns_message( + return build_tool_loop_result( + answer_text=max_turns_message( state["user_query"], execution_context["retrieved_documents"], ), @@ -148,8 +171,11 @@ def _execute_tool_loop( ) -def _execute_tool_call( - self, +def execute_tool_call( + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, *, tool_name: str, args: dict[str, Any], @@ -159,21 
+185,33 @@ def _execute_tool_call( """Execute one prototype tool against the Dalgo runtime primitives.""" try: if tool_name == "retrieve_docs": - return self._handle_retrieve_docs_tool(args, state, execution_context) + return handle_retrieve_docs_tool( + vector_store, source_config, runtime_config, args, state, execution_context + ) if tool_name == "get_schema_snippets": - return self._handle_get_schema_snippets_tool(args, state, execution_context) + return handle_get_schema_snippets_tool( + warehouse_tools_factory, args, state, execution_context + ) if tool_name == "search_dbt_models": - return self._handle_search_dbt_models_tool(args, state, execution_context) + return handle_search_dbt_models_tool(args, state, execution_context) if tool_name == "get_dbt_model_info": - return self._handle_get_dbt_model_info_tool(args, state, execution_context) + return handle_get_dbt_model_info_tool(args, state, execution_context) if tool_name == "get_distinct_values": - return self._handle_get_distinct_values_tool(args, state, execution_context) + return handle_get_distinct_values_tool( + warehouse_tools_factory, args, state, execution_context + ) if tool_name == "run_sql_query": - return self._run_sql_with_distinct_guard(args, state, execution_context) + return run_sql_with_distinct_guard( + warehouse_tools_factory, runtime_config, args, state, execution_context + ) if tool_name == "list_tables_by_keyword": - return self._handle_list_tables_by_keyword_tool(args, state, execution_context) + return handle_list_tables_by_keyword_tool( + warehouse_tools_factory, args, state, execution_context + ) if tool_name == "check_table_row_count": - return self._handle_check_table_row_count_tool(args, state, execution_context) + return handle_check_table_row_count_tool( + warehouse_tools_factory, args, state, execution_context + ) return {"error": f"Unknown tool: {tool_name}"} except DashboardChatWarehouseToolsError as error: logger.warning("Dashboard chat tool %s failed: %s", tool_name, 
error) @@ -185,8 +223,7 @@ def _execute_tool_call( return {"error": str(error)} -def _build_tool_loop_result( - self, +def build_tool_loop_result( *, answer_text: str, execution_context: dict[str, Any], From ad2a70e4c873d95fb2488a98e287ea4e76642fa4 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Thu, 26 Mar 2026 21:48:05 +0530 Subject: [PATCH 19/49] refactoring tool calling and use our logger class --- ddpui/core/dashboard_chat/agents/openai.py | 20 +- .../orchestration/nodes/handle_data_query.py | 2 +- .../orchestration/nodes/handle_follow_up.py | 2 +- .../orchestration/orchestrator.py | 2 +- .../orchestration/presentation.py | 4 +- .../orchestration/sql_execution.py | 409 ------------ .../orchestration/tools/__init__.py | 0 .../orchestration/tools/cache.py | 210 ++++++ .../{tool_handlers.py => tools/handlers.py} | 601 ++++++++++++------ .../{tool_loop.py => tools/loop.py} | 16 +- .../specifications.py} | 9 +- .../orchestration/{ => tools}/sql_parsing.py | 2 +- ddpui/core/dashboard_chat/warehouse/tools.py | 4 +- 13 files changed, 643 insertions(+), 638 deletions(-) delete mode 100644 ddpui/core/dashboard_chat/orchestration/sql_execution.py create mode 100644 ddpui/core/dashboard_chat/orchestration/tools/__init__.py create mode 100644 ddpui/core/dashboard_chat/orchestration/tools/cache.py rename ddpui/core/dashboard_chat/orchestration/{tool_handlers.py => tools/handlers.py} (54%) rename ddpui/core/dashboard_chat/orchestration/{tool_loop.py => tools/loop.py} (96%) rename ddpui/core/dashboard_chat/orchestration/{tool_specifications.py => tools/specifications.py} (94%) rename ddpui/core/dashboard_chat/orchestration/{ => tools}/sql_parsing.py (99%) diff --git a/ddpui/core/dashboard_chat/agents/openai.py b/ddpui/core/dashboard_chat/agents/openai.py index b283e15ec..28e4a411f 100644 --- a/ddpui/core/dashboard_chat/agents/openai.py +++ b/ddpui/core/dashboard_chat/agents/openai.py @@ -1,7 +1,6 @@ """OpenAI-backed dashboard chat LLM adapter.""" import json -import logging 
import os from time import sleep from typing import Any @@ -23,17 +22,16 @@ DashboardChatRetrievedDocument, ) from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey +from ddpui.utils.custom_logger import CustomLogger from ddpui.utils.openai_client import get_shared_openai_client -logger = logging.getLogger("ddpui") +logger = CustomLogger("dashboard_chat") class OpenAIDashboardChatLlmClient: """Direct OpenAI SDK adapter with JSON-mode helpers.""" - TECHNICAL_DIFFICULTIES_MESSAGE = ( - "I'm experiencing technical difficulties. Please try again." - ) + TECHNICAL_DIFFICULTIES_MESSAGE = "I'm experiencing technical difficulties. Please try again." def __init__( self, @@ -70,9 +68,7 @@ def classify_intent( conversation_context: DashboardChatConversationContext, ) -> DashboardChatIntentDecision: """Classify intent with prototype-style conversation awareness.""" - system_prompt = self.prompt_store.get( - DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION - ) + system_prompt = self.prompt_store.get(DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION) if conversation_context.last_sql_query or conversation_context.last_chart_ids: system_prompt += ( "\n\nCONVERSATION CONTEXT:\n" @@ -173,9 +169,7 @@ def compose_final_answer( result = self._complete_json( operation="final_answer_table_summary", system_prompt=( - self.prompt_store.get( - DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION - ) + self.prompt_store.get(DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION) + "\n\n" + TABLE_SUMMARY_JSON_INSTRUCTIONS ), @@ -239,7 +233,9 @@ def run_tool_loop_turn( ) return {"content": message.content or "", "tool_calls": tool_calls} - def _complete_json(self, operation: str, system_prompt: str, user_prompt: str) -> dict[str, Any]: + def _complete_json( + self, operation: str, system_prompt: str, user_prompt: str + ) -> dict[str, Any]: """Run a JSON-mode chat completion and parse the result.""" response = self._create_chat_completion( messages=[ diff --git 
a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py index 2d2616e7a..4006bd3ef 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py @@ -13,7 +13,7 @@ ) from ..retrieval import build_citations, get_cached_query_embedding from ..state import DashboardChatRuntimeState -from ..tool_loop import execute_tool_loop +from ..tools.loop import execute_tool_loop from .helpers import merge_tool_loop_timing diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py index e81edba89..20ae99a6a 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py @@ -13,7 +13,7 @@ ) from ..retrieval import build_citations, get_cached_query_embedding from ..state import DashboardChatRuntimeState -from ..tool_loop import execute_tool_loop +from ..tools.loop import execute_tool_loop from .helpers import merge_tool_loop_timing diff --git a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index f65d6c512..009defa66 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -26,7 +26,7 @@ from .nodes.load_context import load_context_node from .nodes.route_intent import route_intent_node from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN -from .tool_specifications import DASHBOARD_CHAT_TOOL_SPECIFICATIONS +from .tools.specifications import DASHBOARD_CHAT_TOOL_SPECIFICATIONS def _timed_node(node_name: str, handler): diff --git a/ddpui/core/dashboard_chat/orchestration/presentation.py b/ddpui/core/dashboard_chat/orchestration/presentation.py index 38247b972..676ece334 100644 --- 
a/ddpui/core/dashboard_chat/orchestration/presentation.py +++ b/ddpui/core/dashboard_chat/orchestration/presentation.py @@ -1,7 +1,6 @@ """Answer composition and display-shaping helpers for dashboard chat.""" from collections.abc import Sequence -import logging import re from typing import Any @@ -10,10 +9,11 @@ DashboardChatIntentDecision, DashboardChatRetrievedDocument, ) +from ddpui.utils.custom_logger import CustomLogger from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN -logger = logging.getLogger(__name__) +logger = CustomLogger("dashboard_chat") def serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: diff --git a/ddpui/core/dashboard_chat/orchestration/sql_execution.py b/ddpui/core/dashboard_chat/orchestration/sql_execution.py deleted file mode 100644 index 62b08e479..000000000 --- a/ddpui/core/dashboard_chat/orchestration/sql_execution.py +++ /dev/null @@ -1,409 +0,0 @@ -"""SQL execution and guardrail helpers for dashboard chat graph execution.""" - -import json -import re -from typing import Any - -from django.core.serializers.json import DjangoJSONEncoder - -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent -from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard - -from .conversation import extract_requested_follow_up_dimension -from .sql_parsing import ( - table_references, - resolve_identifier_table, - tables_with_column, - extract_text_filter_values, - find_tables_with_column, - primary_table_name, - referenced_sql_identifier_refs, - resolve_table_qualifier, - best_table_for_missing_columns, - structural_dimensions_from_sql, - normalize_dimension_name, -) -from .state import DashboardChatRuntimeState -from .tool_handlers import ( - get_turn_warehouse_tools, - get_cached_schema_snippets, - has_validated_distinct_value, - is_text_type, - record_validated_distinct_values, - 
record_validated_filters_from_sql, -) - - -def validate_sql_allowlist( - sql: str, - allowlist: DashboardChatAllowlist, -) -> dict[str, Any]: - """Validate that all referenced tables are in the dashboard allowlist.""" - referenced_tables = DashboardChatSqlGuard._extract_table_names(sql) - invalid_tables = [ - table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name) - ] - if invalid_tables: - return { - "valid": False, - "invalid_tables": invalid_tables, - "message": ( - "SQL references tables not available in the current dashboard: " - + ", ".join(invalid_tables) - + ". Use list_tables_by_keyword to find allowed tables." - ), - } - return {"valid": True, "invalid_tables": [], "message": ""} - - -def run_sql_with_distinct_guard( - warehouse_tools_factory, - runtime_config, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Validate SQL like the prototype and let the tool loop self-correct on failures.""" - sql = str(args.get("sql") or "").strip() - if not sql: - return {"error": "sql_missing", "message": "SQL is required"} - - allowlist_validation = validate_sql_allowlist(sql, state["allowlist"]) - if not allowlist_validation["valid"]: - return { - "error": "table_not_allowed", - "invalid_tables": allowlist_validation["invalid_tables"], - "message": allowlist_validation["message"], - } - - follow_up_dimension_validation = validate_follow_up_dimension_usage( - warehouse_tools_factory, - sql=sql, - state=state, - execution_context=execution_context, - ) - if follow_up_dimension_validation is not None: - return follow_up_dimension_validation - - missing_distinct = check_missing_distinct( - warehouse_tools_factory, sql, state, execution_context - ) - if missing_distinct: - return { - "error": "must_fetch_distinct_values", - "missing": missing_distinct, - "message": ( - "Call get_distinct_values for these columns, then regenerate the SQL using one of the returned 
values." - ), - } - - validation = DashboardChatSqlGuard( - allowlist=state["allowlist"], - max_rows=runtime_config.max_query_rows, - ).validate(sql) - execution_context["last_sql_validation"] = validation - if not validation.is_valid or not validation.sanitized_sql: - return { - "error": "sql_validation_failed", - "issues": validation.errors, - "warnings": validation.warnings, - } - - missing_columns = missing_columns_in_primary_table( - warehouse_tools_factory, - sql=validation.sanitized_sql, - state=state, - execution_context=execution_context, - ) - if missing_columns is not None: - return missing_columns - - execution_context["last_sql"] = validation.sanitized_sql - try: - rows = get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context, - state["org"], - ).execute_sql(validation.sanitized_sql) - except Exception as error: - structured_error = structured_sql_execution_error( - warehouse_tools_factory, - sql=validation.sanitized_sql, - error=error, - state=state, - execution_context=execution_context, - ) - if structured_error is not None: - return structured_error - return { - "success": False, - "error": str(error), - "sql_used": validation.sanitized_sql, - } - - serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder)) - execution_context["last_sql_results"] = serialized_rows - record_validated_filters_from_sql( - state=state, - execution_context=execution_context, - sql=validation.sanitized_sql, - ) - return { - "success": True, - "row_count": len(serialized_rows), - "error": None, - "sql_used": validation.sanitized_sql, - "columns": list(serialized_rows[0].keys()) if serialized_rows else [], - "rows": serialized_rows, - } - - -def missing_columns_in_primary_table( - warehouse_tools_factory, - *, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any] | None: - """Return a corrective tool error when SQL references columns absent from the referenced query tables.""" - table_refs = 
table_references(sql) - referenced_tables = [ - reference["table_name"] for reference in table_refs if reference.get("table_name") - ] - if not referenced_tables: - return None - - schema_cache = get_cached_schema_snippets( - warehouse_tools_factory, - state, - execution_context, - tables=referenced_tables, - ) - all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) - missing_columns_by_table: dict[str, set[str]] = {} - candidate_tables_by_column: dict[str, list[str]] = {} - tables_in_query = list(dict.fromkeys(referenced_tables)) - - for qualifier, column_name in referenced_sql_identifier_refs(sql): - resolved_table = resolve_identifier_table( - qualifier=qualifier, - column_name=column_name, - table_refs=table_refs, - schema_cache=schema_cache, - ) - if resolved_table is not None: - continue - - if qualifier is not None: - target_table = ( - resolve_table_qualifier(qualifier, table_refs) - or primary_table_name(sql) - or tables_in_query[0] - ) - else: - matching_tables = tables_with_column(column_name, tables_in_query, schema_cache) - if len(matching_tables) > 1: - continue - target_table = primary_table_name(sql) or tables_in_query[0] - - missing_columns_by_table.setdefault(target_table, set()).add(column_name) - candidate_tables_by_column[column_name] = find_tables_with_column( - column_name, - all_schema_cache, - ) - - missing_columns = sorted( - {column_name for columns in missing_columns_by_table.values() for column_name in columns} - ) - if not missing_columns: - return None - - primary = primary_table_name(sql) or tables_in_query[0] - target_table = ( - next(iter(missing_columns_by_table)) if len(missing_columns_by_table) == 1 else primary - ) - best_table = best_table_for_missing_columns(missing_columns, all_schema_cache) - message = ( - f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. 
" - "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." - ) - if best_table: - message += f" Best candidate table: {best_table}." - result = { - "error": "column_not_in_table", - "table": target_table, - "missing_columns": missing_columns, - "candidate_tables": candidate_tables_by_column, - "best_table": best_table, - "message": message, - } - if len(missing_columns) == 1: - column_name = missing_columns[0] - result["column"] = column_name - result["candidates"] = candidate_tables_by_column.get(column_name, []) - return result - - -def structured_sql_execution_error( - warehouse_tools_factory, - *, - sql: str, - error: Exception, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any] | None: - """Convert warehouse execution errors into prototype-style corrective feedback when possible.""" - error_text = str(error) - missing_column_match = re.search( - r'column "(?:[\w]+\.)?([^"]+)" does not exist', - error_text, - flags=re.IGNORECASE, - ) - if missing_column_match: - missing_column = missing_column_match.group(1).lower() - schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) - candidate_tables = find_tables_with_column(missing_column, schema_cache) - return { - "error": "column_not_in_table", - "table": primary_table_name(sql), - "column": missing_column, - "missing_columns": [missing_column], - "candidates": candidate_tables, - "candidate_tables": {missing_column: candidate_tables}, - "best_table": candidate_tables[0] if candidate_tables else None, - "message": ( - f"Column {missing_column} is not available on the current table. " - "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns." 
- ), - "sql_used": sql, - } - return None - - -def validate_follow_up_dimension_usage( - warehouse_tools_factory, - *, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any] | None: - """Keep add-dimension follow-ups from succeeding without actually changing query granularity.""" - intent_decision = state["intent_decision"] - if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL: - return None - if intent_decision.follow_up_context.follow_up_type != "add_dimension": - return None - - requested_dimension = extract_requested_follow_up_dimension( - intent_decision.follow_up_context.modification_instruction or state["user_query"] - ) - if not requested_dimension: - return None - - previous_sql = state["conversation_context"].last_sql_query or "" - current_dimensions = structural_dimensions_from_sql(sql) - previous_dimensions = structural_dimensions_from_sql(previous_sql) - normalized_requested_dimension = normalize_dimension_name(requested_dimension) - if ( - normalized_requested_dimension in current_dimensions - and normalized_requested_dimension not in previous_dimensions - ): - return None - - candidate_tables = find_tables_with_column( - requested_dimension, - get_cached_schema_snippets(warehouse_tools_factory, state, execution_context), - ) - return { - "error": "requested_dimension_missing", - "requested_dimension": requested_dimension, - "previous_dimensions": sorted(previous_dimensions), - "current_dimensions": sorted(current_dimensions), - "candidate_tables": candidate_tables, - "message": ( - f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. " - "Use the requested dimension exactly, or pick a table that contains it." 
- ), - } - - -def check_missing_distinct( - warehouse_tools_factory, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> list[dict[str, Any]]: - """Detect text filters that require a prior distinct-values call.""" - where_match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return [] - - table_refs = table_references(sql) - query_tables = [ - reference["table_name"] for reference in table_refs if reference.get("table_name") - ] - if not query_tables: - return [] - primary = primary_table_name(sql) or query_tables[0] - - full_schema_cache = get_cached_schema_snippets( - warehouse_tools_factory, - state, - execution_context, - tables=query_tables, - ) - all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) - - column_types = { - table_name: { - str(column.get("name") or "") - .lower(): str(column.get("data_type") or column.get("type") or "") - .lower() - for column in getattr(snippet, "columns", []) - } - for table_name, snippet in full_schema_cache.items() - } - missing: list[dict[str, Any]] = [] - for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_refs=table_refs, - schema_cache=full_schema_cache, - ) - if resolved_table is None and qualifier is None: - matching_tables = tables_with_column(normalized_column, query_tables, full_schema_cache) - if len(matching_tables) > 1: - continue - if resolved_table is None: - candidate_tables = find_tables_with_column(normalized_column, all_schema_cache) - if qualifier is None and candidate_tables: - continue - missing.append( - { - "table": primary, - "column": column_name, - "error": "column_not_in_table", - "candidates": candidate_tables, - } - ) - continue - data_type = 
column_types.get(resolved_table, {}).get(normalized_column, "") - if not data_type: - continue - if not is_text_type(data_type): - continue - if not has_validated_distinct_value( - execution_context["distinct_cache"], - table_name=resolved_table, - column_name=normalized_column, - value=value, - ): - missing.append({"table": resolved_table, "column": column_name, "value": value}) - return missing diff --git a/ddpui/core/dashboard_chat/orchestration/tools/__init__.py b/ddpui/core/dashboard_chat/orchestration/tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ddpui/core/dashboard_chat/orchestration/tools/cache.py b/ddpui/core/dashboard_chat/orchestration/tools/cache.py new file mode 100644 index 000000000..66620a967 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/tools/cache.py @@ -0,0 +1,210 @@ +"""Execution-context cache helpers for dashboard chat tool loop.""" + +from collections.abc import Sequence +import re +from typing import Any + +from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools +from ddpui.models.org import Org +from ddpui.utils.custom_logger import CustomLogger + +from ..retrieval import ( + retrieve_vector_documents, + filter_allowlisted_dbt_results, + dedupe_retrieved_documents, + build_tool_document_payload, + get_cached_query_embedding, +) +from ..session_snapshot import persist_session_schema_cache, persist_session_distinct_cache +from ..state import DashboardChatRuntimeState +from .sql_parsing import ( + table_references as sql_table_references, + resolve_identifier_table, + tables_with_column, + extract_text_filter_values, +) + +logger = CustomLogger("dashboard_chat") + + +# --------------------------------------------------------------------------- +# Warehouse tools (lazily initialized per-turn) +# --------------------------------------------------------------------------- + + +def get_turn_warehouse_tools( + warehouse_tools_factory, + execution_context: dict[str, Any], + 
org: Org, +) -> DashboardChatWarehouseTools: + """Build the warehouse tool helper lazily for the turn.""" + warehouse_tools = execution_context.get("warehouse_tools") + if warehouse_tools is None: + warehouse_tools = warehouse_tools_factory(org) + execution_context["warehouse_tools"] = warehouse_tools + return warehouse_tools + + +# --------------------------------------------------------------------------- +# Schema snippet cache +# --------------------------------------------------------------------------- + + +def get_cached_schema_snippets( + warehouse_tools_factory, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + tables: Sequence[str] | None = None, +) -> dict[str, Any]: + """Load and cache schema snippets for allowlisted tables.""" + requested_tables = [ + table_name.lower() + for table_name in ( + tables if tables is not None else state["allowlist"].prioritized_tables() + ) + if state["allowlist"].is_allowed(table_name) + ] + cache = execution_context["schema_cache"] + missing_tables = [table_name for table_name in requested_tables if table_name not in cache] + if missing_tables: + snippets = get_turn_warehouse_tools( + warehouse_tools_factory, + execution_context, + state["org"], + ).get_schema_snippets(missing_tables) + for table_name, snippet in snippets.items(): + cache[table_name.lower()] = snippet + if snippets: + persist_session_schema_cache(state, cache) + if tables is None: + return cache + return {table_name: cache[table_name] for table_name in requested_tables if table_name in cache} + + +# --------------------------------------------------------------------------- +# Distinct value cache helpers +# --------------------------------------------------------------------------- + + +def normalize_distinct_value(value: Any) -> str: + """Normalize one distinct value for exact cache lookups.""" + return str(value).strip().lower() + + +def has_validated_distinct_value( + distinct_cache: set[tuple[Any, ...]], + *, + table_name: 
str, + column_name: str, + value: Any, +) -> bool: + """Return whether this exact text filter value was already validated in-session.""" + normalized_value = normalize_distinct_value(value) + normalized_column = column_name.lower() + normalized_table = table_name.lower() + return ( + (normalized_table, normalized_column, normalized_value) in distinct_cache + or ("*", normalized_column, normalized_value) in distinct_cache + or (normalized_table, normalized_column) in distinct_cache + or ("*", normalized_column) in distinct_cache + ) + + +def is_text_type(data_type: str) -> bool: + """Treat common string-like warehouse types as requiring distinct-value lookup.""" + return any(token in data_type for token in ["char", "text", "string", "varchar"]) + + +def record_validated_distinct_values( + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + table_name: str, + column_name: str, + values: Sequence[Any], +) -> None: + """Persist exact validated filter values for the current session.""" + normalized_table = table_name.lower() + normalized_column = column_name.lower() + distinct_cache = execution_context["distinct_cache"] + for value in values: + normalized_value = normalize_distinct_value(value) + distinct_cache.add((normalized_table, normalized_column, normalized_value)) + distinct_cache.add(("*", normalized_column, normalized_value)) + persist_session_distinct_cache(state, distinct_cache) + + +def record_validated_filters_from_sql( + *, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], + sql: str, +) -> None: + """Seed exact validated filter values from a successful SQL statement.""" + table_refs = sql_table_references(sql) + if not table_refs: + return + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return + + query_tables = [ + reference["table_name"] for reference in table_refs if reference.get("table_name") + ] + 
schema_cache = dict(execution_context.get("schema_cache") or {}) + values_by_target: dict[tuple[str, str], list[str]] = {} + for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_refs=table_refs, + schema_cache=schema_cache, + ) + if resolved_table is None and qualifier is None: + if schema_cache: + matching = tables_with_column(normalized_column, query_tables, schema_cache) + if len(matching) == 1: + resolved_table = matching[0] + elif len(query_tables) == 1: + resolved_table = query_tables[0] + values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) + + for (tbl, col), vals in values_by_target.items(): + record_validated_distinct_values( + state=state, + execution_context=execution_context, + table_name=tbl, + column_name=col, + values=vals, + ) + + +def seed_distinct_cache_from_previous_sql( + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> None: + """Treat text filters from the previous successful SQL as already validated for follow-ups.""" + previous_sql = state["conversation_context"].last_sql_query + if not previous_sql: + return + record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=previous_sql, + ) + + +# --------------------------------------------------------------------------- +# dbt index helper +# --------------------------------------------------------------------------- + + +def dbt_resources_by_unique_id(state: DashboardChatRuntimeState) -> dict[str, dict[str, Any]]: + """Return the allowlisted dbt index built at session start.""" + dbt_index = state.get("dbt_index") or {} + return dict(dbt_index.get("resources_by_unique_id") or {}) diff --git a/ddpui/core/dashboard_chat/orchestration/tool_handlers.py b/ddpui/core/dashboard_chat/orchestration/tools/handlers.py 
similarity index 54% rename from ddpui/core/dashboard_chat/orchestration/tool_handlers.py rename to ddpui/core/dashboard_chat/orchestration/tools/handlers.py index b36ad8c2b..0855b0a3e 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_handlers.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/handlers.py @@ -1,216 +1,50 @@ -"""Tool handlers and execution-context cache helpers for dashboard chat.""" +"""Tool handler implementations for dashboard chat.""" -from collections.abc import Sequence -import logging +import json import re from typing import Any -from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard +from django.core.serializers.json import DjangoJSONEncoder + +from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.utils.custom_logger import CustomLogger +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools -from ddpui.models.org import Org +from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard -from .retrieval import ( +from ..conversation import extract_requested_follow_up_dimension +from ..retrieval import ( retrieve_vector_documents, filter_allowlisted_dbt_results, dedupe_retrieved_documents, build_tool_document_payload, get_cached_query_embedding, ) -from .session_snapshot import persist_session_schema_cache, persist_session_distinct_cache +from ..state import DashboardChatRuntimeState +from .cache import ( + get_turn_warehouse_tools, + get_cached_schema_snippets, + has_validated_distinct_value, + is_text_type, + record_validated_distinct_values, + record_validated_filters_from_sql, + dbt_resources_by_unique_id, +) from .sql_parsing import ( - table_references as sql_table_references, + table_references, resolve_identifier_table, tables_with_column, extract_text_filter_values, 
find_tables_with_column, + primary_table_name, + referenced_sql_identifier_refs, + resolve_table_qualifier, + best_table_for_missing_columns, + structural_dimensions_from_sql, + normalize_dimension_name, ) -from .state import DashboardChatRuntimeState - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Warehouse tools (lazily initialized per-turn) -# --------------------------------------------------------------------------- - - -def get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context: dict[str, Any], - org: Org, -) -> DashboardChatWarehouseTools: - """Build the warehouse tool helper lazily for the turn.""" - warehouse_tools = execution_context.get("warehouse_tools") - if warehouse_tools is None: - warehouse_tools = warehouse_tools_factory(org) - execution_context["warehouse_tools"] = warehouse_tools - return warehouse_tools - - -# --------------------------------------------------------------------------- -# Schema snippet cache -# --------------------------------------------------------------------------- - - -def get_cached_schema_snippets( - warehouse_tools_factory, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - tables: Sequence[str] | None = None, -) -> dict[str, Any]: - """Load and cache schema snippets for allowlisted tables.""" - requested_tables = [ - table_name.lower() - for table_name in ( - tables if tables is not None else state["allowlist"].prioritized_tables() - ) - if state["allowlist"].is_allowed(table_name) - ] - cache = execution_context["schema_cache"] - missing_tables = [table_name for table_name in requested_tables if table_name not in cache] - if missing_tables: - snippets = get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context, - state["org"], - ).get_schema_snippets(missing_tables) - for table_name, snippet in snippets.items(): - cache[table_name.lower()] = snippet - if snippets: - 
persist_session_schema_cache(state, cache) - if tables is None: - return cache - return {table_name: cache[table_name] for table_name in requested_tables if table_name in cache} - - -# --------------------------------------------------------------------------- -# Distinct value cache helpers -# --------------------------------------------------------------------------- - - -def normalize_distinct_value(value: Any) -> str: - """Normalize one distinct value for exact cache lookups.""" - return str(value).strip().lower() - - -def has_validated_distinct_value( - distinct_cache: set[tuple[Any, ...]], - *, - table_name: str, - column_name: str, - value: Any, -) -> bool: - """Return whether this exact text filter value was already validated in-session.""" - normalized_value = normalize_distinct_value(value) - normalized_column = column_name.lower() - normalized_table = table_name.lower() - return ( - (normalized_table, normalized_column, normalized_value) in distinct_cache - or ("*", normalized_column, normalized_value) in distinct_cache - or (normalized_table, normalized_column) in distinct_cache - or ("*", normalized_column) in distinct_cache - ) - - -def is_text_type(data_type: str) -> bool: - """Treat common string-like warehouse types as requiring distinct-value lookup.""" - return any(token in data_type for token in ["char", "text", "string", "varchar"]) - - -def record_validated_distinct_values( - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - table_name: str, - column_name: str, - values: Sequence[Any], -) -> None: - """Persist exact validated filter values for the current session.""" - normalized_table = table_name.lower() - normalized_column = column_name.lower() - distinct_cache = execution_context["distinct_cache"] - for value in values: - normalized_value = normalize_distinct_value(value) - distinct_cache.add((normalized_table, normalized_column, normalized_value)) - distinct_cache.add(("*", normalized_column, normalized_value)) 
- persist_session_distinct_cache(state, distinct_cache) - - -def record_validated_filters_from_sql( - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - sql: str, -) -> None: - """Seed exact validated filter values from a successful SQL statement.""" - table_refs = sql_table_references(sql) - if not table_refs: - return - where_match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return - - query_tables = [ - reference["table_name"] for reference in table_refs if reference.get("table_name") - ] - schema_cache = dict(execution_context.get("schema_cache") or {}) - values_by_target: dict[tuple[str, str], list[str]] = {} - for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_refs=table_refs, - schema_cache=schema_cache, - ) - if resolved_table is None and qualifier is None: - if schema_cache: - matching = tables_with_column(normalized_column, query_tables, schema_cache) - if len(matching) == 1: - resolved_table = matching[0] - elif len(query_tables) == 1: - resolved_table = query_tables[0] - values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) - - for (tbl, col), vals in values_by_target.items(): - record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name=tbl, - column_name=col, - values=vals, - ) - -def seed_distinct_cache_from_previous_sql( - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> None: - """Treat text filters from the previous successful SQL as already validated for follow-ups.""" - previous_sql = state["conversation_context"].last_sql_query - if not previous_sql: - return - record_validated_filters_from_sql( - state=state, - 
execution_context=execution_context, - sql=previous_sql, - ) - - -# --------------------------------------------------------------------------- -# dbt index helper -# --------------------------------------------------------------------------- - - -def dbt_resources_by_unique_id(state: DashboardChatRuntimeState) -> dict[str, dict[str, Any]]: - """Return the allowlisted dbt index built at session start.""" - dbt_index = state.get("dbt_index") or {} - return dict(dbt_index.get("resources_by_unique_id") or {}) +logger = CustomLogger("dashboard_chat") # --------------------------------------------------------------------------- @@ -589,3 +423,382 @@ def handle_check_table_row_count_tool( if rows: row_count = int(rows[0].get("row_count") or 0) return {"table": table_name, "row_count": row_count, "has_data": row_count > 0} + + +def handle_run_sql_query_tool( + warehouse_tools_factory, + runtime_config, + args: dict[str, Any], + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any]: + """Validate SQL like the prototype and let the tool loop self-correct on failures.""" + sql = str(args.get("sql") or "").strip() + if not sql: + return {"error": "sql_missing", "message": "SQL is required"} + + allowlist_validation = _validate_sql_allowlist(sql, state["allowlist"]) + if not allowlist_validation["valid"]: + return { + "error": "table_not_allowed", + "invalid_tables": allowlist_validation["invalid_tables"], + "message": allowlist_validation["message"], + } + + follow_up_dimension_validation = _validate_follow_up_dimension_usage( + warehouse_tools_factory, + sql=sql, + state=state, + execution_context=execution_context, + ) + if follow_up_dimension_validation is not None: + return follow_up_dimension_validation + + missing_distinct = _check_missing_distinct( + warehouse_tools_factory, sql, state, execution_context + ) + if missing_distinct: + return { + "error": "must_fetch_distinct_values", + "missing": missing_distinct, + "message": ( + 
"Call get_distinct_values for these columns, then regenerate the SQL using one of the returned values." + ), + } + + validation = DashboardChatSqlGuard( + allowlist=state["allowlist"], + max_rows=runtime_config.max_query_rows, + ).validate(sql) + execution_context["last_sql_validation"] = validation + if not validation.is_valid or not validation.sanitized_sql: + return { + "error": "sql_validation_failed", + "issues": validation.errors, + "warnings": validation.warnings, + } + + missing_columns = _missing_columns_in_primary_table( + warehouse_tools_factory, + sql=validation.sanitized_sql, + state=state, + execution_context=execution_context, + ) + if missing_columns is not None: + return missing_columns + + execution_context["last_sql"] = validation.sanitized_sql + try: + rows = get_turn_warehouse_tools( + warehouse_tools_factory, + execution_context, + state["org"], + ).execute_sql(validation.sanitized_sql) + except Exception as error: + structured_error = _structured_sql_execution_error( + warehouse_tools_factory, + sql=validation.sanitized_sql, + error=error, + state=state, + execution_context=execution_context, + ) + if structured_error is not None: + return structured_error + return { + "success": False, + "error": str(error), + "sql_used": validation.sanitized_sql, + } + + serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder)) + execution_context["last_sql_results"] = serialized_rows + record_validated_filters_from_sql( + state=state, + execution_context=execution_context, + sql=validation.sanitized_sql, + ) + return { + "success": True, + "row_count": len(serialized_rows), + "error": None, + "sql_used": validation.sanitized_sql, + "columns": list(serialized_rows[0].keys()) if serialized_rows else [], + "rows": serialized_rows, + } + + +# --------------------------------------------------------------------------- +# SQL execution (run_sql_query tool handler helpers) +# --------------------------------------------------------------------------- 
+ + +def _validate_sql_allowlist( + sql: str, + allowlist: DashboardChatAllowlist, +) -> dict[str, Any]: + """Validate that all referenced tables are in the dashboard allowlist.""" + referenced_tables = DashboardChatSqlGuard._extract_table_names(sql) + invalid_tables = [ + table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name) + ] + if invalid_tables: + return { + "valid": False, + "invalid_tables": invalid_tables, + "message": ( + "SQL references tables not available in the current dashboard: " + + ", ".join(invalid_tables) + + ". Use list_tables_by_keyword to find allowed tables." + ), + } + return {"valid": True, "invalid_tables": [], "message": ""} + + +def _missing_columns_in_primary_table( + warehouse_tools_factory, + *, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any] | None: + """Return a corrective tool error when SQL references columns absent from the referenced query tables.""" + table_refs = table_references(sql) + referenced_tables = [ + reference["table_name"] for reference in table_refs if reference.get("table_name") + ] + if not referenced_tables: + return None + + schema_cache = get_cached_schema_snippets( + warehouse_tools_factory, + state, + execution_context, + tables=referenced_tables, + ) + all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) + missing_columns_by_table: dict[str, set[str]] = {} + candidate_tables_by_column: dict[str, list[str]] = {} + tables_in_query = list(dict.fromkeys(referenced_tables)) + + for qualifier, column_name in referenced_sql_identifier_refs(sql): + resolved_table = resolve_identifier_table( + qualifier=qualifier, + column_name=column_name, + table_refs=table_refs, + schema_cache=schema_cache, + ) + if resolved_table is not None: + continue + + if qualifier is not None: + target_table = ( + resolve_table_qualifier(qualifier, table_refs) + or primary_table_name(sql) + or 
tables_in_query[0] + ) + else: + matching_tables = tables_with_column(column_name, tables_in_query, schema_cache) + if len(matching_tables) > 1: + continue + target_table = primary_table_name(sql) or tables_in_query[0] + + missing_columns_by_table.setdefault(target_table, set()).add(column_name) + candidate_tables_by_column[column_name] = find_tables_with_column( + column_name, + all_schema_cache, + ) + + missing_columns = sorted( + {column_name for columns in missing_columns_by_table.values() for column_name in columns} + ) + if not missing_columns: + return None + + primary = primary_table_name(sql) or tables_in_query[0] + target_table = ( + next(iter(missing_columns_by_table)) if len(missing_columns_by_table) == 1 else primary + ) + best_table = best_table_for_missing_columns(missing_columns, all_schema_cache) + message = ( + f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. " + "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." + ) + if best_table: + message += f" Best candidate table: {best_table}." 
+ result = { + "error": "column_not_in_table", + "table": target_table, + "missing_columns": missing_columns, + "candidate_tables": candidate_tables_by_column, + "best_table": best_table, + "message": message, + } + if len(missing_columns) == 1: + column_name = missing_columns[0] + result["column"] = column_name + result["candidates"] = candidate_tables_by_column.get(column_name, []) + return result + + +def _structured_sql_execution_error( + warehouse_tools_factory, + *, + sql: str, + error: Exception, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any] | None: + """Convert warehouse execution errors into prototype-style corrective feedback when possible.""" + error_text = str(error) + missing_column_match = re.search( + r'column "(?:[\w]+\.)?([^"]+)" does not exist', + error_text, + flags=re.IGNORECASE, + ) + if missing_column_match: + missing_column = missing_column_match.group(1).lower() + schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) + candidate_tables = find_tables_with_column(missing_column, schema_cache) + return { + "error": "column_not_in_table", + "table": primary_table_name(sql), + "column": missing_column, + "missing_columns": [missing_column], + "candidates": candidate_tables, + "candidate_tables": {missing_column: candidate_tables}, + "best_table": candidate_tables[0] if candidate_tables else None, + "message": ( + f"Column {missing_column} is not available on the current table. " + "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns." 
+ ), + "sql_used": sql, + } + return None + + +def _validate_follow_up_dimension_usage( + warehouse_tools_factory, + *, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> dict[str, Any] | None: + """Keep add-dimension follow-ups from succeeding without actually changing query granularity.""" + intent_decision = state["intent_decision"] + if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL: + return None + if intent_decision.follow_up_context.follow_up_type != "add_dimension": + return None + + requested_dimension = extract_requested_follow_up_dimension( + intent_decision.follow_up_context.modification_instruction or state["user_query"] + ) + if not requested_dimension: + return None + + previous_sql = state["conversation_context"].last_sql_query or "" + current_dimensions = structural_dimensions_from_sql(sql) + previous_dimensions = structural_dimensions_from_sql(previous_sql) + normalized_requested_dimension = normalize_dimension_name(requested_dimension) + if ( + normalized_requested_dimension in current_dimensions + and normalized_requested_dimension not in previous_dimensions + ): + return None + + candidate_tables = find_tables_with_column( + requested_dimension, + get_cached_schema_snippets(warehouse_tools_factory, state, execution_context), + ) + return { + "error": "requested_dimension_missing", + "requested_dimension": requested_dimension, + "previous_dimensions": sorted(previous_dimensions), + "current_dimensions": sorted(current_dimensions), + "candidate_tables": candidate_tables, + "message": ( + f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. " + "Use the requested dimension exactly, or pick a table that contains it." 
+ ), + } + + +def _check_missing_distinct( + warehouse_tools_factory, + sql: str, + state: DashboardChatRuntimeState, + execution_context: dict[str, Any], +) -> list[dict[str, Any]]: + """Detect text filters that require a prior distinct-values call.""" + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return [] + + table_refs = table_references(sql) + query_tables = [ + reference["table_name"] for reference in table_refs if reference.get("table_name") + ] + if not query_tables: + return [] + primary = primary_table_name(sql) or query_tables[0] + + full_schema_cache = get_cached_schema_snippets( + warehouse_tools_factory, + state, + execution_context, + tables=query_tables, + ) + all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) + + column_types = { + table_name: { + str(column.get("name") or "") + .lower(): str(column.get("data_type") or column.get("type") or "") + .lower() + for column in getattr(snippet, "columns", []) + } + for table_name, snippet in full_schema_cache.items() + } + missing: list[dict[str, Any]] = [] + for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_refs=table_refs, + schema_cache=full_schema_cache, + ) + if resolved_table is None and qualifier is None: + matching_tables = tables_with_column(normalized_column, query_tables, full_schema_cache) + if len(matching_tables) > 1: + continue + if resolved_table is None: + candidate_tables = find_tables_with_column(normalized_column, all_schema_cache) + if qualifier is None and candidate_tables: + continue + missing.append( + { + "table": primary, + "column": column_name, + "error": "column_not_in_table", + "candidates": candidate_tables, + } + ) + continue + data_type 
= column_types.get(resolved_table, {}).get(normalized_column, "") + if not data_type: + continue + if not is_text_type(data_type): + continue + if not has_validated_distinct_value( + execution_context["distinct_cache"], + table_name=resolved_table, + column_name=normalized_column, + value=value, + ): + missing.append({"table": resolved_table, "column": column_name, "value": value}) + return missing diff --git a/ddpui/core/dashboard_chat/orchestration/tool_loop.py b/ddpui/core/dashboard_chat/orchestration/tools/loop.py similarity index 96% rename from ddpui/core/dashboard_chat/orchestration/tool_loop.py rename to ddpui/core/dashboard_chat/orchestration/tools/loop.py index 0b2a88c81..f558a1a41 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_loop.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/loop.py @@ -1,23 +1,23 @@ """Prototype-style tool-loop execution helpers for dashboard chat.""" import json -import logging from time import perf_counter from typing import Any from django.core.serializers.json import DjangoJSONEncoder from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseToolsError +from ddpui.utils.custom_logger import CustomLogger -from .presentation import ( +from ..presentation import ( serialize_tool_result, summarize_tool_call, max_turns_message, fallback_answer_text, ) -from .sql_execution import run_sql_with_distinct_guard -from .state import DashboardChatRuntimeState -from .tool_handlers import ( +from ..state import DashboardChatRuntimeState +from .cache import seed_distinct_cache_from_previous_sql +from .handlers import ( handle_retrieve_docs_tool, handle_get_schema_snippets_tool, handle_search_dbt_models_tool, @@ -25,10 +25,10 @@ handle_get_distinct_values_tool, handle_list_tables_by_keyword_tool, handle_check_table_row_count_tool, - seed_distinct_cache_from_previous_sql, + handle_run_sql_query_tool, ) -logger = logging.getLogger(__name__) +logger = CustomLogger("dashboard_chat") def execute_tool_loop( @@ 
-201,7 +201,7 @@ def execute_tool_call( warehouse_tools_factory, args, state, execution_context ) if tool_name == "run_sql_query": - return run_sql_with_distinct_guard( + return handle_run_sql_query_tool( warehouse_tools_factory, runtime_config, args, state, execution_context ) if tool_name == "list_tables_by_keyword": diff --git a/ddpui/core/dashboard_chat/orchestration/tool_specifications.py b/ddpui/core/dashboard_chat/orchestration/tools/specifications.py similarity index 94% rename from ddpui/core/dashboard_chat/orchestration/tool_specifications.py rename to ddpui/core/dashboard_chat/orchestration/tools/specifications.py index eb1c8264b..9975c4b85 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_specifications.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/specifications.py @@ -85,10 +85,7 @@ "parameters": { "type": "object", "properties": { - "table": { - "type": "string", - "description": "Fully-qualified table name", - }, + "table": {"type": "string", "description": "Fully-qualified table name"}, "column": {"type": "string", "description": "Column name"}, "limit": {"type": "integer", "minimum": 1, "maximum": 200, "default": 50}, }, @@ -103,9 +100,7 @@ "description": "Execute a read-only SQL query on the database.", "parameters": { "type": "object", - "properties": { - "sql": {"type": "string", "description": "SELECT query to execute"} - }, + "properties": {"sql": {"type": "string", "description": "SELECT query to execute"}}, "required": ["sql"], }, }, diff --git a/ddpui/core/dashboard_chat/orchestration/sql_parsing.py b/ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py similarity index 99% rename from ddpui/core/dashboard_chat/orchestration/sql_parsing.py rename to ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py index de8dfcf4e..5b0f5401d 100644 --- a/ddpui/core/dashboard_chat/orchestration/sql_parsing.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py @@ -286,7 +286,7 @@ def 
find_tables_with_column( def structural_dimensions_from_sql(sql: str) -> set[str]: """Return normalized non-aggregate dimensions used by one SQL statement.""" - from .conversation import extract_dimensions_from_sql + from ..conversation import extract_dimensions_from_sql if not sql: return set() diff --git a/ddpui/core/dashboard_chat/warehouse/tools.py b/ddpui/core/dashboard_chat/warehouse/tools.py index abde1bd56..722a5f24e 100644 --- a/ddpui/core/dashboard_chat/warehouse/tools.py +++ b/ddpui/core/dashboard_chat/warehouse/tools.py @@ -1,17 +1,17 @@ """Warehouse access helpers used by dashboard chat runtime.""" import json -import logging import re from typing import Any from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse from ddpui.utils import secretsmanager +from ddpui.utils.custom_logger import CustomLogger from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory from ddpui.utils.warehouse.client.warehouse_interface import Warehouse, WarehouseType -logger = logging.getLogger(__name__) +logger = CustomLogger("dashboard_chat") SAFE_WAREHOUSE_IDENTIFIER_PATTERN = re.compile(r"^[A-Za-z0-9_-]+$") From 8943bead144cb60592908c6f275a3a65c400416b Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Thu, 26 Mar 2026 22:09:55 +0530 Subject: [PATCH 20/49] updates --- ddpui/core/dashboard_chat/warehouse/tools.py | 114 +----------------- ddpui/utils/warehouse/client/bigquery.py | 28 +++++ ddpui/utils/warehouse/client/postgres.py | 29 +++++ .../warehouse/client/warehouse_interface.py | 7 ++ 4 files changed, 68 insertions(+), 110 deletions(-) diff --git a/ddpui/core/dashboard_chat/warehouse/tools.py b/ddpui/core/dashboard_chat/warehouse/tools.py index 722a5f24e..5b8eb5701 100644 --- a/ddpui/core/dashboard_chat/warehouse/tools.py +++ b/ddpui/core/dashboard_chat/warehouse/tools.py @@ -1,15 +1,13 @@ """Warehouse access helpers used by dashboard chat runtime.""" -import json import re from typing 
import Any from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse -from ddpui.utils import secretsmanager from ddpui.utils.custom_logger import CustomLogger from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory -from ddpui.utils.warehouse.client.warehouse_interface import Warehouse, WarehouseType +from ddpui.utils.warehouse.client.warehouse_interface import Warehouse logger = CustomLogger("dashboard_chat") @@ -89,119 +87,15 @@ def get_distinct_values( f"Table '{table_name}' must be schema-qualified for distinct lookups" ) schema_name, bare_table_name = parsed_table - - if not self.warehouse_client.column_exists(schema_name, bare_table_name, column_name): - return [] - - query = self._build_distinct_values_query( - schema_name=schema_name, - table_name=bare_table_name, - column_name=column_name, - limit=limit, + return self.warehouse_client.get_distinct_values( + schema_name, bare_table_name, column_name, limit ) - rows = self.warehouse_client.execute(query) - return [ - str(row.get("value")) - for row in rows - if row.get("value") is not None and str(row.get("value")).strip() - ] def execute_sql(self, sql: str) -> list[dict[str, Any]]: """Execute a validated read-only SQL statement.""" rows = self.warehouse_client.execute(sql) return list(rows[: self.max_rows]) - def _build_distinct_values_query( - self, - schema_name: str, - table_name: str, - column_name: str, - limit: int, - ) -> str: - """Build a warehouse-specific query for distinct values.""" - if self.org_warehouse.wtype == WarehouseType.POSTGRES: - quoted_column = self._quote_postgres_identifier(column_name) - return f""" - SELECT DISTINCT {quoted_column} AS value - FROM {self._quote_table_ref(schema_name, table_name)} - WHERE {quoted_column} IS NOT NULL - AND TRIM(CAST({quoted_column} AS TEXT)) != '' - ORDER BY value - LIMIT {int(limit)} - """ - - if self.org_warehouse.wtype == WarehouseType.BIGQUERY: - quoted_column = 
self._quote_bigquery_identifier(column_name) - return f""" - SELECT DISTINCT {quoted_column} AS value - FROM {self._quote_bigquery_table_ref(schema_name, table_name)} - WHERE {quoted_column} IS NOT NULL - AND TRIM(CAST({quoted_column} AS STRING)) != '' - ORDER BY value - LIMIT {int(limit)} - """ - - raise DashboardChatWarehouseToolsError( - f"Unsupported warehouse type for dashboard chat: {self.org_warehouse.wtype}" - ) - - def _quote_table_ref(self, schema_name: str, table_name: str) -> str: - """Quote a Postgres schema.table reference.""" - return ( - f"{self._quote_postgres_identifier(schema_name)}." - f"{self._quote_postgres_identifier(table_name)}" - ) - - def _quote_bigquery_table_ref(self, schema_name: str, table_name: str) -> str: - """Quote a BigQuery fully-qualified table reference.""" - project_name = self._get_bigquery_project_id() - if not project_name: - raise DashboardChatWarehouseToolsError("BigQuery project id not configured") - safe_project_name = self._normalize_identifier_component( - project_name, - "BigQuery project id", - ) - safe_schema_name = self._normalize_identifier_component( - schema_name, - "schema name", - ) - safe_table_name = self._normalize_identifier_component( - table_name, - "table name", - ) - return f"`{safe_project_name}.{safe_schema_name}.{safe_table_name}`" - - def _get_bigquery_project_id(self) -> str | None: - """Read the BigQuery project id from stored warehouse credentials.""" - credentials = secretsmanager.retrieve_warehouse_credentials(self.org_warehouse) or {} - project_id = credentials.get("project_id") - if project_id: - return str(project_id) - - credentials_json = credentials.get("credentials_json") - if isinstance(credentials_json, str): - try: - parsed_credentials = json.loads(credentials_json) - except json.JSONDecodeError: - return None - project_id = parsed_credentials.get("project_id") - if project_id: - return str(project_id) - - return None - - @staticmethod - def _quote_postgres_identifier(identifier: 
str) -> str: - """Quote a Postgres identifier while preserving its literal value.""" - escaped_identifier = identifier.replace('"', '""') - return f'"{escaped_identifier}"' - - @staticmethod - def _quote_bigquery_identifier(identifier: str) -> str: - """Quote a BigQuery identifier while preserving its literal value.""" - escaped_identifier = identifier.replace("`", "") - return f"`{escaped_identifier}`" - @staticmethod def _parse_table_name(table_name: str | None) -> tuple[str, str] | None: """Parse schema.table into separate pieces.""" @@ -218,7 +112,7 @@ def _parse_table_name(table_name: str | None) -> tuple[str, str] | None: @staticmethod def _normalize_identifier_component(component: str, component_name: str) -> str: - """Normalize a schema/table/project component and reject unsafe identifier text.""" + """Normalize a schema/table component and reject unsafe identifier text.""" normalized_component = component.strip().strip('"').strip("`") if not normalized_component: raise DashboardChatWarehouseToolsError(f"{component_name} is required") diff --git a/ddpui/utils/warehouse/client/bigquery.py b/ddpui/utils/warehouse/client/bigquery.py index 860a14fad..eb9e2605c 100644 --- a/ddpui/utils/warehouse/client/bigquery.py +++ b/ddpui/utils/warehouse/client/bigquery.py @@ -1,4 +1,5 @@ import sqlalchemy.types as types +from typing import Union from sqlalchemy.engine import create_engine from sqlalchemy.engine.reflection import Inspector from sqlalchemy import inspect @@ -105,3 +106,30 @@ def column_exists(self, db_schema: str, db_table: str, column_name: str) -> bool if col.get("name") == column_name: return True return False + + def get_distinct_values( + self, db_schema: str, db_table: str, column_name: str, limit: Union[int, None] = None + ) -> list[str]: + """Return distinct non-empty values for a column, ordered alphabetically.""" + q_column = self._quote(column_name) + q_table = f"`{db_schema}.{db_table}`" + sql = ( + f"SELECT DISTINCT {q_column} AS value" + f" FROM 
{q_table}" + f" WHERE {q_column} IS NOT NULL" + f" AND TRIM(CAST({q_column} AS STRING)) != ''" + f" ORDER BY value" + ) + if limit is not None: + sql += f" LIMIT {int(limit)}" + rows = self.execute(sql) + return [ + str(row.get("value")) + for row in rows + if row.get("value") is not None and str(row.get("value")).strip() + ] + + @staticmethod + def _quote(identifier: str) -> str: + """Quote a BigQuery identifier.""" + return "`" + identifier.replace("`", "") + "`" diff --git a/ddpui/utils/warehouse/client/postgres.py b/ddpui/utils/warehouse/client/postgres.py index d5655c19a..aa2c9ed15 100644 --- a/ddpui/utils/warehouse/client/postgres.py +++ b/ddpui/utils/warehouse/client/postgres.py @@ -1,4 +1,5 @@ import tempfile +from typing import Union from urllib.parse import quote from sqlalchemy.engine import create_engine @@ -117,3 +118,31 @@ def column_exists(self, db_schema: str, db_table: str, column_name: str) -> bool if col.get("name") == column_name: return True return False + + def get_distinct_values( + self, db_schema: str, db_table: str, column_name: str, limit: Union[int, None] = None + ) -> list[str]: + """Return distinct non-empty values for a column, ordered alphabetically.""" + q_schema = self._quote(db_schema) + q_table = self._quote(db_table) + q_column = self._quote(column_name) + sql = ( + f"SELECT DISTINCT {q_column} AS value" + f" FROM {q_schema}.{q_table}" + f" WHERE {q_column} IS NOT NULL" + f" AND TRIM(CAST({q_column} AS TEXT)) != ''" + f" ORDER BY value" + ) + if limit is not None: + sql += f" LIMIT {int(limit)}" + rows = self.execute(sql) + return [ + str(row.get("value")) + for row in rows + if row.get("value") is not None and str(row.get("value")).strip() + ] + + @staticmethod + def _quote(identifier: str) -> str: + """Quote a Postgres identifier.""" + return '"' + identifier.replace('"', '""') + '"' diff --git a/ddpui/utils/warehouse/client/warehouse_interface.py b/ddpui/utils/warehouse/client/warehouse_interface.py index 2951618c3..361d75500 
100644 --- a/ddpui/utils/warehouse/client/warehouse_interface.py +++ b/ddpui/utils/warehouse/client/warehouse_interface.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum +from typing import Union class WarehouseType(str, Enum): @@ -31,3 +32,9 @@ def get_wtype(self): @abstractmethod def column_exists(self, db_schema: str, db_table: str, column_name: str) -> bool: pass + + @abstractmethod + def get_distinct_values( + self, db_schema: str, db_table: str, column_name: str, limit: Union[int, None] = None + ) -> list[str]: + pass From 132bb56f0d383ae0cb75c2758e483afabb75dfa8 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Thu, 26 Mar 2026 22:37:30 +0530 Subject: [PATCH 21/49] imports global --- .../core/dashboard_chat/contracts/__init__.py | 29 +++++++---------- .../core/dashboard_chat/contracts/response.py | 2 +- .../dashboard_chat/orchestration/__init__.py | 7 +++-- .../orchestration/conversation.py | 4 ++- .../orchestration/message_stack.py | 7 +++-- .../orchestration/nodes/finalize.py | 2 +- .../orchestration/nodes/handle_data_query.py | 15 +++++---- .../orchestration/nodes/handle_follow_up.py | 15 +++++---- .../orchestration/nodes/handle_irrelevant.py | 4 +-- .../nodes/handle_needs_clarification.py | 7 +++-- .../orchestration/nodes/handle_small_talk.py | 7 +++-- .../orchestration/nodes/helpers.py | 2 +- .../orchestration/nodes/load_context.py | 4 +-- .../orchestration/nodes/route_intent.py | 9 ++++-- .../orchestration/orchestrator.py | 31 ++++++++++++------- .../orchestration/presentation.py | 5 ++- .../dashboard_chat/orchestration/retrieval.py | 2 +- .../orchestration/session_snapshot.py | 2 +- .../orchestration/tools/cache.py | 11 ++++--- .../orchestration/tools/handlers.py | 12 ++++--- .../orchestration/tools/loop.py | 10 +++--- .../orchestration/tools/sql_parsing.py | 3 +- 22 files changed, 110 insertions(+), 80 deletions(-) diff --git a/ddpui/core/dashboard_chat/contracts/__init__.py b/ddpui/core/dashboard_chat/contracts/__init__.py 
index fb1a3ad4f..6f2fba813 100644 --- a/ddpui/core/dashboard_chat/contracts/__init__.py +++ b/ddpui/core/dashboard_chat/contracts/__init__.py @@ -1,27 +1,20 @@ """Typed contracts for dashboard chat orchestration.""" -from .conversation import ( +from ddpui.core.dashboard_chat.contracts.conversation import ( DashboardChatConversationContext, DashboardChatConversationMessage, ) -from .intents import ( +from ddpui.core.dashboard_chat.contracts.intents import ( DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, ) -from .response import DashboardChatCitation, DashboardChatResponse -from .retrieval import DashboardChatRetrievedDocument, DashboardChatSchemaSnippet -from .sql import DashboardChatSqlValidationResult - -__all__ = [ - "DashboardChatCitation", - "DashboardChatConversationContext", - "DashboardChatConversationMessage", - "DashboardChatFollowUpContext", - "DashboardChatIntent", - "DashboardChatIntentDecision", - "DashboardChatResponse", - "DashboardChatRetrievedDocument", - "DashboardChatSchemaSnippet", - "DashboardChatSqlValidationResult", -] +from ddpui.core.dashboard_chat.contracts.response import ( + DashboardChatCitation, + DashboardChatResponse, +) +from ddpui.core.dashboard_chat.contracts.retrieval import ( + DashboardChatRetrievedDocument, + DashboardChatSchemaSnippet, +) +from ddpui.core.dashboard_chat.contracts.sql import DashboardChatSqlValidationResult diff --git a/ddpui/core/dashboard_chat/contracts/response.py b/ddpui/core/dashboard_chat/contracts/response.py index 477aa0a46..30d85c700 100644 --- a/ddpui/core/dashboard_chat/contracts/response.py +++ b/ddpui/core/dashboard_chat/contracts/response.py @@ -6,7 +6,7 @@ from django.core.serializers.json import DjangoJSONEncoder -from .intents import DashboardChatIntent +from ddpui.core.dashboard_chat.contracts.intents import DashboardChatIntent @dataclass(frozen=True) diff --git a/ddpui/core/dashboard_chat/orchestration/__init__.py 
b/ddpui/core/dashboard_chat/orchestration/__init__.py index 7897f8b2e..181b9610e 100644 --- a/ddpui/core/dashboard_chat/orchestration/__init__.py +++ b/ddpui/core/dashboard_chat/orchestration/__init__.py @@ -1,5 +1,6 @@ """LangGraph orchestration modules for dashboard chat.""" -from .orchestrator import DashboardChatRuntime, get_dashboard_chat_runtime - -__all__ = ["DashboardChatRuntime", "get_dashboard_chat_runtime"] +from ddpui.core.dashboard_chat.orchestration.orchestrator import ( + DashboardChatRuntime, + get_dashboard_chat_runtime, +) diff --git a/ddpui/core/dashboard_chat/orchestration/conversation.py b/ddpui/core/dashboard_chat/orchestration/conversation.py index 49bd20002..a46035643 100644 --- a/ddpui/core/dashboard_chat/orchestration/conversation.py +++ b/ddpui/core/dashboard_chat/orchestration/conversation.py @@ -10,7 +10,9 @@ ) from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard -from .source_identifiers import chart_id_from_source_identifier +from ddpui.core.dashboard_chat.orchestration.source_identifiers import ( + chart_id_from_source_identifier, +) def extract_conversation_context( diff --git a/ddpui/core/dashboard_chat/orchestration/message_stack.py b/ddpui/core/dashboard_chat/orchestration/message_stack.py index af452910d..ac319ffc8 100644 --- a/ddpui/core/dashboard_chat/orchestration/message_stack.py +++ b/ddpui/core/dashboard_chat/orchestration/message_stack.py @@ -4,8 +4,11 @@ from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey -from .conversation import build_follow_up_context_prompt, detect_sql_modification_type -from .state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.conversation import ( + build_follow_up_context_prompt, + detect_sql_modification_type, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def build_new_query_messages( diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py 
b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py index ea35eb01b..e923e89f3 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -5,7 +5,7 @@ from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist from ddpui.core.dashboard_chat.contracts import DashboardChatCitation, DashboardChatResponse -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def finalize_node(state: DashboardChatRuntimeState) -> dict[str, Any]: diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py index 4006bd3ef..acc60dc2b 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py @@ -4,17 +4,20 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatResponse -from ..message_stack import build_new_query_messages -from ..presentation import ( +from ddpui.core.dashboard_chat.orchestration.message_stack import build_new_query_messages +from ddpui.core.dashboard_chat.orchestration.presentation import ( build_usage_summary, compose_final_answer_text, determine_response_format, sql_result_columns, ) -from ..retrieval import build_citations, get_cached_query_embedding -from ..state import DashboardChatRuntimeState -from ..tools.loop import execute_tool_loop -from .helpers import merge_tool_loop_timing +from ddpui.core.dashboard_chat.orchestration.retrieval import ( + build_citations, + get_cached_query_embedding, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.tools.loop import execute_tool_loop +from ddpui.core.dashboard_chat.orchestration.nodes.helpers import merge_tool_loop_timing def handle_data_query_node( diff --git 
a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py index 20ae99a6a..8cb90093a 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py @@ -4,17 +4,20 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatResponse -from ..message_stack import build_follow_up_messages -from ..presentation import ( +from ddpui.core.dashboard_chat.orchestration.message_stack import build_follow_up_messages +from ddpui.core.dashboard_chat.orchestration.presentation import ( build_usage_summary, compose_final_answer_text, determine_response_format, sql_result_columns, ) -from ..retrieval import build_citations, get_cached_query_embedding -from ..state import DashboardChatRuntimeState -from ..tools.loop import execute_tool_loop -from .helpers import merge_tool_loop_timing +from ddpui.core.dashboard_chat.orchestration.retrieval import ( + build_citations, + get_cached_query_embedding, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.tools.loop import execute_tool_loop +from ddpui.core.dashboard_chat.orchestration.nodes.helpers import merge_tool_loop_timing def handle_follow_up_node( diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py index c7296f927..f9f18d0aa 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py @@ -4,8 +4,8 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse -from ..presentation import build_usage_summary -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.presentation import build_usage_summary +from 
ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def handle_irrelevant_node( diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py index 1be10fa3c..7d65e45c9 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py @@ -4,8 +4,11 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse -from ..presentation import build_usage_summary, clarification_fallback -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.presentation import ( + build_usage_summary, + clarification_fallback, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def handle_needs_clarification_node( diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py index 807222647..046653ca2 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py @@ -4,8 +4,11 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse -from ..presentation import build_usage_summary, compose_small_talk_response -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.presentation import ( + build_usage_summary, + compose_small_talk_response, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def handle_small_talk_node( diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py b/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py index f236d5477..d94679fae 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py +++ 
b/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py @@ -2,7 +2,7 @@ from typing import Any -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def route_after_intent(state: DashboardChatRuntimeState) -> str: diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py index 6e5c98a3f..45c3ab632 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py @@ -2,8 +2,8 @@ from typing import Any -from ..session_snapshot import load_session_snapshot -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.session_snapshot import load_session_snapshot +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def load_context_node(state: DashboardChatRuntimeState) -> dict[str, Any]: diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py index 09bc128c3..b7f23560d 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py @@ -2,9 +2,12 @@ from typing import Any -from ..conversation import extract_conversation_context -from ..presentation import build_fast_path_intent, build_fast_path_small_talk_response -from ..state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.conversation import extract_conversation_context +from ddpui.core.dashboard_chat.orchestration.presentation import ( + build_fast_path_intent, + build_fast_path_small_talk_response, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def route_intent_node(state: DashboardChatRuntimeState, llm_client) -> dict[str, Any]: diff --git 
a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index 009defa66..475134cfd 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -15,18 +15,25 @@ from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools from ddpui.models.org import Org -from .conversation import normalize_conversation_history -from .nodes.finalize import finalize_node -from .nodes.handle_data_query import handle_data_query_node -from .nodes.handle_follow_up import handle_follow_up_node -from .nodes.handle_irrelevant import handle_irrelevant_node -from .nodes.handle_needs_clarification import handle_needs_clarification_node -from .nodes.handle_small_talk import handle_small_talk_node -from .nodes.helpers import route_after_intent -from .nodes.load_context import load_context_node -from .nodes.route_intent import route_intent_node -from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN -from .tools.specifications import DASHBOARD_CHAT_TOOL_SPECIFICATIONS +from ddpui.core.dashboard_chat.orchestration.conversation import normalize_conversation_history +from ddpui.core.dashboard_chat.orchestration.nodes.finalize import finalize_node +from ddpui.core.dashboard_chat.orchestration.nodes.handle_data_query import handle_data_query_node +from ddpui.core.dashboard_chat.orchestration.nodes.handle_follow_up import handle_follow_up_node +from ddpui.core.dashboard_chat.orchestration.nodes.handle_irrelevant import handle_irrelevant_node +from ddpui.core.dashboard_chat.orchestration.nodes.handle_needs_clarification import ( + handle_needs_clarification_node, +) +from ddpui.core.dashboard_chat.orchestration.nodes.handle_small_talk import handle_small_talk_node +from ddpui.core.dashboard_chat.orchestration.nodes.helpers import route_after_intent +from ddpui.core.dashboard_chat.orchestration.nodes.load_context import 
load_context_node +from ddpui.core.dashboard_chat.orchestration.nodes.route_intent import route_intent_node +from ddpui.core.dashboard_chat.orchestration.state import ( + DashboardChatRuntimeState, + SMALL_TALK_FAST_PATH_PATTERN, +) +from ddpui.core.dashboard_chat.orchestration.tools.specifications import ( + DASHBOARD_CHAT_TOOL_SPECIFICATIONS, +) def _timed_node(node_name: str, handler): diff --git a/ddpui/core/dashboard_chat/orchestration/presentation.py b/ddpui/core/dashboard_chat/orchestration/presentation.py index 676ece334..487553929 100644 --- a/ddpui/core/dashboard_chat/orchestration/presentation.py +++ b/ddpui/core/dashboard_chat/orchestration/presentation.py @@ -11,7 +11,10 @@ ) from ddpui.utils.custom_logger import CustomLogger -from .state import DashboardChatRuntimeState, SMALL_TALK_FAST_PATH_PATTERN +from ddpui.core.dashboard_chat.orchestration.state import ( + DashboardChatRuntimeState, + SMALL_TALK_FAST_PATH_PATTERN, +) logger = CustomLogger("dashboard_chat") diff --git a/ddpui/core/dashboard_chat/orchestration/retrieval.py b/ddpui/core/dashboard_chat/orchestration/retrieval.py index f1ba55448..fe7fd8661 100644 --- a/ddpui/core/dashboard_chat/orchestration/retrieval.py +++ b/ddpui/core/dashboard_chat/orchestration/retrieval.py @@ -13,7 +13,7 @@ ) from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType -from .source_identifiers import ( +from ddpui.core.dashboard_chat.orchestration.source_identifiers import ( chart_id_from_source_identifier, unique_id_from_source_identifier, ) diff --git a/ddpui/core/dashboard_chat/orchestration/session_snapshot.py b/ddpui/core/dashboard_chat/orchestration/session_snapshot.py index 19609b704..d6ceaaeec 100644 --- a/ddpui/core/dashboard_chat/orchestration/session_snapshot.py +++ b/ddpui/core/dashboard_chat/orchestration/session_snapshot.py @@ -18,7 +18,7 @@ ) from ddpui.services.dashboard_service import DashboardService -from .state import DashboardChatRuntimeState +from 
ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState def load_session_snapshot(state: DashboardChatRuntimeState) -> dict[str, Any]: diff --git a/ddpui/core/dashboard_chat/orchestration/tools/cache.py b/ddpui/core/dashboard_chat/orchestration/tools/cache.py index 66620a967..a3affbdee 100644 --- a/ddpui/core/dashboard_chat/orchestration/tools/cache.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/cache.py @@ -8,16 +8,19 @@ from ddpui.models.org import Org from ddpui.utils.custom_logger import CustomLogger -from ..retrieval import ( +from ddpui.core.dashboard_chat.orchestration.retrieval import ( retrieve_vector_documents, filter_allowlisted_dbt_results, dedupe_retrieved_documents, build_tool_document_payload, get_cached_query_embedding, ) -from ..session_snapshot import persist_session_schema_cache, persist_session_distinct_cache -from ..state import DashboardChatRuntimeState -from .sql_parsing import ( +from ddpui.core.dashboard_chat.orchestration.session_snapshot import ( + persist_session_schema_cache, + persist_session_distinct_cache, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.tools.sql_parsing import ( table_references as sql_table_references, resolve_identifier_table, tables_with_column, diff --git a/ddpui/core/dashboard_chat/orchestration/tools/handlers.py b/ddpui/core/dashboard_chat/orchestration/tools/handlers.py index 0855b0a3e..f2ce713f5 100644 --- a/ddpui/core/dashboard_chat/orchestration/tools/handlers.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/handlers.py @@ -12,16 +12,18 @@ from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard -from ..conversation import extract_requested_follow_up_dimension -from ..retrieval import ( +from ddpui.core.dashboard_chat.orchestration.conversation import ( + 
extract_requested_follow_up_dimension, +) +from ddpui.core.dashboard_chat.orchestration.retrieval import ( retrieve_vector_documents, filter_allowlisted_dbt_results, dedupe_retrieved_documents, build_tool_document_payload, get_cached_query_embedding, ) -from ..state import DashboardChatRuntimeState -from .cache import ( +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.tools.cache import ( get_turn_warehouse_tools, get_cached_schema_snippets, has_validated_distinct_value, @@ -30,7 +32,7 @@ record_validated_filters_from_sql, dbt_resources_by_unique_id, ) -from .sql_parsing import ( +from ddpui.core.dashboard_chat.orchestration.tools.sql_parsing import ( table_references, resolve_identifier_table, tables_with_column, diff --git a/ddpui/core/dashboard_chat/orchestration/tools/loop.py b/ddpui/core/dashboard_chat/orchestration/tools/loop.py index f558a1a41..a66496f48 100644 --- a/ddpui/core/dashboard_chat/orchestration/tools/loop.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/loop.py @@ -9,15 +9,17 @@ from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseToolsError from ddpui.utils.custom_logger import CustomLogger -from ..presentation import ( +from ddpui.core.dashboard_chat.orchestration.presentation import ( serialize_tool_result, summarize_tool_call, max_turns_message, fallback_answer_text, ) -from ..state import DashboardChatRuntimeState -from .cache import seed_distinct_cache_from_previous_sql -from .handlers import ( +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.tools.cache import ( + seed_distinct_cache_from_previous_sql, +) +from ddpui.core.dashboard_chat.orchestration.tools.handlers import ( handle_retrieve_docs_tool, handle_get_schema_snippets_tool, handle_search_dbt_models_tool, diff --git a/ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py 
b/ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py index 5b0f5401d..cbfedb3f1 100644 --- a/ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py +++ b/ddpui/core/dashboard_chat/orchestration/tools/sql_parsing.py @@ -6,6 +6,7 @@ from ddpui.core.dashboard_chat.context.allowlist import normalize_dashboard_chat_table_name from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.orchestration.conversation import extract_dimensions_from_sql from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard @@ -286,8 +287,6 @@ def find_tables_with_column( def structural_dimensions_from_sql(sql: str) -> set[str]: """Return normalized non-aggregate dimensions used by one SQL statement.""" - from ..conversation import extract_dimensions_from_sql - if not sql: return set() From a045439c5eb933d58cc783b66fae8eac10dd29a2 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Fri, 27 Mar 2026 01:43:06 +0530 Subject: [PATCH 22/49] updates vecto store refactor --- ddpui/celeryworkers/tasks.py | 12 +- ddpui/core/dashboard_chat/config.py | 28 +-- .../orchestration/orchestrator.py | 16 +- ddpui/core/dashboard_chat/vector/building.py | 51 +++-- ddpui/core/dashboard_chat/vector/store.py | 206 ++++++++--------- .../tests/core/dashboard_chat/test_runtime.py | 80 ++++--- ddpui/tests/core/dashboard_chat/test_tasks.py | 8 +- .../dashboard_chat/test_vector_building.py | 40 ++-- .../core/dashboard_chat/test_vector_store.py | 25 ++- ddpui/utils/vector/__init__.py | 2 + ddpui/utils/vector/backends/__init__.py | 0 ddpui/utils/vector/backends/chroma.py | 171 +++++++++++++++ ddpui/utils/vector/chroma/__init__.py | 12 - ddpui/utils/vector/chroma/client.py | 11 - ddpui/utils/vector/chroma/store.py | 207 ------------------ ddpui/utils/vector/chroma/types.py | 23 -- ddpui/utils/vector/interface.py | 93 ++++++++ 17 files changed, 496 insertions(+), 489 deletions(-) create mode 100644 ddpui/utils/vector/backends/__init__.py 
create mode 100644 ddpui/utils/vector/backends/chroma.py delete mode 100644 ddpui/utils/vector/chroma/__init__.py delete mode 100644 ddpui/utils/vector/chroma/client.py delete mode 100644 ddpui/utils/vector/chroma/store.py delete mode 100644 ddpui/utils/vector/chroma/types.py create mode 100644 ddpui/utils/vector/interface.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index 3971aa5e2..ec14e6654 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -75,7 +75,7 @@ ) from ddpui.core.orgdbt_manager import DbtProjectManager, DbtCommandError from ddpui.core.git_manager import GitManager, GitManagerError -from ddpui.core.dashboard_chat.vector.building import DashboardChatVectorBuildService +from ddpui.core.dashboard_chat.vector.building import OrgVectorBuildService from ddpui.core.dashboard_chat.orchestration.orchestrator import get_dashboard_chat_runtime from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpairbyte import airbyte_service, airbytehelpers @@ -1342,7 +1342,7 @@ def build_dashboard_chat_context_for_org(self, org_id: int): return {"status": "skipped_locked", "org_id": org_id} try: - result = DashboardChatVectorBuildService().build_org_vector_context(org) + result = OrgVectorBuildService().build_org_vector_context(org) return { "status": "completed", "org_id": org_id, @@ -1429,9 +1429,7 @@ def run_dashboard_chat_turn(session_id: str, user_message_id: int): .filter(session_id=session_id) .first() ) - user_message = ( - DashboardChatMessage.objects.filter(id=user_message_id, role="user").first() - ) + user_message = DashboardChatMessage.objects.filter(id=user_message_id, role="user").first() if session is not None and session.dashboard is not None and user_message is not None: publish_dashboard_chat_event( str(session.session_id), @@ -1493,9 +1491,7 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: assistant_payload = { key: value for key, value in 
response_payload.items() if key != "answer_text" } - timing_breakdown = ( - dict(response_payload.get("metadata") or {}).get("timing_breakdown") or {} - ) + timing_breakdown = dict(response_payload.get("metadata") or {}).get("timing_breakdown") or {} assistant_message = create_dashboard_chat_assistant_message( session=session, content=response.answer_text, diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index 6064424d0..493bad3a1 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -58,9 +58,7 @@ def from_env(cls) -> "DashboardChatSourceConfig": env_value = _parse_enabled_source_types_env( os.getenv("AI_DASHBOARD_CHAT_ENABLED_SOURCE_TYPES") ) - return cls( - enabled_source_types=env_value or _default_enabled_source_types() - ) + return cls(enabled_source_types=env_value or _default_enabled_source_types()) def is_enabled(self, source_type: DashboardChatSourceType) -> bool: """Return whether the given source type should participate in runtime work.""" @@ -71,20 +69,16 @@ def filter_enabled( source_types: Sequence[DashboardChatSourceType], ) -> list[DashboardChatSourceType]: """Keep only the configured source types from a requested set.""" - return [ - source_type - for source_type in source_types - if self.is_enabled(source_type) - ] + return [source_type for source_type in source_types if self.is_enabled(source_type)] @dataclass(frozen=True) class DashboardChatVectorStoreConfig: - """Environment-backed configuration for the Chroma sidecar and embeddings.""" + """Environment-backed configuration for the vector store backend and embeddings.""" - chroma_host: str = "localhost" - chroma_port: int = 8003 - chroma_ssl: bool = False + vector_store_host: str = "localhost" + vector_store_port: int = 8003 + vector_store_ssl: bool = False collection_prefix: str = "org_" embedding_model: str = "text-embedding-3-small" @@ -92,9 +86,9 @@ class DashboardChatVectorStoreConfig: def from_env(cls) -> 
"DashboardChatVectorStoreConfig": """Build vector store config from environment variables.""" return cls( - chroma_host=os.getenv("AI_DASHBOARD_CHAT_CHROMA_HOST", "localhost"), - chroma_port=int(os.getenv("AI_DASHBOARD_CHAT_CHROMA_PORT", "8003")), - chroma_ssl=_parse_bool(os.getenv("AI_DASHBOARD_CHAT_CHROMA_SSL"), False), + vector_store_host=os.getenv("AI_DASHBOARD_CHAT_CHROMA_HOST", "localhost"), + vector_store_port=int(os.getenv("AI_DASHBOARD_CHAT_CHROMA_PORT", "8003")), + vector_store_ssl=_parse_bool(os.getenv("AI_DASHBOARD_CHAT_CHROMA_SSL"), False), collection_prefix=os.getenv("AI_DASHBOARD_CHAT_CHROMA_COLLECTION_PREFIX", "org_"), embedding_model=os.getenv( "AI_DASHBOARD_CHAT_CHROMA_EMBEDDING_MODEL", @@ -124,8 +118,6 @@ def from_env(cls) -> "DashboardChatRuntimeConfig": llm_max_attempts=int(os.getenv("AI_DASHBOARD_CHAT_LLM_MAX_ATTEMPTS", "1")), retrieval_limit=int(os.getenv("AI_DASHBOARD_CHAT_RETRIEVAL_LIMIT", "6")), max_query_rows=int(os.getenv("AI_DASHBOARD_CHAT_MAX_QUERY_ROWS", "200")), - max_distinct_values=int( - os.getenv("AI_DASHBOARD_CHAT_MAX_DISTINCT_VALUES", "50") - ), + max_distinct_values=int(os.getenv("AI_DASHBOARD_CHAT_MAX_DISTINCT_VALUES", "50")), max_schema_tables=int(os.getenv("AI_DASHBOARD_CHAT_MAX_SCHEMA_TABLES", "4")), ) diff --git a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index 475134cfd..796b0a9b1 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -3,7 +3,7 @@ from collections.abc import Callable, Sequence from functools import lru_cache from time import perf_counter -from typing import Any +from typing import Any, Union from langgraph.graph import END, START, StateGraph @@ -11,7 +11,7 @@ from ddpui.core.dashboard_chat.agents.interface import DashboardChatLlmClient from ddpui.core.dashboard_chat.agents.openai import OpenAIDashboardChatLlmClient from ddpui.core.dashboard_chat.contracts 
import DashboardChatResponse -from ddpui.core.dashboard_chat.vector.store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.vector.store import OrgVectorStore from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools from ddpui.models.org import Org @@ -146,15 +146,15 @@ class DashboardChatRuntime: def __init__( self, - vector_store: ChromaDashboardChatVectorStore | None = None, - llm_client: DashboardChatLlmClient | None = None, - warehouse_tools_factory: Callable[[Org], DashboardChatWarehouseTools] | None = None, - runtime_config: DashboardChatRuntimeConfig | None = None, - source_config: DashboardChatSourceConfig | None = None, + vector_store: Union[OrgVectorStore, None] = None, + llm_client: Union[DashboardChatLlmClient, None] = None, + warehouse_tools_factory: Union[Callable[[Org], DashboardChatWarehouseTools], None] = None, + runtime_config: Union[DashboardChatRuntimeConfig, None] = None, + source_config: Union[DashboardChatSourceConfig, None] = None, ): self.runtime_config = runtime_config or DashboardChatRuntimeConfig.from_env() self.source_config = source_config or DashboardChatSourceConfig.from_env() - self.vector_store = vector_store or ChromaDashboardChatVectorStore() + self.vector_store = vector_store or OrgVectorStore() self.llm_client = llm_client or OpenAIDashboardChatLlmClient( model=self.runtime_config.llm_model, timeout_ms=self.runtime_config.llm_timeout_ms, diff --git a/ddpui/core/dashboard_chat/vector/building.py b/ddpui/core/dashboard_chat/vector/building.py index 2f6c8f63e..4692886e2 100644 --- a/ddpui/core/dashboard_chat/vector/building.py +++ b/ddpui/core/dashboard_chat/vector/building.py @@ -2,18 +2,18 @@ from dataclasses import dataclass from datetime import timedelta -from typing import Callable +from typing import Callable, Union from django.utils import timezone +from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig from ddpui.core.dashboard_chat.context.dbt_docs import ( 
DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) -from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig from ddpui.core.dashboard_chat.vector.builder import DashboardChatVectorDocumentBuilder from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.vector.store import ChromaDashboardChatVectorStore +from ddpui.core.dashboard_chat.vector.store import OrgVectorStore from ddpui.models.dashboard_chat import DashboardChatSession from ddpui.models.org import Org @@ -26,43 +26,45 @@ ] -class DashboardChatVectorBuildError(Exception): - """Raised when the dashboard chat vector build cannot complete.""" +class OrgVectorBuildError(Exception): + """Raised when an org vector build cannot complete.""" @dataclass(frozen=True) -class DashboardChatVectorBuildResult: +class OrgVectorBuildResult: """Summary of one completed org vector build.""" org_id: int - docs_generated_at: timezone.datetime | None + docs_generated_at: Union[timezone.datetime, None] vector_ingested_at: timezone.datetime source_document_counts: dict[str, int] upserted_document_ids: list[str] deleted_document_ids: list[str] -class DashboardChatVectorBuildService: - """Build org-scoped dashboard-chat vector context and sync it into Chroma.""" +class OrgVectorBuildService: + """Build org-scoped dashboard-chat vector context and sync it into the vector store.""" def __init__( self, - vector_store: ChromaDashboardChatVectorStore | None = None, - dbt_docs_generator: Callable[[Org, object], DashboardChatDbtDocsArtifacts] | None = None, - source_config: DashboardChatSourceConfig | None = None, - document_builder: DashboardChatVectorDocumentBuilder | None = None, + vector_store: Union[OrgVectorStore, None] = None, + dbt_docs_generator: Union[ + Callable[[Org, object], DashboardChatDbtDocsArtifacts], None + ] = None, + source_config: Union[DashboardChatSourceConfig, None] = None, + document_builder: 
Union[DashboardChatVectorDocumentBuilder, None] = None, ): - self.vector_store = vector_store or ChromaDashboardChatVectorStore() + self.vector_store = vector_store or OrgVectorStore() self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts self.source_config = source_config or DashboardChatSourceConfig.from_env() self.document_builder = document_builder or DashboardChatVectorDocumentBuilder( source_config=self.source_config ) - def build_org_vector_context(self, org: Org) -> DashboardChatVectorBuildResult: + def build_org_vector_context(self, org: Org) -> OrgVectorBuildResult: """Run dbt docs generation and rebuild the desired vector documents for an org.""" if org.dbt is None: - raise DashboardChatVectorBuildError("dbt workspace not configured") + raise OrgVectorBuildError("dbt workspace not configured") collection_versioned_at = timezone.now() target_collection_name = self.vector_store.collection_name( @@ -81,11 +83,14 @@ def build_org_vector_context(self, org: Org) -> DashboardChatVectorBuildResult: if self.source_config.is_enabled(source_type) for document in documents_by_source[source_type.value] ] - if self.vector_store.load_collection( - org.id, - collection_name=target_collection_name, - allow_legacy_fallback=False, - ) is not None: + if ( + self.vector_store.load_collection( + org.id, + collection_name=target_collection_name, + allow_legacy_fallback=False, + ) + is not None + ): self.vector_store.delete_collection( org.id, collection_name=target_collection_name, @@ -107,7 +112,7 @@ def build_org_vector_context(self, org: Org) -> DashboardChatVectorBuildResult: active_collection_name=target_collection_name, ) - return DashboardChatVectorBuildResult( + return OrgVectorBuildResult( org_id=org.id, docs_generated_at=dbt_docs.generated_at if dbt_docs else org.dbt.docs_generated_at, vector_ingested_at=vector_ingested_at, @@ -129,7 +134,7 @@ def _garbage_collect_inactive_collections( org: Org, active_collection_name: str, ) -> 
None: - """Delete old versioned collections that are not pinned by recent chat sessions.""" + """Delete old versioned collections not pinned by recent chat sessions.""" retention_cutoff = timezone.now() - timedelta(hours=24) recent_sessions = DashboardChatSession.objects.filter( org=org, diff --git a/ddpui/core/dashboard_chat/vector/store.py b/ddpui/core/dashboard_chat/vector/store.py index 148fa4139..55897ef75 100644 --- a/ddpui/core/dashboard_chat/vector/store.py +++ b/ddpui/core/dashboard_chat/vector/store.py @@ -1,14 +1,8 @@ -"""Dashboard-chat vector retrieval built on top of the shared Chroma transport.""" +"""Org-scoped vector store for dashboard chat retrieval.""" from collections.abc import Sequence -from typing import Any +from typing import Any, Union -from chromadb import ClientAPI - -from ddpui.core.dashboard_chat.vector.embeddings import ( - DashboardChatEmbeddingProvider, - OpenAIEmbeddingProvider, -) from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, @@ -16,44 +10,41 @@ build_dashboard_chat_collection_base_name, build_dashboard_chat_collection_name, ) -from ddpui.utils.vector.chroma import ( - ChromaHttpVectorStore, - ChromaQueryResult, - ChromaStoredDocument, +from ddpui.core.dashboard_chat.vector.embeddings import ( + DashboardChatEmbeddingProvider, + OpenAIEmbeddingProvider, ) +from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument + + +def _default_backend(config: DashboardChatVectorStoreConfig) -> VectorStore: + """Build the default vector store backend from config.""" + from ddpui.utils.vector.backends.chroma import ChromaVectorStore -DashboardChatVectorQueryResult = ChromaQueryResult -DashboardChatStoredDocument = ChromaStoredDocument + return ChromaVectorStore( + host=config.vector_store_host, + port=config.vector_store_port, + ssl=config.vector_store_ssl, + ) -class ChromaDashboardChatVectorStore: 
- """Dashboard-chat-specific adapter on top of the generic Chroma wrapper.""" +class OrgVectorStore: + """Org-scoped vector retrieval layer for dashboard chat.""" def __init__( self, - config: DashboardChatVectorStoreConfig | None = None, - embedding_provider: DashboardChatEmbeddingProvider | None = None, - client: ClientAPI | None = None, - chroma_store: ChromaHttpVectorStore | None = None, + config: Union[DashboardChatVectorStoreConfig, None] = None, + embedding_provider: Union[DashboardChatEmbeddingProvider, None] = None, + backend: Union[VectorStore, None] = None, ): self.config = config or DashboardChatVectorStoreConfig.from_env() self.embedding_provider = embedding_provider or OpenAIEmbeddingProvider( model=self.config.embedding_model ) - self.chroma_store = chroma_store or ChromaHttpVectorStore( - host=self.config.chroma_host, - port=self.config.chroma_port, - ssl=self.config.chroma_ssl, - client=client, - ) + self.backend = backend or _default_backend(self.config) - def collection_name( - self, - org_id: int, - *, - version: Any = None, - ) -> str: - """Return the Chroma collection name for an org.""" + def collection_name(self, org_id: int, *, version: Any = None) -> str: + """Return the collection name for an org, optionally versioned.""" return build_dashboard_chat_collection_name( org_id, self.config.collection_prefix, @@ -64,121 +55,106 @@ def create_collection( self, org_id: int, *, - collection_name: str | None = None, + collection_name: Union[str, None] = None, ) -> Any: - """Create or load the Chroma collection for an org.""" - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.chroma_store.create_collection( - name=resolved_collection_name, - metadata={"org_id": str(org_id)}, - ) + """Create or load the collection for an org.""" + resolved = collection_name or self.collection_name(org_id) + return self.backend.create_collection(resolved, metadata={"org_id": str(org_id)}) def load_collection( self, org_id: int, 
*, - collection_name: str | None = None, + collection_name: Union[str, None] = None, allow_legacy_fallback: bool = True, - ) -> Any | None: - """Load an existing Chroma collection for an org.""" - resolved_collection_name = collection_name or self.collection_name(org_id) - collection = self.chroma_store.load_collection(resolved_collection_name) + ) -> Union[Any, None]: + """Load an existing collection for an org.""" + resolved = collection_name or self.collection_name(org_id) + collection = self.backend.load_collection(resolved) if collection is not None or collection_name is None or not allow_legacy_fallback: return collection - return self.chroma_store.load_collection( - build_dashboard_chat_collection_base_name( - org_id, - self.config.collection_prefix, - ) + return self.backend.load_collection( + build_dashboard_chat_collection_base_name(org_id, self.config.collection_prefix) ) def delete_collection( self, org_id: int, *, - collection_name: str | None = None, + collection_name: Union[str, None] = None, ) -> bool: - """Delete the Chroma collection for an org if it exists.""" - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.chroma_store.delete_collection(resolved_collection_name) + """Delete the collection for an org if it exists.""" + resolved = collection_name or self.collection_name(org_id) + return self.backend.delete_collection(resolved) def list_collection_names(self) -> list[str]: - """Return all Chroma collection names for the current client.""" - return self.chroma_store.list_collection_names() + """Return all collection names in the backend.""" + return self.backend.list_collection_names() def list_org_collection_names(self, org_id: int) -> list[str]: """Return all collection names that belong to one org.""" base_name = build_dashboard_chat_collection_base_name(org_id, self.config.collection_prefix) return [ - collection_name - for collection_name in self.list_collection_names() - if collection_name == 
base_name or collection_name.startswith(f"{base_name}__") + name + for name in self.list_collection_names() + if name == base_name or name.startswith(f"{base_name}__") ] def get_documents( self, org_id: int, - source_types: Sequence[DashboardChatSourceType] | None = None, - dashboard_id: int | None = None, + source_types: Union[Sequence[DashboardChatSourceType], None] = None, + dashboard_id: Union[int, None] = None, include_documents: bool = False, - collection_name: str | None = None, - ) -> list[DashboardChatStoredDocument]: + collection_name: Union[str, None] = None, + ) -> list[VectorStoredDocument]: """Load stored documents for an org using metadata filters.""" - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.chroma_store.get_documents( - resolved_collection_name, - where=self._build_vector_metadata_filter( - source_types=source_types, - dashboard_id=dashboard_id, - ), + resolved = collection_name or self.collection_name(org_id) + return self.backend.get_documents( + resolved, + where=self._build_filter(source_types=source_types, dashboard_id=dashboard_id), include_documents=include_documents, ) def delete_documents( self, org_id: int, - ids: list[str] | None = None, - source_types: Sequence[DashboardChatSourceType] | None = None, - dashboard_id: int | None = None, - collection_name: str | None = None, + ids: Union[list[str], None] = None, + source_types: Union[Sequence[DashboardChatSourceType], None] = None, + dashboard_id: Union[int, None] = None, + collection_name: Union[str, None] = None, ) -> int: """Delete matching documents from an org collection.""" - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.chroma_store.delete_documents( - resolved_collection_name, + resolved = collection_name or self.collection_name(org_id) + return self.backend.delete_documents( + resolved, ids=ids, - where=self._build_vector_metadata_filter( - source_types=source_types, - 
dashboard_id=dashboard_id, - ), + where=self._build_filter(source_types=source_types, dashboard_id=dashboard_id), ) def upsert_documents( self, org_id: int, documents: list[DashboardChatVectorDocument], - collection_name: str | None = None, + collection_name: Union[str, None] = None, ) -> list[str]: - """Upsert documents into the org-specific Chroma collection.""" + """Upsert documents into the org-specific collection.""" if not documents: return [] - - contents = [document.content for document in documents] - document_ids = [document.document_id for document in documents] - metadatas = [document.metadata() for document in documents] + contents = [doc.content for doc in documents] embeddings = self.embedding_provider.embed_documents(contents) - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.chroma_store.upsert_documents( - resolved_collection_name, - ids=document_ids, + resolved = collection_name or self.collection_name(org_id) + return self.backend.upsert( + resolved, + ids=[doc.document_id for doc in documents], documents=contents, - metadatas=metadatas, + metadatas=[doc.metadata() for doc in documents], embeddings=embeddings, collection_metadata={"org_id": str(org_id)}, ) def embed_query(self, query_text: str) -> list[float]: - """Build one query embedding that can be reused across filtered retrieval calls.""" + """Embed one query string.""" return self.embedding_provider.embed_query(query_text) def reset_usage(self) -> None: @@ -197,41 +173,35 @@ def query( org_id: int, query_text: str, n_results: int = 5, - source_types: Sequence[DashboardChatSourceType] | None = None, - dashboard_id: int | None = None, - query_embedding: list[float] | None = None, - collection_name: str | None = None, - ) -> list[DashboardChatVectorQueryResult]: - """Query the org-specific Chroma collection.""" - resolved_collection_name = collection_name or self.collection_name(org_id) - return self.chroma_store.query( - resolved_collection_name, + 
source_types: Union[Sequence[DashboardChatSourceType], None] = None, + dashboard_id: Union[int, None] = None, + query_embedding: Union[list[float], None] = None, + collection_name: Union[str, None] = None, + ) -> list[VectorQueryResult]: + """Query the org-specific collection.""" + resolved = collection_name or self.collection_name(org_id) + return self.backend.query( + resolved, query_embedding=query_embedding or self.embed_query(query_text), n_results=n_results, - where=self._build_vector_metadata_filter( - source_types=source_types, - dashboard_id=dashboard_id, - ), + where=self._build_filter(source_types=source_types, dashboard_id=dashboard_id), ) @staticmethod - def _build_vector_metadata_filter( - source_types: Sequence[DashboardChatSourceType] | None = None, - dashboard_id: int | None = None, - ) -> dict[str, Any] | None: - """Build the metadata filter used for Chroma queries.""" + def _build_filter( + source_types: Union[Sequence[DashboardChatSourceType], None] = None, + dashboard_id: Union[int, None] = None, + ) -> Union[dict[str, Any], None]: + """Build the metadata filter for collection queries.""" filters: list[dict[str, Any]] = [] - if source_types: - normalized_types = [source_type.value for source_type in source_types] - if len(normalized_types) == 1: - filters.append({"source_type": normalized_types[0]}) + normalized = [st.value for st in source_types] + if len(normalized) == 1: + filters.append({"source_type": normalized[0]}) else: - filters.append({"source_type": {"$in": normalized_types}}) - + filters.append({"source_type": {"$in": normalized}}) if dashboard_id is not None: filters.append({"dashboard_id": dashboard_id}) - if not filters: return None if len(filters) == 1: diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index 64f2b3f81..1f86b6d84 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -13,6 +13,8 @@ ) from 
ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig from ddpui.core.dashboard_chat.orchestration.orchestrator import DashboardChatRuntime +from ddpui.core.dashboard_chat.orchestration.conversation import extract_conversation_context +from ddpui.core.dashboard_chat.orchestration.presentation import determine_response_format from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, @@ -24,7 +26,7 @@ ) from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.vector.store import DashboardChatVectorQueryResult +from ddpui.utils.vector.interface import VectorQueryResult as DashboardChatVectorQueryResult from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org from ddpui.models.org_user import OrgUser @@ -77,7 +79,10 @@ def query( for source_type in (source_types or []) } for row in self.rows: - if normalized_source_types and row.metadata.get("source_type") not in normalized_source_types: + if ( + normalized_source_types + and row.metadata.get("source_type") not in normalized_source_types + ): continue if dashboard_id is not None and row.metadata.get("dashboard_id") != dashboard_id: continue @@ -195,7 +200,10 @@ def execute_sql(self, sql): {"donor_type": "Grant", "beneficiary_count": 80}, {"donor_type": "Corporate", "beneficiary_count": 40}, ] - if "analytics.stg_donor_funding_clean" in sql and "GROUP BY quarter_label, donor_type" in sql: + if ( + "analytics.stg_donor_funding_clean" in sql + and "GROUP BY quarter_label, donor_type" in sql + ): return [ { "quarter_label": "2025 Q1", @@ -315,7 +323,10 @@ def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): { "id": "call-1", "name": "retrieve_docs", - "args": {"query": "How many beneficiaries are in Education?", "types": ["chart"]}, + "args": { + 
"query": "How many beneficiaries are in Education?", + "types": ["chart"], + }, } ], }, @@ -420,7 +431,9 @@ def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): if self.turn == 2: tool_messages = [message for message in messages if message["role"] == "tool"] assert any("column_not_in_table" in message["content"] for message in tool_messages) - assert any("analytics.stg_program_reach" in message["content"] for message in tool_messages) + assert any( + "analytics.stg_program_reach" in message["content"] for message in tool_messages + ) self.turn += 1 return { "content": "", @@ -495,7 +508,9 @@ def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): } if self.turn == 2: tool_messages = [message for message in messages if message["role"] == "tool"] - assert any("requested_dimension_missing" in message["content"] for message in tool_messages) + assert any( + "requested_dimension_missing" in message["content"] for message in tool_messages + ) self.turn += 1 return { "content": "", @@ -673,11 +688,14 @@ def primary_dashboard(org, orguser, primary_chart): yield dashboard dashboard.delete() + def test_extract_conversation_context_reads_previous_sql_payload(): """Follow-up routing should recover prior SQL context from assistant payloads.""" - conversation_context = DashboardChatRuntime._extract_conversation_context( + conversation_context = extract_conversation_context( [ - DashboardChatConversationMessage(role="user", content="How many beneficiaries do we have?"), + DashboardChatConversationMessage( + role="user", content="How many beneficiaries do we have?" 
+ ), DashboardChatConversationMessage( role="assistant", content="There are 120 beneficiaries.", @@ -710,7 +728,7 @@ def test_seed_distinct_cache_reuses_previous_text_filters(primary_dashboard): ) state = { "dashboard_id": primary_dashboard.id, - "conversation_context": DashboardChatRuntime._extract_conversation_context( + "conversation_context": extract_conversation_context( [ DashboardChatConversationMessage( role="assistant", @@ -753,7 +771,7 @@ def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(p "analytics.stg_donor_funding_clean", } ), - "conversation_context": DashboardChatRuntime._extract_conversation_context( + "conversation_context": extract_conversation_context( [ DashboardChatConversationMessage( role="assistant", @@ -825,7 +843,11 @@ def test_get_distinct_values_returns_column_correction_for_wrong_table(primary_d "analytics.donor_funding_quarterly", [ {"name": "quarter_label", "data_type": "text", "nullable": False}, - {"name": "total_realized_funding_usd", "data_type": "numeric", "nullable": False}, + { + "name": "total_realized_funding_usd", + "data_type": "numeric", + "nullable": False, + }, ], ), "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( @@ -863,9 +885,7 @@ def test_missing_columns_check_ignores_boolean_literals(primary_dashboard): llm_client=SmallTalkLlm(), ) state = { - "allowlist": DashboardChatAllowlist( - allowed_tables={"analytics.stg_donor_funding_clean"} - ), + "allowlist": DashboardChatAllowlist(allowed_tables={"analytics.stg_donor_funding_clean"}), "org": primary_dashboard.org, } execution_context = { @@ -1238,7 +1258,7 @@ def test_runtime_prompt_messages_do_not_inline_raw_human_context(primary_dashboa { "user_query": "Explain that metric", "human_context": "Organization context: duplicated markdown", - "conversation_context": DashboardChatRuntime._extract_conversation_context([]), + "conversation_context": extract_conversation_context([]), } ) @@ -1306,6 +1326,7 @@ def 
test_runtime_reuses_session_snapshot_across_turns(org, primary_dashboard): ] ) fake_warehouse = FakeWarehouseTools() + def build_runtime(): return DashboardChatRuntime( vector_store=vector_store, @@ -1453,7 +1474,9 @@ def test_runtime_follow_up_sql_corrects_after_failed_sql_attempt( dashboard_id=primary_dashboard.id, user_query="Now split that by donor type.", conversation_history=[ - DashboardChatConversationMessage(role="user", content="How many beneficiaries do we have?"), + DashboardChatConversationMessage( + role="user", content="How many beneficiaries do we have?" + ), DashboardChatConversationMessage( role="assistant", content="There are 120 beneficiaries.", @@ -1547,9 +1570,10 @@ def test_runtime_dbt_tools_use_compact_allowlisted_index(): ) assert search_result["count"] >= 1 - assert { - model["table"] for model in search_result["models"] - } <= {"analytics.program_reach", "analytics.stg_program_reach"} + assert {model["table"] for model in search_result["models"]} <= { + "analytics.program_reach", + "analytics.stg_program_reach", + } assert info_result["model"] == "program_reach" assert info_result["upstream"] == ["analytics.stg_program_reach"] @@ -1609,7 +1633,9 @@ def test_runtime_follow_up_sql_rejects_query_that_ignores_requested_dimension( dashboard_id=primary_dashboard.id, user_query="Now split that by donor type.", conversation_history=[ - DashboardChatConversationMessage(role="user", content="How many beneficiaries do we have?"), + DashboardChatConversationMessage( + role="user", content="How many beneficiaries do we have?" 
+ ), DashboardChatConversationMessage( role="assistant", content="There are 120 beneficiaries.", @@ -1621,9 +1647,7 @@ def test_runtime_follow_up_sql_rejects_query_that_ignores_requested_dimension( "WHERE quarter_label IN ('2025 Q1', '2025 Q2') " "ORDER BY quarter_label" ), - "metadata": { - "query_plan_tables": ["analytics.donor_funding_quarterly"] - }, + "metadata": {"query_plan_tables": ["analytics.donor_funding_quarterly"]}, }, ), ], @@ -1665,9 +1689,7 @@ def test_follow_up_dimension_validation_accepts_structural_granularity_change(pr ), ), "user_query": "Now split that by donor type.", - "allowlist": DashboardChatAllowlist( - allowed_tables={"analytics.stg_donor_funding_clean"} - ), + "allowlist": DashboardChatAllowlist(allowed_tables={"analytics.stg_donor_funding_clean"}), "org": primary_dashboard.org, } execution_context = { @@ -1842,9 +1864,7 @@ def test_tool_document_payload_exposes_structured_chart_metadata(): dashboard_id=6, distance=0.02, ), - DashboardChatAllowlist( - allowed_tables={"analytics.facilitator_effectiveness_quarterly"} - ), + DashboardChatAllowlist(allowed_tables={"analytics.facilitator_effectiveness_quarterly"}), { "dashboard": {"title": "Facilitator Effectiveness Studio"}, "charts": [ @@ -1985,7 +2005,7 @@ def test_compose_final_answer_text_uses_llm_and_normalizes_rate_values(): def test_determine_response_format_prefers_table_for_grouped_breakdowns(): """Grouped breakdowns should tell the frontend to render a structured table.""" - response_format = DashboardChatRuntime._determine_response_format( + response_format = determine_response_format( user_query="Give me a district wise pass rate breakdown", sql_results=[ { diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 064b4f172..8ef41b774 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -11,7 +11,7 @@ run_dashboard_chat_turn, 
schedule_dashboard_chat_context_builds, ) -from ddpui.core.dashboard_chat.vector.building import DashboardChatVectorBuildResult +from ddpui.core.dashboard_chat.vector.building import OrgVectorBuildResult from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse from ddpui.models.org import Org, OrgDbt from ddpui.models.dashboard import Dashboard @@ -115,7 +115,7 @@ def test_build_dashboard_chat_context_for_org_skips_when_locked(orguser): with patch( "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client - ), patch("ddpui.celeryworkers.tasks.DashboardChatVectorBuildService") as vector_build_service: + ), patch("ddpui.celeryworkers.tasks.OrgVectorBuildService") as vector_build_service: result = build_dashboard_chat_context_for_org.run(org.id) assert result == {"status": "skipped_locked", "org_id": org.id} @@ -134,7 +134,7 @@ def test_build_dashboard_chat_context_for_org_runs_vector_build(orguser): redis_client = Mock() redis_client.lock.return_value = redis_lock - result_payload = DashboardChatVectorBuildResult( + result_payload = OrgVectorBuildResult( org_id=org.id, docs_generated_at=timezone.now(), vector_ingested_at=timezone.now(), @@ -148,7 +148,7 @@ def test_build_dashboard_chat_context_for_org_runs_vector_build(orguser): with patch( "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client ), patch( - "ddpui.celeryworkers.tasks.DashboardChatVectorBuildService", + "ddpui.celeryworkers.tasks.OrgVectorBuildService", return_value=vector_build_service, ): result = build_dashboard_chat_context_for_org.run(org.id) diff --git a/ddpui/tests/core/dashboard_chat/test_vector_building.py b/ddpui/tests/core/dashboard_chat/test_vector_building.py index a2a646aca..69c080bbc 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_building.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_building.py @@ -15,12 +15,12 @@ generate_dashboard_chat_dbt_docs_artifacts, ) from 
ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.vector.building import DashboardChatVectorBuildService +from ddpui.core.dashboard_chat.vector.building import OrgVectorBuildService from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, build_dashboard_chat_collection_name, ) -from ddpui.core.dashboard_chat.vector.store import DashboardChatStoredDocument +from ddpui.utils.vector.interface import VectorStoredDocument from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpprefect import DBTCLIPROFILE from ddpui.models.dashboard import Dashboard @@ -88,7 +88,7 @@ def get_documents( if dashboard_id is not None: rows = [row for row in rows if row.metadata.get("dashboard_id") == dashboard_id] return [ - DashboardChatStoredDocument( + VectorStoredDocument( document_id=row.document_id, metadata=row.metadata, content=row.content if include_documents else None, @@ -101,7 +101,7 @@ def upsert_documents(self, org_id, documents, collection_name=None): resolved_collection_name = collection_name or self.collection_name(org_id) org_documents = self.documents_by_collection.setdefault(resolved_collection_name, {}) for document in documents: - org_documents[document.document_id] = DashboardChatStoredDocument( + org_documents[document.document_id] = VectorStoredDocument( document_id=document.document_id, metadata=document.metadata(), content=document.content, @@ -347,7 +347,9 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_genera mock_git_manager.pull_changes.assert_called_once_with() -def test_build_org_vector_context_is_idempotent_and_removes_stale_docs(org, orgdbt, orguser, dashboard): +def test_build_org_vector_context_is_idempotent_and_removes_stale_docs( + org, orgdbt, orguser, dashboard +): """A repeated identical build should skip writes, and a removed source should be deleted.""" OrgAIContext.objects.create( org=org, @@ -413,7 +415,7 @@ def 
test_build_org_vector_context_is_idempotent_and_removes_stale_docs(org, orgd }, generated_at=timezone.now(), ) - service = DashboardChatVectorBuildService( + service = OrgVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), ) @@ -449,7 +451,9 @@ def test_build_org_vector_context_is_idempotent_and_removes_stale_docs(org, orgd assert "dashboard_context" not in stored_source_types -def test_build_org_vector_context_keeps_collections_isolated_per_org(org, orgdbt, orguser, dashboard, seed_db): +def test_build_org_vector_context_keeps_collections_isolated_per_org( + org, orgdbt, orguser, dashboard, seed_db +): """The context build should never mix documents between org collections.""" other_org = Org.objects.create( name="Dashboard Chat Org 2", @@ -490,7 +494,7 @@ def test_build_org_vector_context_keeps_collections_isolated_per_org(org, orgdbt generated_at=timezone.now(), ) vector_store = FakeDashboardChatVectorStore() - service = DashboardChatVectorBuildService( + service = OrgVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), ) @@ -511,7 +515,9 @@ def test_build_org_vector_context_keeps_collections_isolated_per_org(org, orgdbt other_org.delete() -def test_build_org_vector_context_keeps_last_good_context_when_upsert_fails(org, orgdbt, orguser, dashboard): +def test_build_org_vector_context_keeps_last_good_context_when_upsert_fails( + org, orgdbt, orguser, dashboard +): """A failed rebuild should not delete the previously indexed documents.""" OrgAIContext.objects.create( org=org, @@ -525,7 +531,7 @@ def test_build_org_vector_context_keeps_last_good_context_when_upsert_fails(org, catalog_json={"sources": {}, "nodes": {}}, generated_at=timezone.now(), ) - service = DashboardChatVectorBuildService( + service = OrgVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: 
artifacts.to_artifacts(), ) @@ -555,7 +561,9 @@ def _raise_on_upsert(org_id, documents, collection_name=None): assert vector_store.delete_calls == [] -def test_build_org_vector_context_deletes_disabled_source_documents(org, orgdbt, orguser, dashboard): +def test_build_org_vector_context_deletes_disabled_source_documents( + org, orgdbt, orguser, dashboard +): """Disabled source types should be omitted from the target document set.""" OrgAIContext.objects.create( org=org, @@ -575,13 +583,13 @@ def test_build_org_vector_context_deletes_disabled_source_documents(org, orgdbt, catalog_json={"sources": {}, "nodes": {}}, generated_at=timezone.now(), ) - initial_service = DashboardChatVectorBuildService( + initial_service = OrgVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), ) initial_service.build_org_vector_context(org) - disabled_source_service = DashboardChatVectorBuildService( + disabled_source_service = OrgVectorBuildService( vector_store=vector_store, dbt_docs_generator=lambda org_instance, orgdbt_instance: artifacts.to_artifacts(), source_config=DashboardChatSourceConfig( @@ -605,11 +613,13 @@ def test_build_org_vector_context_deletes_disabled_source_documents(org, orgdbt, assert "dbt_catalog" not in stored_source_types -def test_build_org_vector_context_skips_dbt_docs_when_dbt_sources_are_disabled(org, orgdbt, dashboard): +def test_build_org_vector_context_skips_dbt_docs_when_dbt_sources_are_disabled( + org, orgdbt, dashboard +): """Disabling both dbt sources should skip dbt docs generation entirely.""" vector_store = FakeDashboardChatVectorStore() dbt_docs_generator = Mock(side_effect=AssertionError("dbt docs should not run")) - service = DashboardChatVectorBuildService( + service = OrgVectorBuildService( vector_store=vector_store, dbt_docs_generator=dbt_docs_generator, source_config=DashboardChatSourceConfig( diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py 
b/ddpui/tests/core/dashboard_chat/test_vector_store.py index b555bf47a..a7d2e0a4a 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -9,7 +9,8 @@ DashboardChatVectorDocument, build_dashboard_chat_collection_name, ) -from ddpui.core.dashboard_chat.vector.store import ChromaDashboardChatVectorStore +from ddpui.utils.vector.backends.chroma import ChromaVectorStore +from ddpui.core.dashboard_chat.vector.store import OrgVectorStore class FakeEmbeddingProvider: @@ -134,9 +135,9 @@ def test_dashboard_chat_vector_store_config_reads_env(): ): config = DashboardChatVectorStoreConfig.from_env() - assert config.chroma_host == "chroma.internal" - assert config.chroma_port == 8100 - assert config.chroma_ssl is True + assert config.vector_store_host == "chroma.internal" + assert config.vector_store_port == 8100 + assert config.vector_store_ssl is True assert config.collection_prefix == "tenant_" assert config.embedding_model == "text-embedding-3-large" @@ -194,10 +195,10 @@ def test_vector_document_has_stable_id_and_required_metadata(): def test_upsert_documents_uses_embeddings_and_metadata(): """Upserts should use deterministic IDs, embeddings, and per-org collections.""" fake_client = FakeChromaClient() - store = ChromaDashboardChatVectorStore( + store = OrgVectorStore( config=DashboardChatVectorStoreConfig(collection_prefix="org_"), embedding_provider=FakeEmbeddingProvider(), - client=fake_client, + backend=ChromaVectorStore(client=fake_client), ) documents = [ DashboardChatVectorDocument( @@ -234,10 +235,10 @@ def test_query_scopes_to_org_collection_and_where_filters(): """Queries should stay inside the org collection and forward source/dashboard filters.""" fake_client = FakeChromaClient() fake_client.get_or_create_collection("org_3") - store = ChromaDashboardChatVectorStore( + store = OrgVectorStore( config=DashboardChatVectorStoreConfig(), embedding_provider=FakeEmbeddingProvider(), - 
client=fake_client, + backend=ChromaVectorStore(client=fake_client), ) results = store.query( @@ -262,10 +263,10 @@ def test_query_scopes_to_org_collection_and_where_filters(): def test_delete_collection_returns_false_for_missing_org(): """Deleting a missing collection should be a no-op.""" - store = ChromaDashboardChatVectorStore( + store = OrgVectorStore( config=DashboardChatVectorStoreConfig(), embedding_provider=FakeEmbeddingProvider(), - client=FakeChromaClient(), + backend=ChromaVectorStore(client=FakeChromaClient()), ) assert store.delete_collection(404) is False @@ -274,10 +275,10 @@ def test_delete_collection_returns_false_for_missing_org(): def test_get_documents_and_delete_documents_respect_where_filters(): """Collection reads and deletes should honor source and dashboard scoping.""" fake_client = FakeChromaClient() - store = ChromaDashboardChatVectorStore( + store = OrgVectorStore( config=DashboardChatVectorStoreConfig(), embedding_provider=FakeEmbeddingProvider(), - client=fake_client, + backend=ChromaVectorStore(client=fake_client), ) documents = [ DashboardChatVectorDocument( diff --git a/ddpui/utils/vector/__init__.py b/ddpui/utils/vector/__init__.py index 00cf8100d..b6fef17ab 100644 --- a/ddpui/utils/vector/__init__.py +++ b/ddpui/utils/vector/__init__.py @@ -1 +1,3 @@ """Generic vector-database utilities.""" + +from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument diff --git a/ddpui/utils/vector/backends/__init__.py b/ddpui/utils/vector/backends/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ddpui/utils/vector/backends/chroma.py b/ddpui/utils/vector/backends/chroma.py new file mode 100644 index 000000000..f5951f5df --- /dev/null +++ b/ddpui/utils/vector/backends/chroma.py @@ -0,0 +1,171 @@ +"""Chroma vector store backend implementing the VectorStore interface.""" + +from functools import lru_cache +from typing import Any, Union + +from chromadb import ClientAPI, Collection, 
HttpClient +from chromadb.errors import InvalidCollectionException + +from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument + + +@lru_cache(maxsize=8) +def _get_chroma_client(host: str, port: int, ssl: bool) -> ClientAPI: + """Return a shared Chroma HTTP client for one host/port/ssl tuple.""" + return HttpClient(host=host, port=port, ssl=ssl) + + +class ChromaVectorStore(VectorStore): + """Chroma HTTP backend implementing the VectorStore interface.""" + + def __init__( + self, + *, + host: str = "localhost", + port: int = 8000, + ssl: bool = False, + client: Union[ClientAPI, None] = None, + ): + self.client = client or _get_chroma_client(host, port, ssl) + + def create_collection( + self, + name: str, + *, + metadata: Union[dict[str, Any], None] = None, + ) -> Collection: + return self.client.get_or_create_collection(name=name, metadata=metadata) + + def load_collection(self, name: str) -> Union[Collection, None]: + try: + return self.client.get_collection(name=name) + except (InvalidCollectionException, ValueError): + return None + + def delete_collection(self, name: str) -> bool: + if self.load_collection(name) is None: + return False + self.client.delete_collection(name=name) + return True + + def list_collection_names(self) -> list[str]: + raw_collections = self.client.list_collections() + collection_names: list[str] = [] + for collection in raw_collections: + if isinstance(collection, str): + collection_names.append(collection) + continue + name = getattr(collection, "name", None) + if name: + collection_names.append(str(name)) + return collection_names + + def get_documents( + self, + collection_name: str, + *, + where: Union[dict[str, Any], None] = None, + include_documents: bool = False, + ) -> list[VectorStoredDocument]: + collection = self.load_collection(collection_name) + if collection is None: + return [] + include = ["metadatas"] + if include_documents: + include.append("documents") + result = 
collection.get(where=where, include=include) + return self._parse_get_response(result, include_documents=include_documents) + + def delete_documents( + self, + collection_name: str, + *, + ids: Union[list[str], None] = None, + where: Union[dict[str, Any], None] = None, + ) -> int: + collection = self.load_collection(collection_name) + if collection is None: + return 0 + if ids is None and where is None: + return 0 + deleted_count = ( + len(ids) if ids is not None else len(self.get_documents(collection_name, where=where)) + ) + collection.delete(ids=ids, where=where) + return deleted_count + + def upsert( + self, + collection_name: str, + *, + ids: list[str], + documents: list[str], + metadatas: list[dict[str, Any]], + embeddings: list[list[float]], + collection_metadata: Union[dict[str, Any], None] = None, + ) -> list[str]: + if not ids: + return [] + collection = self.create_collection(collection_name, metadata=collection_metadata) + collection.upsert(ids=ids, documents=documents, metadatas=metadatas, embeddings=embeddings) + return ids + + def query( + self, + collection_name: str, + *, + query_embedding: list[float], + n_results: int = 5, + where: Union[dict[str, Any], None] = None, + ) -> list[VectorQueryResult]: + collection = self.load_collection(collection_name) + if collection is None: + return [] + result = collection.query( + query_embeddings=[query_embedding], + n_results=n_results, + where=where, + include=["documents", "metadatas", "distances"], + ) + return self._parse_query_response(result) + + @staticmethod + def _parse_query_response(result: dict[str, Any]) -> list[VectorQueryResult]: + ids = result.get("ids", [[]]) + documents = result.get("documents", [[]]) + metadatas = result.get("metadatas", [[]]) + distances = result.get("distances", [[]]) + return [ + VectorQueryResult( + document_id=document_id, + content=content, + metadata=metadata, + distance=distance, + ) + for document_id, content, metadata, distance in zip( + ids[0] if ids else [], + 
documents[0] if documents else [], + metadatas[0] if metadatas else [], + distances[0] if distances else [], + ) + ] + + @staticmethod + def _parse_get_response( + result: dict[str, Any], + *, + include_documents: bool = False, + ) -> list[VectorStoredDocument]: + ids = result.get("ids", []) + metadatas = result.get("metadatas", []) + documents = result.get("documents", []) if include_documents else [] + return [ + VectorStoredDocument( + document_id=document_id, + metadata=metadatas[index] if index < len(metadatas) else {}, + content=( + documents[index] if include_documents and index < len(documents) else None + ), + ) + for index, document_id in enumerate(ids) + ] diff --git a/ddpui/utils/vector/chroma/__init__.py b/ddpui/utils/vector/chroma/__init__.py deleted file mode 100644 index 605abdc2c..000000000 --- a/ddpui/utils/vector/chroma/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Chroma helpers shared across Dalgo.""" - -from ddpui.utils.vector.chroma.client import get_shared_chroma_http_client -from ddpui.utils.vector.chroma.store import ChromaHttpVectorStore -from ddpui.utils.vector.chroma.types import ChromaQueryResult, ChromaStoredDocument - -__all__ = [ - "ChromaHttpVectorStore", - "ChromaQueryResult", - "ChromaStoredDocument", - "get_shared_chroma_http_client", -] diff --git a/ddpui/utils/vector/chroma/client.py b/ddpui/utils/vector/chroma/client.py deleted file mode 100644 index 5741de917..000000000 --- a/ddpui/utils/vector/chroma/client.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Shared Chroma HTTP client helpers.""" - -from functools import lru_cache - -from chromadb import ClientAPI, HttpClient - - -@lru_cache(maxsize=8) -def get_shared_chroma_http_client(host: str, port: int, ssl: bool) -> ClientAPI: - """Return a shared Chroma HTTP client for one host/port/ssl tuple.""" - return HttpClient(host=host, port=port, ssl=ssl) diff --git a/ddpui/utils/vector/chroma/store.py b/ddpui/utils/vector/chroma/store.py deleted file mode 100644 index 55c9e95d7..000000000 
--- a/ddpui/utils/vector/chroma/store.py +++ /dev/null @@ -1,207 +0,0 @@ -"""Generic Chroma wrapper that only knows how to talk to the Chroma server.""" - -from typing import Any - -from chromadb import ClientAPI -from chromadb.errors import InvalidCollectionException - -from ddpui.utils.vector.chroma.client import get_shared_chroma_http_client -from ddpui.utils.vector.chroma.types import ChromaQueryResult, ChromaStoredDocument - - -class ChromaHttpVectorStore: - """Thin generic wrapper around the Chroma HTTP client.""" - - def __init__( - self, - *, - host: str = "localhost", - port: int = 8000, - ssl: bool = False, - client: ClientAPI | None = None, - ): - self.client = client or get_shared_chroma_http_client(host, port, ssl) - - def create_collection( - self, - name: str, - *, - metadata: dict[str, Any] | None = None, - ) -> Any: - """Create or load a Chroma collection.""" - return self.client.get_or_create_collection(name=name, metadata=metadata) - - def load_collection(self, name: str) -> Any | None: - """Load one existing Chroma collection by name.""" - try: - return self.client.get_collection(name=name) - except (InvalidCollectionException, ValueError): - return None - - def delete_collection(self, name: str) -> bool: - """Delete one Chroma collection if it exists.""" - if self.load_collection(name) is None: - return False - self.client.delete_collection(name=name) - return True - - def list_collection_names(self) -> list[str]: - """Return all collection names available to this Chroma client.""" - raw_collections = self.client.list_collections() - collection_names: list[str] = [] - for collection in raw_collections: - if isinstance(collection, str): - collection_names.append(collection) - continue - name = getattr(collection, "name", None) - if name: - collection_names.append(str(name)) - return collection_names - - def get_documents( - self, - collection_name: str, - *, - where: dict[str, Any] | None = None, - include_documents: bool = False, - ) -> 
list[ChromaStoredDocument]: - """Read documents from one collection using an optional metadata filter.""" - collection = self.load_collection(collection_name) - if collection is None: - return [] - - include = ["metadatas"] - if include_documents: - include.append("documents") - result = collection.get(where=where, include=include) - return self._parse_chroma_get_response( - result, - include_documents=include_documents, - ) - - def delete_documents( - self, - collection_name: str, - *, - ids: list[str] | None = None, - where: dict[str, Any] | None = None, - ) -> int: - """Delete documents from one collection by ids and/or metadata filter.""" - collection = self.load_collection(collection_name) - if collection is None: - return 0 - - if ids is None and where is None: - return 0 - - deleted_count = ( - len(ids) - if ids is not None - else len( - self.get_documents( - collection_name, - where=where, - include_documents=False, - ) - ) - ) - collection.delete(ids=ids, where=where) - return deleted_count - - def upsert_documents( - self, - collection_name: str, - *, - ids: list[str], - documents: list[str], - metadatas: list[dict[str, Any]], - embeddings: list[list[float]], - collection_metadata: dict[str, Any] | None = None, - ) -> list[str]: - """Upsert documents into one collection.""" - if not ids: - return [] - - collection = self.create_collection( - collection_name, - metadata=collection_metadata, - ) - collection.upsert( - ids=ids, - documents=documents, - metadatas=metadatas, - embeddings=embeddings, - ) - return ids - - def query( - self, - collection_name: str, - *, - query_embedding: list[float], - n_results: int = 5, - where: dict[str, Any] | None = None, - ) -> list[ChromaQueryResult]: - """Query one collection using a precomputed embedding and optional metadata filter.""" - collection = self.load_collection(collection_name) - if collection is None: - return [] - - result = collection.query( - query_embeddings=[query_embedding], - n_results=n_results, - 
where=where, - include=["documents", "metadatas", "distances"], - ) - return self._parse_chroma_query_response(result) - - @staticmethod - def _parse_chroma_query_response( - result: dict[str, Any], - ) -> list[ChromaQueryResult]: - """Parse Chroma's nested query result shape into flat typed rows.""" - ids = result.get("ids", [[]]) - documents = result.get("documents", [[]]) - metadatas = result.get("metadatas", [[]]) - distances = result.get("distances", [[]]) - - parsed_results: list[ChromaQueryResult] = [] - for document_id, content, metadata, distance in zip( - ids[0] if ids else [], - documents[0] if documents else [], - metadatas[0] if metadatas else [], - distances[0] if distances else [], - ): - parsed_results.append( - ChromaQueryResult( - document_id=document_id, - content=content, - metadata=metadata, - distance=distance, - ) - ) - return parsed_results - - @staticmethod - def _parse_chroma_get_response( - result: dict[str, Any], - *, - include_documents: bool = False, - ) -> list[ChromaStoredDocument]: - """Parse Chroma's get result into typed stored-document rows.""" - ids = result.get("ids", []) - metadatas = result.get("metadatas", []) - documents = result.get("documents", []) if include_documents else [] - - parsed_results: list[ChromaStoredDocument] = [] - for index, document_id in enumerate(ids): - parsed_results.append( - ChromaStoredDocument( - document_id=document_id, - metadata=metadatas[index] if index < len(metadatas) else {}, - content=documents[index] - if include_documents and index < len(documents) - else None, - ) - ) - return parsed_results diff --git a/ddpui/utils/vector/chroma/types.py b/ddpui/utils/vector/chroma/types.py deleted file mode 100644 index 4759f9c70..000000000 --- a/ddpui/utils/vector/chroma/types.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Generic Chroma result objects shared across Dalgo.""" - -from dataclasses import dataclass -from typing import Any - - -@dataclass(frozen=True) -class ChromaQueryResult: - """Single query 
result returned from Chroma.""" - - document_id: str - content: str - metadata: dict[str, Any] - distance: float | None = None - - -@dataclass(frozen=True) -class ChromaStoredDocument: - """Stored document metadata returned from Chroma collection reads.""" - - document_id: str - metadata: dict[str, Any] - content: str | None = None diff --git a/ddpui/utils/vector/interface.py b/ddpui/utils/vector/interface.py new file mode 100644 index 000000000..f26fbef87 --- /dev/null +++ b/ddpui/utils/vector/interface.py @@ -0,0 +1,93 @@ +"""Abstract vector store interface.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Union + + +@dataclass(frozen=True) +class VectorQueryResult: + """Single result from a vector similarity query.""" + + document_id: str + content: str + metadata: dict[str, Any] + distance: Union[float, None] = None + + +@dataclass(frozen=True) +class VectorStoredDocument: + """Stored document returned from a vector collection read.""" + + document_id: str + metadata: dict[str, Any] + content: Union[str, None] = None + + +class VectorStore(ABC): + """Abstract interface for a vector store backend.""" + + @abstractmethod + def create_collection( + self, + name: str, + *, + metadata: Union[dict[str, Any], None] = None, + ) -> Any: + """Create or load a collection by name.""" + + @abstractmethod + def load_collection(self, name: str) -> Union[Any, None]: + """Load an existing collection by name, or return None if not found.""" + + @abstractmethod + def delete_collection(self, name: str) -> bool: + """Delete a collection if it exists. 
Returns True if deleted.""" + + @abstractmethod + def list_collection_names(self) -> list[str]: + """Return all collection names available in this backend.""" + + @abstractmethod + def get_documents( + self, + collection_name: str, + *, + where: Union[dict[str, Any], None] = None, + include_documents: bool = False, + ) -> list[VectorStoredDocument]: + """Read documents from a collection using an optional metadata filter.""" + + @abstractmethod + def delete_documents( + self, + collection_name: str, + *, + ids: Union[list[str], None] = None, + where: Union[dict[str, Any], None] = None, + ) -> int: + """Delete documents by ids and/or metadata filter. Returns count deleted.""" + + @abstractmethod + def upsert( + self, + collection_name: str, + *, + ids: list[str], + documents: list[str], + metadatas: list[dict[str, Any]], + embeddings: list[list[float]], + collection_metadata: Union[dict[str, Any], None] = None, + ) -> list[str]: + """Upsert documents into a collection. Returns the upserted ids.""" + + @abstractmethod + def query( + self, + collection_name: str, + *, + query_embedding: list[float], + n_results: int = 5, + where: Union[dict[str, Any], None] = None, + ) -> list[VectorQueryResult]: + """Query a collection using a precomputed embedding.""" From b3f585ba957d83397d1c6c6fdc5386b817f25541 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Fri, 27 Mar 2026 01:48:55 +0530 Subject: [PATCH 23/49] updates --- ddpui/celeryworkers/tasks.py | 2 +- ddpui/core/dashboard_chat/vector/building.py | 158 --------------- .../core/dashboard_chat/vector/embeddings.py | 86 --------- .../vector/{builder.py => ingest.py} | 180 ++++++++++++++++-- ddpui/core/dashboard_chat/vector/store.py | 100 +++++++++- ddpui/tests/core/dashboard_chat/test_tasks.py | 2 +- .../dashboard_chat/test_vector_building.py | 2 +- 7 files changed, 266 insertions(+), 264 deletions(-) delete mode 100644 ddpui/core/dashboard_chat/vector/building.py delete mode 100644 
ddpui/core/dashboard_chat/vector/embeddings.py rename ddpui/core/dashboard_chat/vector/{builder.py => ingest.py} (73%) diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index ec14e6654..db0aa6076 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -75,7 +75,7 @@ ) from ddpui.core.orgdbt_manager import DbtProjectManager, DbtCommandError from ddpui.core.git_manager import GitManager, GitManagerError -from ddpui.core.dashboard_chat.vector.building import OrgVectorBuildService +from ddpui.core.dashboard_chat.vector.ingest import OrgVectorBuildService from ddpui.core.dashboard_chat.orchestration.orchestrator import get_dashboard_chat_runtime from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpairbyte import airbyte_service, airbytehelpers diff --git a/ddpui/core/dashboard_chat/vector/building.py b/ddpui/core/dashboard_chat/vector/building.py deleted file mode 100644 index 4692886e2..000000000 --- a/ddpui/core/dashboard_chat/vector/building.py +++ /dev/null @@ -1,158 +0,0 @@ -"""Vector build pipeline for dashboard chat retrieval.""" - -from dataclasses import dataclass -from datetime import timedelta -from typing import Callable, Union - -from django.utils import timezone - -from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.context.dbt_docs import ( - DashboardChatDbtDocsArtifacts, - generate_dashboard_chat_dbt_docs_artifacts, -) -from ddpui.core.dashboard_chat.vector.builder import DashboardChatVectorDocumentBuilder -from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.vector.store import OrgVectorStore -from ddpui.models.dashboard_chat import DashboardChatSession -from ddpui.models.org import Org - -INGEST_SOURCE_ORDER = [ - DashboardChatSourceType.ORG_CONTEXT, - DashboardChatSourceType.DASHBOARD_CONTEXT, - DashboardChatSourceType.DASHBOARD_EXPORT, - DashboardChatSourceType.DBT_MANIFEST, - 
DashboardChatSourceType.DBT_CATALOG, -] - - -class OrgVectorBuildError(Exception): - """Raised when an org vector build cannot complete.""" - - -@dataclass(frozen=True) -class OrgVectorBuildResult: - """Summary of one completed org vector build.""" - - org_id: int - docs_generated_at: Union[timezone.datetime, None] - vector_ingested_at: timezone.datetime - source_document_counts: dict[str, int] - upserted_document_ids: list[str] - deleted_document_ids: list[str] - - -class OrgVectorBuildService: - """Build org-scoped dashboard-chat vector context and sync it into the vector store.""" - - def __init__( - self, - vector_store: Union[OrgVectorStore, None] = None, - dbt_docs_generator: Union[ - Callable[[Org, object], DashboardChatDbtDocsArtifacts], None - ] = None, - source_config: Union[DashboardChatSourceConfig, None] = None, - document_builder: Union[DashboardChatVectorDocumentBuilder, None] = None, - ): - self.vector_store = vector_store or OrgVectorStore() - self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts - self.source_config = source_config or DashboardChatSourceConfig.from_env() - self.document_builder = document_builder or DashboardChatVectorDocumentBuilder( - source_config=self.source_config - ) - - def build_org_vector_context(self, org: Org) -> OrgVectorBuildResult: - """Run dbt docs generation and rebuild the desired vector documents for an org.""" - if org.dbt is None: - raise OrgVectorBuildError("dbt workspace not configured") - - collection_versioned_at = timezone.now() - target_collection_name = self.vector_store.collection_name( - org.id, - version=collection_versioned_at, - ) - dbt_docs = None - if self.source_config.is_enabled( - DashboardChatSourceType.DBT_MANIFEST - ) or self.source_config.is_enabled(DashboardChatSourceType.DBT_CATALOG): - dbt_docs = self.dbt_docs_generator(org, org.dbt) - documents_by_source = self.document_builder.build_documents_by_source(org, dbt_docs) - desired_documents = [ - 
document - for source_type in INGEST_SOURCE_ORDER - if self.source_config.is_enabled(source_type) - for document in documents_by_source[source_type.value] - ] - if ( - self.vector_store.load_collection( - org.id, - collection_name=target_collection_name, - allow_legacy_fallback=False, - ) - is not None - ): - self.vector_store.delete_collection( - org.id, - collection_name=target_collection_name, - ) - - upserted_document_ids = sorted( - self.vector_store.upsert_documents( - org.id, - desired_documents, - collection_name=target_collection_name, - ) - ) - - vector_ingested_at = collection_versioned_at - org.dbt.vector_last_ingested_at = collection_versioned_at - org.dbt.save(update_fields=["vector_last_ingested_at", "updated_at"]) - self._garbage_collect_inactive_collections( - org=org, - active_collection_name=target_collection_name, - ) - - return OrgVectorBuildResult( - org_id=org.id, - docs_generated_at=dbt_docs.generated_at if dbt_docs else org.dbt.docs_generated_at, - vector_ingested_at=vector_ingested_at, - source_document_counts={ - source_type.value: ( - len(documents_by_source[source_type.value]) - if self.source_config.is_enabled(source_type) - else 0 - ) - for source_type in INGEST_SOURCE_ORDER - }, - upserted_document_ids=upserted_document_ids, - deleted_document_ids=[], - ) - - def _garbage_collect_inactive_collections( - self, - *, - org: Org, - active_collection_name: str, - ) -> None: - """Delete old versioned collections not pinned by recent chat sessions.""" - retention_cutoff = timezone.now() - timedelta(hours=24) - recent_sessions = DashboardChatSession.objects.filter( - org=org, - updated_at__gte=retention_cutoff, - ) - pinned_collection_names = { - collection_name - for collection_name in recent_sessions.values_list("vector_collection_name", flat=True) - if collection_name - } - if recent_sessions.filter(vector_collection_name__isnull=True).exists(): - pinned_collection_names.add(self.vector_store.collection_name(org.id)) - 
pinned_collection_names.add(active_collection_name) - - for collection_name in self.vector_store.list_org_collection_names(org.id): - if collection_name in pinned_collection_names: - continue - self.vector_store.delete_collection( - org.id, - collection_name=collection_name, - ) diff --git a/ddpui/core/dashboard_chat/vector/embeddings.py b/ddpui/core/dashboard_chat/vector/embeddings.py deleted file mode 100644 index 407eacade..000000000 --- a/ddpui/core/dashboard_chat/vector/embeddings.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Embedding providers used by dashboard chat retrieval.""" - -import os -from typing import Any, Protocol - -from openai import OpenAI - -from ddpui.utils.openai_client import get_shared_openai_client - - -class DashboardChatEmbeddingProvider(Protocol): - """Embedding provider interface used by the vector store wrapper.""" - - def embed_documents(self, texts: list[str]) -> list[list[float]]: - """Embed a batch of texts.""" - - def embed_query(self, text: str) -> list[float]: - """Embed a single query.""" - - def reset_usage(self) -> None: - """Reset per-turn embedding usage before a new runtime invocation.""" - - -class OpenAIEmbeddingProvider: - """OpenAI embeddings adapter for dashboard chat retrieval.""" - - def __init__( - self, - api_key: str | None = None, - model: str = "text-embedding-3-small", - client: OpenAI | None = None, - ): - self.api_key = api_key or os.getenv("OPENAI_API_KEY") - self.model = model - self.usage_events: list[dict[str, Any]] = [] - if client is None: - if not self.api_key: - raise ValueError("OPENAI_API_KEY must be set for dashboard chat embeddings") - client = get_shared_openai_client(self.api_key, max_retries=2) - self.client = client - - def reset_usage(self) -> None: - """Reset aggregated embedding usage before one new chat turn.""" - self.usage_events = [] - - def embed_documents(self, texts: list[str]) -> list[list[float]]: - """Embed a batch of documents using OpenAI.""" - if not texts: - return [] - response = 
self.client.embeddings.create(model=self.model, input=texts) - self._record_usage("embed_documents", response, len(texts)) - return [item.embedding for item in response.data] - - def embed_query(self, text: str) -> list[float]: - """Embed a single query using the document embedding path.""" - return self.embed_documents([text])[0] - - def usage_summary(self) -> dict[str, Any]: - """Return aggregated embedding usage for the current turn.""" - totals = { - "prompt_tokens": 0, - "total_tokens": 0, - } - for event in self.usage_events: - totals["prompt_tokens"] += event.get("prompt_tokens", 0) - totals["total_tokens"] += event.get("total_tokens", 0) - return { - "model": self.model, - "calls": list(self.usage_events), - "totals": totals, - } - - def _record_usage(self, operation: str, response: Any, input_count: int) -> None: - """Capture embedding usage from one OpenAI embeddings response.""" - usage = getattr(response, "usage", None) - if usage is None: - return - self.usage_events.append( - { - "operation": operation, - "model": self.model, - "input_count": input_count, - "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, - "total_tokens": getattr(usage, "total_tokens", 0) or 0, - } - ) diff --git a/ddpui/core/dashboard_chat/vector/builder.py b/ddpui/core/dashboard_chat/vector/ingest.py similarity index 73% rename from ddpui/core/dashboard_chat/vector/builder.py rename to ddpui/core/dashboard_chat/vector/ingest.py index 04a64c29a..406eb0632 100644 --- a/ddpui/core/dashboard_chat/vector/builder.py +++ b/ddpui/core/dashboard_chat/vector/ingest.py @@ -1,18 +1,25 @@ -"""Document-building helpers for dashboard chat vector context.""" +"""Vector ingestion pipeline for dashboard chat — document building and org-level rebuild.""" -from collections import defaultdict import json +from collections import defaultdict +from dataclasses import dataclass +from datetime import timedelta +from typing import Callable, Union from django.utils import timezone from 
ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.context.dbt_docs import DashboardChatDbtDocsArtifacts +from ddpui.core.dashboard_chat.context.dbt_docs import ( + DashboardChatDbtDocsArtifacts, + generate_dashboard_chat_dbt_docs_artifacts, +) from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, DashboardChatVectorDocument, ) +from ddpui.core.dashboard_chat.vector.store import OrgVectorStore from ddpui.models.dashboard import Dashboard -from ddpui.models.dashboard_chat import DashboardAIContext, OrgAIContext +from ddpui.models.dashboard_chat import DashboardAIContext, DashboardChatSession, OrgAIContext from ddpui.models.org import Org from ddpui.models.visualization import Chart from ddpui.services.dashboard_service import DashboardService @@ -28,6 +35,11 @@ ] +# --------------------------------------------------------------------------- +# Text chunking +# --------------------------------------------------------------------------- + + def _normalize_text(value: str) -> str: """Normalize text before chunking so document IDs stay deterministic.""" return "\n".join( @@ -75,16 +87,21 @@ def chunk_dashboard_chat_text(text: str, max_chars: int = MARKDOWN_CHUNK_MAX_CHA return chunks +# --------------------------------------------------------------------------- +# Document builder — reads from DB, produces DashboardChatVectorDocument chunks +# --------------------------------------------------------------------------- + + class DashboardChatVectorDocumentBuilder: """Build dashboard-chat vector documents from app context and dbt docs.""" - def __init__(self, source_config: DashboardChatSourceConfig | None = None): + def __init__(self, source_config: Union[DashboardChatSourceConfig, None] = None): self.source_config = source_config or DashboardChatSourceConfig.from_env() def build_documents_by_source( self, org: Org, - dbt_docs: DashboardChatDbtDocsArtifacts | None, + dbt_docs: 
Union[DashboardChatDbtDocsArtifacts, None], ) -> dict[str, list[DashboardChatVectorDocument]]: """Build the full desired vector document set for an org.""" documents_by_source: dict[str, list[DashboardChatVectorDocument]] = defaultdict(list) @@ -171,8 +188,8 @@ def _build_markdown_documents( source_identifier: str, markdown: str, title: str, - dashboard_id: int | None = None, - updated_at: timezone.datetime | None = None, + dashboard_id: Union[int, None] = None, + updated_at: Union[timezone.datetime, None] = None, ) -> list[DashboardChatVectorDocument]: """Chunk a markdown source into deterministic vector documents.""" return [ @@ -366,7 +383,7 @@ def _build_catalog_documents( return documents @staticmethod - def _include_dbt_unique_id(unique_id: str, project_name: str | None) -> bool: + def _include_dbt_unique_id(unique_id: str, project_name: Union[str, None]) -> bool: """Exclude package docs that do not belong to the org project.""" parts = unique_id.split(".") if len(parts) < 2: @@ -381,7 +398,9 @@ def _include_dbt_unique_id(unique_id: str, project_name: str | None) -> bool: @staticmethod def _format_manifest_source(unique_id: str, source: dict) -> str: """Format a manifest source entry into stable text.""" - column_lines = DashboardChatVectorDocumentBuilder._format_columns(source.get("columns") or {}) + column_lines = DashboardChatVectorDocumentBuilder._format_columns( + source.get("columns") or {} + ) blocks = [ f"dbt manifest source: {source.get('schema')}.{source.get('name')}", f"Unique id: {unique_id}", @@ -404,9 +423,7 @@ def _format_manifest_model(unique_id: str, node: dict) -> str: ] depends_on_nodes = sorted(node.get("depends_on", {}).get("nodes") or []) if depends_on_nodes: - blocks.append( - "Depends on:\n" + "\n".join(f"- {dependency}" for dependency in depends_on_nodes) - ) + blocks.append("Depends on:\n" + "\n".join(f"- {dep}" for dep in depends_on_nodes)) column_lines = DashboardChatVectorDocumentBuilder._format_columns(node.get("columns") or {}) 
if column_lines: blocks.append("Columns:\n" + "\n".join(column_lines)) @@ -459,3 +476,140 @@ def _format_catalog_columns(columns: dict) -> list[str]: line += f": {column['comment']}" formatted_columns.append(line) return formatted_columns + + +# --------------------------------------------------------------------------- +# Build pipeline — orchestrates a full org vector rebuild +# --------------------------------------------------------------------------- + + +class OrgVectorBuildError(Exception): + """Raised when an org vector build cannot complete.""" + + +@dataclass(frozen=True) +class OrgVectorBuildResult: + """Summary of one completed org vector build.""" + + org_id: int + docs_generated_at: Union[timezone.datetime, None] + vector_ingested_at: timezone.datetime + source_document_counts: dict[str, int] + upserted_document_ids: list[str] + deleted_document_ids: list[str] + + +class OrgVectorBuildService: + """Build org-scoped dashboard-chat vector context and sync it into the vector store.""" + + def __init__( + self, + vector_store: Union[OrgVectorStore, None] = None, + dbt_docs_generator: Union[ + Callable[[Org, object], DashboardChatDbtDocsArtifacts], None + ] = None, + source_config: Union[DashboardChatSourceConfig, None] = None, + document_builder: Union[DashboardChatVectorDocumentBuilder, None] = None, + ): + self.vector_store = vector_store or OrgVectorStore() + self.dbt_docs_generator = dbt_docs_generator or generate_dashboard_chat_dbt_docs_artifacts + self.source_config = source_config or DashboardChatSourceConfig.from_env() + self.document_builder = document_builder or DashboardChatVectorDocumentBuilder( + source_config=self.source_config + ) + + def build_org_vector_context(self, org: Org) -> OrgVectorBuildResult: + """Run dbt docs generation and rebuild the desired vector documents for an org.""" + if org.dbt is None: + raise OrgVectorBuildError("dbt workspace not configured") + + collection_versioned_at = timezone.now() + target_collection_name = 
self.vector_store.collection_name( + org.id, + version=collection_versioned_at, + ) + dbt_docs = None + if self.source_config.is_enabled( + DashboardChatSourceType.DBT_MANIFEST + ) or self.source_config.is_enabled(DashboardChatSourceType.DBT_CATALOG): + dbt_docs = self.dbt_docs_generator(org, org.dbt) + documents_by_source = self.document_builder.build_documents_by_source(org, dbt_docs) + desired_documents = [ + document + for source_type in INGEST_SOURCE_ORDER + if self.source_config.is_enabled(source_type) + for document in documents_by_source[source_type.value] + ] + if ( + self.vector_store.load_collection( + org.id, + collection_name=target_collection_name, + allow_legacy_fallback=False, + ) + is not None + ): + self.vector_store.delete_collection( + org.id, + collection_name=target_collection_name, + ) + + upserted_document_ids = sorted( + self.vector_store.upsert_documents( + org.id, + desired_documents, + collection_name=target_collection_name, + ) + ) + + vector_ingested_at = collection_versioned_at + org.dbt.vector_last_ingested_at = collection_versioned_at + org.dbt.save(update_fields=["vector_last_ingested_at", "updated_at"]) + self._garbage_collect_inactive_collections( + org=org, + active_collection_name=target_collection_name, + ) + + return OrgVectorBuildResult( + org_id=org.id, + docs_generated_at=dbt_docs.generated_at if dbt_docs else org.dbt.docs_generated_at, + vector_ingested_at=vector_ingested_at, + source_document_counts={ + source_type.value: ( + len(documents_by_source[source_type.value]) + if self.source_config.is_enabled(source_type) + else 0 + ) + for source_type in INGEST_SOURCE_ORDER + }, + upserted_document_ids=upserted_document_ids, + deleted_document_ids=[], + ) + + def _garbage_collect_inactive_collections( + self, + *, + org: Org, + active_collection_name: str, + ) -> None: + """Delete old versioned collections not pinned by recent chat sessions.""" + retention_cutoff = timezone.now() - timedelta(hours=24) + recent_sessions = 
DashboardChatSession.objects.filter( + org=org, + updated_at__gte=retention_cutoff, + ) + pinned_collection_names = { + collection_name + for collection_name in recent_sessions.values_list("vector_collection_name", flat=True) + if collection_name + } + if recent_sessions.filter(vector_collection_name__isnull=True).exists(): + pinned_collection_names.add(self.vector_store.collection_name(org.id)) + pinned_collection_names.add(active_collection_name) + + for collection_name in self.vector_store.list_org_collection_names(org.id): + if collection_name in pinned_collection_names: + continue + self.vector_store.delete_collection( + org.id, + collection_name=collection_name, + ) diff --git a/ddpui/core/dashboard_chat/vector/store.py b/ddpui/core/dashboard_chat/vector/store.py index 55897ef75..b6d85ffff 100644 --- a/ddpui/core/dashboard_chat/vector/store.py +++ b/ddpui/core/dashboard_chat/vector/store.py @@ -1,8 +1,12 @@ """Org-scoped vector store for dashboard chat retrieval.""" +import os +from abc import ABC, abstractmethod from collections.abc import Sequence from typing import Any, Union +from openai import OpenAI + from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, @@ -10,13 +14,101 @@ build_dashboard_chat_collection_base_name, build_dashboard_chat_collection_name, ) -from ddpui.core.dashboard_chat.vector.embeddings import ( - DashboardChatEmbeddingProvider, - OpenAIEmbeddingProvider, -) +from ddpui.utils.openai_client import get_shared_openai_client from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument +# --------------------------------------------------------------------------- +# Embedding providers +# --------------------------------------------------------------------------- + + +class DashboardChatEmbeddingProvider(ABC): + """Embedding provider interface used by the vector store wrapper.""" + + @abstractmethod + def 
embed_documents(self, texts: list[str]) -> list[list[float]]: + """Embed a batch of texts.""" + + @abstractmethod + def embed_query(self, text: str) -> list[float]: + """Embed a single query.""" + + @abstractmethod + def reset_usage(self) -> None: + """Reset per-turn embedding usage before a new runtime invocation.""" + + +class OpenAIEmbeddingProvider: + """OpenAI embeddings adapter for dashboard chat retrieval.""" + + def __init__( + self, + api_key: Union[str, None] = None, + model: str = "text-embedding-3-small", + client: Union[OpenAI, None] = None, + ): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.model = model + self.usage_events: list[dict[str, Any]] = [] + if client is None: + if not self.api_key: + raise ValueError("OPENAI_API_KEY must be set for dashboard chat embeddings") + client = get_shared_openai_client(self.api_key, max_retries=2) + self.client = client + + def reset_usage(self) -> None: + """Reset aggregated embedding usage before one new chat turn.""" + self.usage_events = [] + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """Embed a batch of documents using OpenAI.""" + if not texts: + return [] + response = self.client.embeddings.create(model=self.model, input=texts) + self._record_usage("embed_documents", response, len(texts)) + return [item.embedding for item in response.data] + + def embed_query(self, text: str) -> list[float]: + """Embed a single query using the document embedding path.""" + return self.embed_documents([text])[0] + + def usage_summary(self) -> dict[str, Any]: + """Return aggregated embedding usage for the current turn.""" + totals = { + "prompt_tokens": 0, + "total_tokens": 0, + } + for event in self.usage_events: + totals["prompt_tokens"] += event.get("prompt_tokens", 0) + totals["total_tokens"] += event.get("total_tokens", 0) + return { + "model": self.model, + "calls": list(self.usage_events), + "totals": totals, + } + + def _record_usage(self, operation: str, response: Any, 
input_count: int) -> None: + """Capture embedding usage from one OpenAI embeddings response.""" + usage = getattr(response, "usage", None) + if usage is None: + return + self.usage_events.append( + { + "operation": operation, + "model": self.model, + "input_count": input_count, + "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0, + "total_tokens": getattr(usage, "total_tokens", 0) or 0, + } + ) + + +# --------------------------------------------------------------------------- +# Org-scoped vector store +# --------------------------------------------------------------------------- + + def _default_backend(config: DashboardChatVectorStoreConfig) -> VectorStore: """Build the default vector store backend from config.""" from ddpui.utils.vector.backends.chroma import ChromaVectorStore diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 8ef41b774..24268e2e8 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -11,7 +11,7 @@ run_dashboard_chat_turn, schedule_dashboard_chat_context_builds, ) -from ddpui.core.dashboard_chat.vector.building import OrgVectorBuildResult +from ddpui.core.dashboard_chat.vector.ingest import OrgVectorBuildResult from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse from ddpui.models.org import Org, OrgDbt from ddpui.models.dashboard import Dashboard diff --git a/ddpui/tests/core/dashboard_chat/test_vector_building.py b/ddpui/tests/core/dashboard_chat/test_vector_building.py index 69c080bbc..2ee911b52 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_building.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_building.py @@ -15,7 +15,7 @@ generate_dashboard_chat_dbt_docs_artifacts, ) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.vector.building import OrgVectorBuildService +from ddpui.core.dashboard_chat.vector.ingest 
import OrgVectorBuildService from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, build_dashboard_chat_collection_name, From 77c81ffaa3f8c413a103ed9637eb24f1de888cc3 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Fri, 27 Mar 2026 10:12:14 +0530 Subject: [PATCH 24/49] updates --- ddpui/celeryworkers/tasks.py | 91 +++++++++++------------------------- 1 file changed, 26 insertions(+), 65 deletions(-) diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index db0aa6076..155596c11 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -113,15 +113,11 @@ find_key_in_dictionary, convert_sqlalchemy_rows_to_csv_string, ) -from ddpui.utils.redis_client import RedisClient from ddpui.utils.feature_flags import get_all_feature_flags_for_org logger = CustomLogger("ddpui") UTC = timezone.UTC DASHBOARD_CHAT_CONTEXT_BUILD_INTERVAL_SECONDS = 3 * 60 * 60 -DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS = ( - DASHBOARD_CHAT_CONTEXT_BUILD_INTERVAL_SECONDS + 5 * 60 -) @app.task(bind=True) @@ -1273,8 +1269,9 @@ def clear_stuck_locks(): return processed_count -def _get_dashboard_chat_context_build_orgs(): - """Return orgs that are eligible for scheduled dashboard chat context builds.""" +@app.task +def schedule_dashboard_chat_context_builds(): + """Fan out one dashboard chat context-build task per eligible org.""" candidate_orgs = ( Org.objects.select_related("dbt", "preferences") .filter( @@ -1283,41 +1280,25 @@ def _get_dashboard_chat_context_build_orgs(): ) .order_by("id") ) - return [ - org - for org in candidate_orgs - if get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) - ] - - -def _dashboard_chat_context_build_lock_key(org_id: int) -> str: - """Build the Redis lock key for an org's scheduled context build.""" - return f"dashboard_chat_context_build:{org_id}" - - -@app.task -def schedule_dashboard_chat_context_builds(): - """Fan out one dashboard chat context-build task per eligible 
org.""" enqueued_org_ids: list[int] = [] - for org in _get_dashboard_chat_context_build_orgs(): - build_dashboard_chat_context_for_org.delay(org.id) - enqueued_org_ids.append(org.id) + for org in candidate_orgs: + if get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False): + build_dashboard_chat_context_for_org.delay(org.id) + enqueued_org_ids.append(org.id) logger.info("enqueued dashboard chat context builds for org ids=%s", enqueued_org_ids) return {"enqueued_org_ids": enqueued_org_ids} -@app.task(bind=True) -def build_dashboard_chat_context_for_org(self, org_id: int): +@app.task +def build_dashboard_chat_context_for_org(org_id: int): """Build dashboard chat retrieval context for one org if the org is eligible.""" - org = ( - Org.objects.select_related("dbt", "preferences") - .filter(id=org_id, dbt__isnull=False) - .first() - ) - if org is None: + org = Org.objects.filter(id=org_id).first() + orgdbt = org.dbt if org else None + if orgdbt is None: logger.warning( - "dashboard chat context build skipped: org %s not found or missing dbt", org_id + "dashboard chat context build skipped: org %s not found or missing dbt", + org.slug if org else "unknown", ) return {"status": "skipped_missing_org", "org_id": org_id} @@ -1325,41 +1306,21 @@ def build_dashboard_chat_context_for_org(self, org_id: int): feature_enabled = get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False) if not feature_enabled or preferences is None or not preferences.ai_data_sharing_enabled: logger.info( - "dashboard chat context build skipped for org=%s because it is not eligible", org_id + "dashboard chat context build skipped for org=%s because it is not eligible", + org.slug if org else "unknown", ) return {"status": "skipped_ineligible", "org_id": org_id} - redis_client = RedisClient.get_instance() - lock = redis_client.lock( - _dashboard_chat_context_build_lock_key(org_id), - timeout=DASHBOARD_CHAT_CONTEXT_BUILD_LOCK_TIMEOUT_SECONDS, - ) - if not 
lock.acquire(blocking=False): - logger.info( - "dashboard chat context build skipped for org=%s because a rebuild is already running", - org_id, - ) - return {"status": "skipped_locked", "org_id": org_id} - - try: - result = OrgVectorBuildService().build_org_vector_context(org) - return { - "status": "completed", - "org_id": org_id, - "docs_generated_at": ( - result.docs_generated_at.isoformat() if result.docs_generated_at else None - ), - "vector_last_ingested_at": result.vector_ingested_at.isoformat(), - "source_document_counts": result.source_document_counts, - } - finally: - try: - if lock.owned(): - lock.release() - except Exception: - logger.exception( - "failed to release dashboard chat context build lock for org=%s", org_id - ) + result = OrgVectorBuildService().build_org_vector_context(org) + return { + "status": "completed", + "org_id": org_id, + "docs_generated_at": ( + result.docs_generated_at.isoformat() if result.docs_generated_at else None + ), + "vector_last_ingested_at": result.vector_ingested_at.isoformat(), + "source_document_counts": result.source_document_counts, + } @app.task From 17d3668805fddbd7e6bd4d87de1fda89e6875698 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Fri, 27 Mar 2026 10:27:24 +0530 Subject: [PATCH 25/49] updates --- ddpui/celeryworkers/tasks.py | 161 ------------------ ddpui/core/dashboard_chat/sessions/service.py | 71 ++++++++ ddpui/websockets/dashboard_chat_consumer.py | 7 +- 3 files changed, 76 insertions(+), 163 deletions(-) diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index 155596c11..18dabbb0b 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -76,7 +76,6 @@ from ddpui.core.orgdbt_manager import DbtProjectManager, DbtCommandError from ddpui.core.git_manager import GitManager, GitManagerError from ddpui.core.dashboard_chat.vector.ingest import OrgVectorBuildService -from ddpui.core.dashboard_chat.orchestration.orchestrator import get_dashboard_chat_runtime from 
ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpairbyte import airbyte_service, airbytehelpers from ddpui.ddpprefect.prefect_service import ( @@ -99,16 +98,6 @@ ) from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory from ddpui.core import llm_service -from ddpui.core.dashboard_chat.events import ( - build_dashboard_chat_event, - publish_dashboard_chat_event, -) -from ddpui.core.dashboard_chat.sessions.service import ( - create_dashboard_chat_assistant_message, - find_dashboard_chat_assistant_reply, - list_dashboard_chat_history, - serialize_dashboard_chat_message, -) from ddpui.utils.helpers import ( find_key_in_dictionary, convert_sqlalchemy_rows_to_csv_string, @@ -1323,156 +1312,6 @@ def build_dashboard_chat_context_for_org(org_id: int): } -@app.task -def run_dashboard_chat_turn(session_id: str, user_message_id: int): - """Run one dashboard chat turn asynchronously and emit websocket events.""" - try: - result = execute_dashboard_chat_turn(session_id, user_message_id) - session = result["session"] - user_message = result["user_message"] - assistant_message = result.get("assistant_message") - - if result["status"] == "completed" and assistant_message is not None: - publish_dashboard_chat_event( - str(session.session_id), - build_dashboard_chat_event( - event_type="assistant_message", - session_id=str(session.session_id), - dashboard_id=session.dashboard.id, - message_id=str(assistant_message.id), - data=serialize_dashboard_chat_message(assistant_message), - ), - ) - return { - "status": "completed", - "session_id": str(session.session_id), - "assistant_message_id": assistant_message.id, - } - - if result["status"] == "skipped_existing_reply" and assistant_message is not None: - logger.info( - "dashboard chat turn reused existing assistant message for session=%s message_id=%s", - session_id, - user_message_id, - ) - return { - "status": "skipped_existing_reply", - "session_id": str(session.session_id), - "assistant_message_id": 
assistant_message.id, - } - - if result["status"] == "skipped_missing_session": - logger.warning( - "dashboard chat turn skipped because session %s was not found or has no dashboard", - session_id, - ) - return {"status": "skipped_missing_session", "session_id": session_id} - - if result["status"] == "skipped_missing_message": - logger.warning( - "dashboard chat turn skipped because message %s was not found in session %s", - user_message_id, - session_id, - ) - return {"status": "skipped_missing_message", "session_id": session_id} - - raise RuntimeError(f"Unexpected dashboard chat turn status: {result['status']}") - except Exception: - logger.exception( - "dashboard chat turn failed for session=%s message_id=%s", - session_id, - user_message_id, - ) - from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession - - session = ( - DashboardChatSession.objects.select_related("dashboard") - .filter(session_id=session_id) - .first() - ) - user_message = DashboardChatMessage.objects.filter(id=user_message_id, role="user").first() - if session is not None and session.dashboard is not None and user_message is not None: - publish_dashboard_chat_event( - str(session.session_id), - build_dashboard_chat_event( - event_type="error", - session_id=str(session.session_id), - dashboard_id=session.dashboard.id, - message_id=str(user_message.id), - data={"message": "Something went wrong while generating the response"}, - ), - ) - raise - - -def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: - """Run one dashboard chat turn synchronously and persist the assistant reply.""" - from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession - - session = ( - DashboardChatSession.objects.select_related("org", "dashboard", "orguser") - .filter(session_id=session_id) - .first() - ) - if session is None or session.dashboard is None: - return {"status": "skipped_missing_session", "session": None, "user_message": None} - - 
user_message = DashboardChatMessage.objects.filter( - id=user_message_id, - session=session, - role="user", - ).first() - if user_message is None: - return {"status": "skipped_missing_message", "session": session, "user_message": None} - - existing_assistant_message = find_dashboard_chat_assistant_reply( - session=session, - user_message=user_message, - ) - if existing_assistant_message is not None: - return { - "status": "skipped_existing_reply", - "session": session, - "user_message": user_message, - "assistant_message": existing_assistant_message, - } - - response = get_dashboard_chat_runtime().run( - org=session.org, - dashboard_id=session.dashboard.id, - user_query=user_message.content, - session_id=str(session.session_id), - vector_collection_name=session.vector_collection_name, - conversation_history=list_dashboard_chat_history( - session, - exclude_message_id=user_message.id, - ), - ) - response_payload = response.to_dict() - assistant_payload = { - key: value for key, value in response_payload.items() if key != "answer_text" - } - timing_breakdown = dict(response_payload.get("metadata") or {}).get("timing_breakdown") or {} - assistant_message = create_dashboard_chat_assistant_message( - session=session, - content=response.answer_text, - payload=assistant_payload, - timing_breakdown=timing_breakdown, - ) - response_latency_ms = max( - 0, - int((assistant_message.created_at - user_message.created_at).total_seconds() * 1000), - ) - assistant_message.response_latency_ms = response_latency_ms - assistant_message.save(update_fields=["response_latency_ms"]) - return { - "status": "completed", - "session": session, - "user_message": user_message, - "assistant_message": assistant_message, - } - - @app.on_after_finalize.connect def setup_periodic_tasks(sender: Celery, **kwargs): """periodic celery tasks""" diff --git a/ddpui/core/dashboard_chat/sessions/service.py b/ddpui/core/dashboard_chat/sessions/service.py index dac05688a..a51c03da0 100644 --- 
a/ddpui/core/dashboard_chat/sessions/service.py +++ b/ddpui/core/dashboard_chat/sessions/service.py @@ -18,6 +18,9 @@ DashboardChatSession, ) from ddpui.models.org_user import OrgUser +from ddpui.utils.custom_logger import CustomLogger + +logger = CustomLogger("dashboard_chat") class DashboardChatSessionError(Exception): @@ -173,6 +176,74 @@ def find_dashboard_chat_assistant_reply( ) +def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: + """Load session and message, run the runtime, persist and return the assistant reply.""" + from ddpui.core.dashboard_chat.orchestration.orchestrator import get_dashboard_chat_runtime + + session = ( + DashboardChatSession.objects.select_related("org", "dashboard", "orguser") + .filter(session_id=session_id) + .first() + ) + if session is None or session.dashboard is None: + return {"status": "skipped_missing_session", "session": None, "user_message": None} + + user_message = DashboardChatMessage.objects.filter( + id=user_message_id, + session=session, + role="user", + ).first() + if user_message is None: + return {"status": "skipped_missing_message", "session": session, "user_message": None} + + existing_assistant_message = find_dashboard_chat_assistant_reply( + session=session, + user_message=user_message, + ) + if existing_assistant_message is not None: + return { + "status": "skipped_existing_reply", + "session": session, + "user_message": user_message, + "assistant_message": existing_assistant_message, + } + + response = get_dashboard_chat_runtime().run( + org=session.org, + dashboard_id=session.dashboard.id, + user_query=user_message.content, + session_id=str(session.session_id), + vector_collection_name=session.vector_collection_name, + conversation_history=list_dashboard_chat_history( + session, + exclude_message_id=user_message.id, + ), + ) + response_payload = response.to_dict() + assistant_payload = { + key: value for key, value in response_payload.items() if key != "answer_text" + } + 
timing_breakdown = dict(response_payload.get("metadata") or {}).get("timing_breakdown") or {} + assistant_message = create_dashboard_chat_assistant_message( + session=session, + content=response.answer_text, + payload=assistant_payload, + timing_breakdown=timing_breakdown, + ) + response_latency_ms = max( + 0, + int((assistant_message.created_at - user_message.created_at).total_seconds() * 1000), + ) + assistant_message.response_latency_ms = response_latency_ms + assistant_message.save(update_fields=["response_latency_ms"]) + return { + "status": "completed", + "session": session, + "user_message": user_message, + "assistant_message": assistant_message, + } + + def _create_dashboard_chat_message( *, session: DashboardChatSession, diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index c7cee02eb..777569380 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -3,7 +3,7 @@ from asgiref.sync import async_to_sync -from ddpui.celeryworkers.tasks import execute_dashboard_chat_turn +from ddpui.core.dashboard_chat.sessions.service import execute_dashboard_chat_turn from ddpui.core.dashboard_chat.events import ( build_dashboard_chat_event, dashboard_chat_group_name, @@ -169,7 +169,10 @@ def websocket_receive(self, message): return assistant_message = result.get("assistant_message") - if result["status"] in {"completed", "skipped_existing_reply"} and assistant_message is not None: + if ( + result["status"] in {"completed", "skipped_existing_reply"} + and assistant_message is not None + ): self.respond( WebsocketResponse( data=build_dashboard_chat_event( From 292f73ee968019935b2d7a967b3757cef1d0a2ed Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 27 Mar 2026 11:12:37 +0530 Subject: [PATCH 26/49] fix(ai-chat): harden chroma collection creation flow --- ddpui/core/dashboard_chat/vector/ingest.py | 18 +++++ .../dashboard_chat/test_vector_building.py | 80 
+++++++++++++++++++ .../core/dashboard_chat/test_vector_store.py | 14 ++++ ddpui/utils/vector/backends/chroma.py | 12 ++- 4 files changed, 122 insertions(+), 2 deletions(-) diff --git a/ddpui/core/dashboard_chat/vector/ingest.py b/ddpui/core/dashboard_chat/vector/ingest.py index 406eb0632..7fd13fa48 100644 --- a/ddpui/core/dashboard_chat/vector/ingest.py +++ b/ddpui/core/dashboard_chat/vector/ingest.py @@ -560,6 +560,24 @@ def build_org_vector_context(self, org: Org) -> OrgVectorBuildResult: collection_name=target_collection_name, ) ) + if not desired_documents: + # Empty builds still need a concrete collection so later sessions never + # pin a version that only exists in the database. + self.vector_store.create_collection( + org.id, + collection_name=target_collection_name, + ) + + # Only mark the version active after the exact versioned collection exists. + created_collection = self.vector_store.load_collection( + org.id, + collection_name=target_collection_name, + allow_legacy_fallback=False, + ) + if created_collection is None: + raise OrgVectorBuildError( + f"Dashboard chat vector collection {target_collection_name} was not created" + ) vector_ingested_at = collection_versioned_at org.dbt.vector_last_ingested_at = collection_versioned_at diff --git a/ddpui/tests/core/dashboard_chat/test_vector_building.py b/ddpui/tests/core/dashboard_chat/test_vector_building.py index 2ee911b52..24f58688c 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_building.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_building.py @@ -39,12 +39,19 @@ class FakeDashboardChatVectorStore: def __init__(self): self.documents_by_collection = {} + self.created_collections = [] self.upsert_calls = [] self.delete_calls = [] def collection_name(self, org_id, *, version=None): return build_dashboard_chat_collection_name(org_id, version=version) + def create_collection(self, org_id, *, collection_name=None): + resolved_collection_name = collection_name or self.collection_name(org_id) + 
self.created_collections.append(resolved_collection_name) + self.documents_by_collection.setdefault(resolved_collection_name, {}) + return {"name": resolved_collection_name} + def load_collection(self, org_id, *, collection_name=None, allow_legacy_fallback=True): resolved_collection_name = collection_name or self.collection_name(org_id) if resolved_collection_name in self.documents_by_collection: @@ -98,6 +105,8 @@ def get_documents( def upsert_documents(self, org_id, documents, collection_name=None): self.upsert_calls.append([document.document_id for document in documents]) + if not documents: + return [] resolved_collection_name = collection_name or self.collection_name(org_id) org_documents = self.documents_by_collection.setdefault(resolved_collection_name, {}) for document in documents: @@ -637,3 +646,74 @@ def test_build_org_vector_context_skips_dbt_docs_when_dbt_sources_are_disabled( assert result.docs_generated_at is None assert result.source_document_counts["dbt_manifest"] == 0 assert result.source_document_counts["dbt_catalog"] == 0 + + +def test_build_org_vector_context_creates_empty_collection_before_marking_version_active( + org, + orgdbt, +): + """Even empty builds must materialize the target collection before pinning it as active.""" + + class EmptyDocumentBuilder: + def build_documents_by_source(self, org_instance, dbt_docs): + return {source_type.value: [] for source_type in DashboardChatSourceType} + + vector_store = FakeDashboardChatVectorStore() + service = OrgVectorBuildService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: StoredArtifacts( + manifest_json={"metadata": {"project_name": "dashchat"}, "sources": {}, "nodes": {}}, + catalog_json={"sources": {}, "nodes": {}}, + generated_at=timezone.now(), + ).to_artifacts(), + document_builder=EmptyDocumentBuilder(), + ) + + result = service.build_org_vector_context(org) + + active_collection_name = build_dashboard_chat_collection_name( + org.id, + 
version=result.vector_ingested_at, + ) + orgdbt.refresh_from_db() + + assert active_collection_name in vector_store.created_collections + assert active_collection_name in vector_store.documents_by_collection + assert orgdbt.vector_last_ingested_at == result.vector_ingested_at + + +def test_build_org_vector_context_does_not_mark_version_active_when_collection_is_missing( + org, + orgdbt, +): + """A build must fail before updating vector_last_ingested_at if the target collection still does not exist.""" + + class BrokenVectorStore(FakeDashboardChatVectorStore): + def create_collection(self, org_id, *, collection_name=None): + resolved_collection_name = collection_name or self.collection_name(org_id) + self.created_collections.append(resolved_collection_name) + return {"name": resolved_collection_name} + + class EmptyDocumentBuilder: + def build_documents_by_source(self, org_instance, dbt_docs): + return {source_type.value: [] for source_type in DashboardChatSourceType} + + vector_store = BrokenVectorStore() + service = OrgVectorBuildService( + vector_store=vector_store, + dbt_docs_generator=lambda org_instance, orgdbt_instance: StoredArtifacts( + manifest_json={"metadata": {"project_name": "dashchat"}, "sources": {}, "nodes": {}}, + catalog_json={"sources": {}, "nodes": {}}, + generated_at=timezone.now(), + ).to_artifacts(), + document_builder=EmptyDocumentBuilder(), + ) + + with pytest.raises( + Exception, + match="was not created", + ): + service.build_org_vector_context(org) + + orgdbt.refresh_from_db() + assert orgdbt.vector_last_ingested_at is None diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py b/ddpui/tests/core/dashboard_chat/test_vector_store.py index a7d2e0a4a..f61fa46f2 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -3,6 +3,8 @@ from datetime import datetime, timezone from unittest.mock import patch +from chromadb.errors import NotFoundError + from 
ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig from ddpui.core.dashboard_chat.vector.documents import ( DashboardChatSourceType, @@ -272,6 +274,18 @@ def test_delete_collection_returns_false_for_missing_org(): assert store.delete_collection(404) is False +def test_chroma_load_collection_treats_not_found_as_missing(): + """Missing collections should normalize to None across Chroma error variants.""" + + class NotFoundClient(FakeChromaClient): + def get_collection(self, name): + raise NotFoundError(name) + + backend = ChromaVectorStore(client=NotFoundClient()) + + assert backend.load_collection("org_404") is None + + def test_get_documents_and_delete_documents_respect_where_filters(): """Collection reads and deletes should honor source and dashboard scoping.""" fake_client = FakeChromaClient() diff --git a/ddpui/utils/vector/backends/chroma.py b/ddpui/utils/vector/backends/chroma.py index f5951f5df..78da7789e 100644 --- a/ddpui/utils/vector/backends/chroma.py +++ b/ddpui/utils/vector/backends/chroma.py @@ -4,7 +4,13 @@ from typing import Any, Union from chromadb import ClientAPI, Collection, HttpClient -from chromadb.errors import InvalidCollectionException + +try: + from chromadb.errors import InvalidCollectionException, NotFoundError +except ImportError: # pragma: no cover - older Chroma builds may not expose NotFoundError + from chromadb.errors import InvalidCollectionException + + NotFoundError = InvalidCollectionException from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument @@ -39,7 +45,9 @@ def create_collection( def load_collection(self, name: str) -> Union[Collection, None]: try: return self.client.get_collection(name=name) - except (InvalidCollectionException, ValueError): + # Chroma has used multiple exception types for "collection does not exist" + # across versions, so normalize them all to the same missing-collection result. 
+ except (InvalidCollectionException, NotFoundError, ValueError): return None def delete_collection(self, name: str) -> bool: From 39d9e54b9de077bbca63f08c91b050978fd75ac4 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 27 Mar 2026 12:32:46 +0530 Subject: [PATCH 27/49] fix(ai-chat): use chroma native missing collection errors --- .../core/dashboard_chat/test_vector_store.py | 2 +- ddpui/utils/vector/backends/chroma.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py b/ddpui/tests/core/dashboard_chat/test_vector_store.py index f61fa46f2..c86df0024 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -112,7 +112,7 @@ def get_or_create_collection(self, name, metadata=None): def get_collection(self, name): if name not in self.collections: - raise ValueError("collection does not exist") + raise NotFoundError("collection does not exist") return self.collections[name] def delete_collection(self, name): diff --git a/ddpui/utils/vector/backends/chroma.py b/ddpui/utils/vector/backends/chroma.py index 78da7789e..f9a7e79ba 100644 --- a/ddpui/utils/vector/backends/chroma.py +++ b/ddpui/utils/vector/backends/chroma.py @@ -4,13 +4,7 @@ from typing import Any, Union from chromadb import ClientAPI, Collection, HttpClient - -try: - from chromadb.errors import InvalidCollectionException, NotFoundError -except ImportError: # pragma: no cover - older Chroma builds may not expose NotFoundError - from chromadb.errors import InvalidCollectionException - - NotFoundError = InvalidCollectionException +from chromadb.errors import InvalidCollectionException, NotFoundError from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument @@ -45,9 +39,12 @@ def create_collection( def load_collection(self, name: str) -> Union[Collection, None]: try: return self.client.get_collection(name=name) - # 
Chroma has used multiple exception types for "collection does not exist" - # across versions, so normalize them all to the same missing-collection result. - except (InvalidCollectionException, NotFoundError, ValueError): + # In chromadb==0.6.3, get_collection() can raise: + # - InvalidCollectionException when the named collection does not exist + # - NotFoundError when the HTTP/sysdb layer reports a missing resource + # while resolving that collection lookup + # For our store interface, both cases mean "this collection is absent". + except (InvalidCollectionException, NotFoundError): return None def delete_collection(self, name: str) -> bool: From af593db63d1b227e1868c6e6149750a3ee4a8636 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Fri, 27 Mar 2026 12:35:47 +0530 Subject: [PATCH 28/49] chroma docker file --- docker-compose.chroma.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 docker-compose.chroma.yml diff --git a/docker-compose.chroma.yml b/docker-compose.chroma.yml new file mode 100644 index 000000000..b889af2dd --- /dev/null +++ b/docker-compose.chroma.yml @@ -0,0 +1,16 @@ +name: dalgo-vector-store + +services: + chroma: + image: chromadb/chroma:0.6.3 + ports: + - "8003:8000" + volumes: + - chroma_data:/chroma/chroma + environment: + - IS_PERSISTENT=TRUE + - ANONYMIZED_TELEMETRY=FALSE + restart: unless-stopped + +volumes: + chroma_data: From 34cf321bd3f50c2fdf3ec0aef08c2dbc513e9910 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Fri, 27 Mar 2026 18:50:27 +0530 Subject: [PATCH 29/49] refactor(ai-chat): add langgraph checkpointed runtime --- ddpui/celeryworkers/tasks.py | 2 +- ddpui/core/dashboard_chat/CLAUDE.md | 148 ++- ddpui/core/dashboard_chat/README.md | 184 ++++ ...rmatting.py => final_answer_formatting.py} | 0 .../{interface.py => llm_client_interface.py} | 0 .../{openai.py => openai_llm_client.py} | 20 +- ...ompt_store.py => prompt_template_store.py} | 5 + ddpui/core/dashboard_chat/config.py | 2 +- ...owlist.py => 
dashboard_table_allowlist.py} | 0 .../{dbt_docs.py => dbt_docs_artifacts.py} | 0 .../core/dashboard_chat/contracts/__init__.py | 10 +- ...versation.py => conversation_contracts.py} | 0 .../{intents.py => intent_contracts.py} | 0 .../{response.py => response_contracts.py} | 2 +- .../{retrieval.py => retrieval_contracts.py} | 0 .../contracts/{sql.py => sql_contracts.py} | 0 .../orchestration/checkpoints.py | 71 ++ ...onversation.py => conversation_context.py} | 34 +- .../orchestration/intent_routing.py | 9 + .../orchestration/llm_tools/__init__.py | 5 + .../llm_tools/implementations/__init__.py | 1 + .../llm_tools/implementations/dbt_tools.py | 91 ++ .../llm_tools/implementations/schema_tools.py | 220 +++++ .../implementations/sql_corrections.py | 142 +++ .../implementations/sql_execution_tools.py | 130 +++ .../implementations}/sql_parsing.py | 22 +- .../implementations/sql_validation.py | 181 ++++ .../implementations/vector_retrieval_tool.py | 116 +++ .../llm_tools/runtime/__init__.py | 1 + .../runtime/tool_loop.py} | 120 +-- .../runtime/tool_specifications.py} | 0 .../llm_tools/runtime/turn_context.py | 264 ++++++ .../orchestration/nodes/compose_response.py | 93 ++ .../orchestration/nodes/finalize.py | 51 +- .../orchestration/nodes/handle_data_query.py | 87 -- .../orchestration/nodes/handle_follow_up.py | 87 -- .../nodes/handle_follow_up_context.py | 57 ++ .../nodes/handle_follow_up_sql.py | 57 ++ .../orchestration/nodes/handle_irrelevant.py | 21 +- .../nodes/handle_needs_clarification.py | 26 +- .../nodes/handle_query_with_sql.py | 57 ++ .../nodes/handle_query_without_sql.py | 57 ++ .../orchestration/nodes/handle_small_talk.py | 23 +- .../orchestration/nodes/load_context.py | 43 +- .../orchestration/nodes/route_intent.py | 20 +- .../orchestration/orchestrator.py | 253 ++++- .../{presentation.py => response_composer.py} | 17 +- .../{retrieval.py => retrieval_support.py} | 19 +- .../orchestration/session_snapshot.py | 111 --- ...ifiers.py => 
source_identifier_parsing.py} | 0 .../dashboard_chat/orchestration/state.py | 51 -- .../orchestration/state/__init__.py | 7 + .../orchestration/state/accessors.py | 59 ++ .../orchestration/state/graph_state.py | 33 + .../orchestration/state/payload_codec.py | 285 ++++++ .../{nodes/helpers.py => timing_breakdown.py} | 13 +- ..._stack.py => tool_loop_message_builder.py} | 11 +- .../orchestration/tools/__init__.py | 0 .../orchestration/tools/cache.py | 213 ----- .../orchestration/tools/handlers.py | 806 ---------------- ddpui/core/dashboard_chat/sessions/cache.py | 101 -- .../{service.py => session_service.py} | 2 +- ...py => org_vector_context_build_service.py} | 6 +- .../vector/{store.py => org_vector_store.py} | 2 +- .../{documents.py => vector_documents.py} | 0 .../dashboard_chat/warehouse/sql_guard.py | 2 +- .../{tools.py => warehouse_access_tools.py} | 36 + ...hatprompttemplate_intent_follow_up_refs.py | 157 ++++ ddpui/schemas/notifications_api_schemas.py | 7 +- ddpui/schemas/org_schema.py | 12 +- ddpui/settings.py | 14 +- ddpui/tests/core/dashboard_chat/conftest.py | 19 + .../test_langgraph_checkpointing.py | 407 +++++++++ .../core/dashboard_chat/test_llm_client.py | 9 +- .../core/dashboard_chat/test_prompt_store.py | 11 +- .../tests/core/dashboard_chat/test_runtime.py | 542 +++++------ .../dashboard_chat/test_session_service.py | 113 ++- ddpui/tests/core/dashboard_chat/test_tasks.py | 176 +--- .../dashboard_chat/test_vector_building.py | 26 +- .../core/dashboard_chat/test_vector_store.py | 4 +- .../dashboard_chat/test_warehouse_tools.py | 10 +- ddpui/websockets/dashboard_chat_consumer.py | 4 +- pyproject.toml | 18 +- uv.lock | 864 ++++++++++++++++-- 84 files changed, 4477 insertions(+), 2402 deletions(-) create mode 100644 ddpui/core/dashboard_chat/README.md rename ddpui/core/dashboard_chat/agents/{answer_formatting.py => final_answer_formatting.py} (100%) rename ddpui/core/dashboard_chat/agents/{interface.py => llm_client_interface.py} (100%) rename 
ddpui/core/dashboard_chat/agents/{openai.py => openai_llm_client.py} (92%) rename ddpui/core/dashboard_chat/agents/{prompt_store.py => prompt_template_store.py} (96%) rename ddpui/core/dashboard_chat/context/{allowlist.py => dashboard_table_allowlist.py} (100%) rename ddpui/core/dashboard_chat/context/{dbt_docs.py => dbt_docs_artifacts.py} (100%) rename ddpui/core/dashboard_chat/contracts/{conversation.py => conversation_contracts.py} (100%) rename ddpui/core/dashboard_chat/contracts/{intents.py => intent_contracts.py} (100%) rename ddpui/core/dashboard_chat/contracts/{response.py => response_contracts.py} (95%) rename ddpui/core/dashboard_chat/contracts/{retrieval.py => retrieval_contracts.py} (100%) rename ddpui/core/dashboard_chat/contracts/{sql.py => sql_contracts.py} (100%) create mode 100644 ddpui/core/dashboard_chat/orchestration/checkpoints.py rename ddpui/core/dashboard_chat/orchestration/{conversation.py => conversation_context.py} (90%) create mode 100644 ddpui/core/dashboard_chat/orchestration/intent_routing.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/__init__.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/__init__.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/dbt_tools.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_corrections.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py rename ddpui/core/dashboard_chat/orchestration/{tools => llm_tools/implementations}/sql_parsing.py (93%) create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/vector_retrieval_tool.py create mode 100644 
ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/__init__.py rename ddpui/core/dashboard_chat/orchestration/{tools/loop.py => llm_tools/runtime/tool_loop.py} (68%) rename ddpui/core/dashboard_chat/orchestration/{tools/specifications.py => llm_tools/runtime/tool_specifications.py} (100%) create mode 100644 ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py create mode 100644 ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py rename ddpui/core/dashboard_chat/orchestration/{presentation.py => response_composer.py} (96%) rename ddpui/core/dashboard_chat/orchestration/{retrieval.py => retrieval_support.py} (95%) delete mode 100644 ddpui/core/dashboard_chat/orchestration/session_snapshot.py rename ddpui/core/dashboard_chat/orchestration/{source_identifiers.py => source_identifier_parsing.py} (100%) delete mode 100644 ddpui/core/dashboard_chat/orchestration/state.py create mode 100644 ddpui/core/dashboard_chat/orchestration/state/__init__.py create mode 100644 ddpui/core/dashboard_chat/orchestration/state/accessors.py create mode 100644 ddpui/core/dashboard_chat/orchestration/state/graph_state.py create mode 100644 ddpui/core/dashboard_chat/orchestration/state/payload_codec.py rename ddpui/core/dashboard_chat/orchestration/{nodes/helpers.py => timing_breakdown.py} (71%) rename ddpui/core/dashboard_chat/orchestration/{message_stack.py => tool_loop_message_builder.py} (80%) delete mode 100644 
ddpui/core/dashboard_chat/orchestration/tools/__init__.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/tools/cache.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/tools/handlers.py delete mode 100644 ddpui/core/dashboard_chat/sessions/cache.py rename ddpui/core/dashboard_chat/sessions/{service.py => session_service.py} (99%) rename ddpui/core/dashboard_chat/vector/{ingest.py => org_vector_context_build_service.py} (99%) rename ddpui/core/dashboard_chat/vector/{store.py => org_vector_store.py} (99%) rename ddpui/core/dashboard_chat/vector/{documents.py => vector_documents.py} (100%) rename ddpui/core/dashboard_chat/warehouse/{tools.py => warehouse_access_tools.py} (71%) create mode 100644 ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py create mode 100644 ddpui/tests/core/dashboard_chat/conftest.py create mode 100644 ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py diff --git a/ddpui/celeryworkers/tasks.py b/ddpui/celeryworkers/tasks.py index 18dabbb0b..b4615b7e4 100644 --- a/ddpui/celeryworkers/tasks.py +++ b/ddpui/celeryworkers/tasks.py @@ -75,7 +75,7 @@ ) from ddpui.core.orgdbt_manager import DbtProjectManager, DbtCommandError from ddpui.core.git_manager import GitManager, GitManagerError -from ddpui.core.dashboard_chat.vector.ingest import OrgVectorBuildService +from ddpui.core.dashboard_chat.vector.org_vector_context_build_service import OrgVectorBuildService from ddpui.ddpdbt.schema import DbtProjectParams from ddpui.ddpairbyte import airbyte_service, airbytehelpers from ddpui.ddpprefect.prefect_service import ( diff --git a/ddpui/core/dashboard_chat/CLAUDE.md b/ddpui/core/dashboard_chat/CLAUDE.md index dff3f7ddd..90e884cc3 100644 --- a/ddpui/core/dashboard_chat/CLAUDE.md +++ b/ddpui/core/dashboard_chat/CLAUDE.md @@ -34,11 +34,11 @@ The feature is intentionally scoped to the current dashboard only: v +--------------------------------+ | DashboardChatRuntime.run | -| - load snapshot | +| - 
load context / bootstrap | | - route intent | -| - retrieve / tools / SQL | -| - compose answer | -| - attach citations + metadata | +| - route-specific node | +| - compose response | +| - finalize metadata/citations | +--------------------------------+ | v @@ -291,7 +291,7 @@ Flow: +----------------------------------------------+ | handle_follow_up_sql | | -> shorter tool loop | -| -> reuse conversation context + caches | +| -> reuse conversation context + checkpointed state | | -> run updated SQL | | -> compose final answer | +----------------------------------------------+ @@ -467,8 +467,8 @@ This layer knows how to talk to Chroma, but does not know anything about dashboa ### 2. Dashboard-chat vector layer Location: -- [`vector/store.py`](./vector/store.py) -- [`vector/documents.py`](./vector/documents.py) +- [`vector/org_vector_store.py`](./vector/org_vector_store.py) +- [`vector/vector_documents.py`](./vector/vector_documents.py) - [`vector/builder.py`](./vector/builder.py) - [`vector/building.py`](./vector/building.py) - [`vector/embeddings.py`](./vector/embeddings.py) @@ -576,39 +576,30 @@ These sources are intentionally different: - the compact dbt index is good for deterministic dbt lookups (ex: upstream models) - warehouse tools are good for trustworthy data answers -## Why We Cache +## LangGraph Persistence -This feature uses caching for stability and cost control +This feature now uses official LangGraph Postgres checkpoints for session continuity. -### 1. 
Session snapshot cache -Location: -- `orchestration/session_snapshot.py` -- `sessions/cache.py` - -What is cached: -- dashboard export +What is persisted in checkpoints: +- dashboard export payload +- allowlist payload - compact dbt index -- allowlist -- schema snippet cache -- validated distinct-value cache - -What is not cached here: -- prior chat turns themselves +- schema snippet payloads +- validated distinct-value payloads +- turn state needed for follow-ups and resume -Previous questions and answers are persisted separately as chat messages and are passed back in as conversation history on each new turn. +What is not stored there: +- product transcript rows -TTL: -- 24 hours +Chat history remains in `dashboard_chat_session` / `dashboard_chat_message`. LangGraph owns resumable workflow state; Django models still own the user-visible transcript. Why this exists: -- a chat session should keep using a stable dashboard context across follow-up turns -- other users or background refreshes should not change the dashboard/dbt context underneath an active conversation -- schema lookups and distinct-value validations should carry across follow-ups instead of starting cold every turn -- the runtime should not have to rebuild the same dashboard export, allowlist, and compact dbt index on every user message - -This is not LangGraph checkpoint persistence. It is app-level session context freezing. +- a chat session should keep using stable dashboard/dbt context across follow-up turns +- schema lookups and distinct validations should carry across turns +- interrupted runs should be resumable at graph-step boundaries +- continuity should survive process restarts without relying on Django cache -### 2. 
Shared process-level clients +### Shared process-level clients Location: - `orchestration/orchestrator.py` - `ddpui/utils/vector/chroma/client.py` @@ -626,7 +617,7 @@ Why this exists: Prompt lookup works like this: - read the prompt row from the DB if present -- otherwise fall back to the built-in default in `agents/prompt_store.py` +- otherwise fall back to the built-in default in `agents/prompt_template_store.py` ## DB-Backed Logging / Trace @@ -697,87 +688,73 @@ This is the quickest way to navigate the package. - websocket event helpers / channel group naming ### `agents/` -- [`interface.py`](./agents/interface.py) +- [`llm_client_interface.py`](./agents/llm_client_interface.py) - LLM client protocol used by the runtime -- [`openai.py`](./agents/openai.py) +- [`openai_llm_client.py`](./agents/openai_llm_client.py) - OpenAI-backed intent classification, tool-loop, and final-answer composition -- [`answer_formatting.py`](./agents/answer_formatting.py) +- [`final_answer_formatting.py`](./agents/final_answer_formatting.py) - helpers for structured final answer composition -- [`prompt_store.py`](./agents/prompt_store.py) +- [`prompt_template_store.py`](./agents/prompt_template_store.py) - DB-backed prompt lookup with built-in defaults ### `context/` -- [`allowlist.py`](./context/allowlist.py) +- [`dashboard_table_allowlist.py`](./context/dashboard_table_allowlist.py) - dashboard export -> allowlisted tables/dbt lineage -> compact dbt index -- [`dbt_docs.py`](./context/dbt_docs.py) +- [`dbt_docs_artifacts.py`](./context/dbt_docs_artifacts.py) - dbt docs generation/loading helpers for manifest/catalog artifacts ### `contracts/` -- [`conversation.py`](./contracts/conversation.py) +- [`conversation_contracts.py`](./contracts/conversation_contracts.py) - conversation history and follow-up context contracts -- [`intents.py`](./contracts/intents.py) +- [`intent_contracts.py`](./contracts/intent_contracts.py) - intent enums and routing decisions -- 
[`response.py`](./contracts/response.py) +- [`response_contracts.py`](./contracts/response_contracts.py) - final response, citations, usage, tool-call metadata -- [`retrieval.py`](./contracts/retrieval.py) +- [`retrieval_contracts.py`](./contracts/retrieval_contracts.py) - retrieved document contracts -- [`sql.py`](./contracts/sql.py) +- [`sql_contracts.py`](./contracts/sql_contracts.py) - SQL validation and schema snippet contracts ### `orchestration/` - [`orchestrator.py`](./orchestration/orchestrator.py) - - runtime entry point and shared runtime getter -- [`definition.py`](./orchestration/definition.py) - - explicit LangGraph wiring -- [`bindings.py`](./orchestration/bindings.py) - - binds split helper modules onto the runtime class -- [`state.py`](./orchestration/state.py) - - graph state shape and lightweight constants -- [`nodes.py`](./orchestration/nodes.py) - - node handlers and route-specific behavior -- [`tool_specifications.py`](./orchestration/tool_specifications.py) - - tool schema exposed to the LLM -- [`tool_loop.py`](./orchestration/tool_loop.py) - - bounded tool-loop execution -- [`tool_handlers.py`](./orchestration/tool_handlers.py) - - implementation of each tool -- [`retrieval.py`](./orchestration/retrieval.py) + - runtime entry point, shared runtime getter, and backend resume API +- [`checkpoints.py`](./orchestration/checkpoints.py) + - official LangGraph Postgres checkpoint wiring +- [`state/`](./orchestration/state) + - grouped graph-state definitions, payload codecs, and typed runtime accessors +- [`nodes/`](./orchestration/nodes) + - graph node handlers, including explicit query/follow-up route files plus `compose_response` and `finalize` +- [`retrieval_support.py`](./orchestration/retrieval_support.py) - Chroma retrieval + citations -- [`conversation.py`](./orchestration/conversation.py) +- [`conversation_context.py`](./orchestration/conversation_context.py) - conversation-context extraction and follow-up helpers -- 
[`session_snapshot.py`](./orchestration/session_snapshot.py) - - session-stable snapshot load/persist logic -- [`sql_parsing.py`](./orchestration/sql_parsing.py) - - SQL parsing helpers used during validation/execution -- [`sql_execution.py`](./orchestration/sql_execution.py) - - safe SQL execution path -- [`presentation.py`](./orchestration/presentation.py) +- [`response_composer.py`](./orchestration/response_composer.py) - response format selection and final answer assembly -- [`message_stack.py`](./orchestration/message_stack.py) +- [`tool_loop_message_builder.py`](./orchestration/tool_loop_message_builder.py) - message-building helpers for the tool loop -- [`source_identifiers.py`](./orchestration/source_identifiers.py) +- [`source_identifier_parsing.py`](./orchestration/source_identifier_parsing.py) - parsing helpers for chart/dbt source identifiers +- [`intent_routing.py`](./orchestration/intent_routing.py) + - graph route selection after intent classification +- [`timing_breakdown.py`](./orchestration/timing_breakdown.py) + - timing merge helpers for node and tool-loop execution +- [`llm_tools/`](./orchestration/llm_tools) + - `runtime/` for tool-loop execution and turn context, `implementations/` for concrete tool handlers and SQL helpers ### `sessions/` -- [`service.py`](./sessions/service.py) +- [`session_service.py`](./sessions/session_service.py) - create/reuse sessions, persist messages, serialize message payloads -- [`cache.py`](./sessions/cache.py) - - cache key + serializer helpers for session snapshots ### `vector/` -- [`documents.py`](./vector/documents.py) +- [`vector_documents.py`](./vector/vector_documents.py) - vector document dataclasses, source types, collection naming helpers -- [`embeddings.py`](./vector/embeddings.py) - - embedding provider protocol + OpenAI embeddings adapter -- [`store.py`](./vector/store.py) +- [`org_vector_store.py`](./vector/org_vector_store.py) - dashboard-chat adapter on top of the shared Chroma wrapper -- 
[`builder.py`](./vector/builder.py) - - build vector documents from org/dashboard/dbt context -- [`building.py`](./vector/building.py) - - end-to-end org vector rebuild workflow and collection GC +- [`org_vector_context_build_service.py`](./vector/org_vector_context_build_service.py) + - document chunking, vector document building, and org-level rebuild workflow ### `warehouse/` -- [`tools.py`](./warehouse/tools.py) +- [`warehouse_access_tools.py`](./warehouse/warehouse_access_tools.py) - read-only warehouse helpers for schema, distincts, row counts, SQL execution - [`sql_guard.py`](./warehouse/sql_guard.py) - allowlist enforcement and SQL safety checks @@ -786,7 +763,7 @@ This is the quickest way to navigate the package. Main files: - `ddpui/websockets/dashboard_chat_consumer.py` -- `sessions/service.py` +- `sessions/session_service.py` - `ddpui/celeryworkers/tasks.py` Important behavior: @@ -803,7 +780,8 @@ If chat is failing, the fastest places to inspect are: - websocket consumer for auth/session issues - `execute_dashboard_chat_turn` for persistence/runtime wiring - `orchestrator/orchestrator.py` for runtime construction -- `orchestration/nodes.py` for route choice and final response creation +- `orchestration/checkpoints.py` for LangGraph Postgres persistence setup +- `orchestration/nodes/` for route choice and final response creation - `vector/building.py` if retrieval data is stale or missing - `warehouse/sql_guard.py` if SQL is being rejected -- `sessions/cache.py` + `orchestration/session_snapshot.py` if follow-ups behave inconsistently +- LangGraph checkpoint tables plus `orchestration/state/` if follow-up state behaves inconsistently diff --git a/ddpui/core/dashboard_chat/README.md b/ddpui/core/dashboard_chat/README.md new file mode 100644 index 000000000..ee591ba6e --- /dev/null +++ b/ddpui/core/dashboard_chat/README.md @@ -0,0 +1,184 @@ +# Dashboard Chat + +This package implements dashboard-scoped chat for one dashboard at a time. 
+ +The current backend shape is: +- `DashboardChatConsumer` handles the websocket/session/message lifecycle +- `DashboardChatRuntime` runs a LangGraph workflow per turn +- LangGraph state is checkpointed to Postgres through the official `PostgresSaver` +- product transcript history still lives in `DashboardChatSession` and `DashboardChatMessage` + +## Runtime Flow + +``` ++-------------------------+ +| websocket consumer | +| persist user message | ++-------------------------+ + | + v ++-------------------------+ +| DashboardChatRuntime | +| run / resume | ++-------------------------+ + | + v ++-------------------------+ +| load_context | ++-------------------------+ + | + v ++-------------------------+ +| route_intent | ++-------------------------+ + | + v ++-----------------------------------------------+ +| one route node | +| - handle_small_talk | +| - handle_irrelevant | +| - handle_needs_clarification | +| - handle_query_with_sql | +| - handle_query_without_sql | +| - handle_follow_up_sql | +| - handle_follow_up_context | ++-----------------------------------------------+ + | + v ++-------------------------+ +| compose_response | ++-------------------------+ + | + v ++-------------------------+ +| finalize | ++-------------------------+ + | + v ++-------------------------+ +| persist assistant reply | ++-------------------------+ +``` + +## Package Layout + +### `agents/` +- LLM-facing client abstractions and implementations +- `llm_client_interface.py` defines the runtime-facing LLM contract +- `openai_llm_client.py` contains the OpenAI-backed implementation +- `final_answer_formatting.py` formats structured final-answer payloads +- `prompt_template_store.py` loads prompt templates with DB-backed overrides + +### `context/` +- `dashboard_table_allowlist.py` builds the dashboard-scoped table/dbt allowlist +- `dbt_docs_artifacts.py` generates and loads manifest/catalog artifacts for chat context builds + +### `contracts/` +- `conversation_contracts.py`, 
`intent_contracts.py`, `response_contracts.py`, `retrieval_contracts.py`, `sql_contracts.py` + +### `orchestration/` +- graph wiring and all per-turn runtime logic + +- `conversation_context.py`: extracts reusable follow-up context from prior messages +- `tool_loop_message_builder.py`: builds tool-loop prompt stacks +- `response_composer.py`: builds final answer text and response-format decisions +- `retrieval_support.py`: retrieval normalization and citation helpers +- `source_identifier_parsing.py`: parses chart/dbt identifiers from stored vector sources +- `intent_routing.py`: maps classified intents to route node names +- `timing_breakdown.py`: merges timing payloads across node/tool execution + +#### `orchestration/state/` +- `graph_state.py`: JSON-safe LangGraph state contract +- `payload_codec.py`: serialize/deserialize checkpoint payloads +- `accessors.py`: reconstruct typed runtime views from persisted payloads + +#### `orchestration/nodes/` +- graph nodes only +- query/follow-up routes each have explicit node files in `nodes/` +- `compose_response.py` builds the final `DashboardChatResponse` +- `finalize.py` enriches the finished response with metadata and warehouse citations + +#### `orchestration/llm_tools/` +- `runtime/`: tool-loop execution, turn context, tool specifications +- `implementations/`: concrete LLM-callable tools and SQL helpers + +### `sessions/` +- `session_service.py`: session/message persistence and message serialization + +### `vector/` +- `vector_documents.py`: vector document contracts and collection naming +- `org_vector_store.py`: org-scoped Chroma adapter +- `org_vector_context_build_service.py`: document building and org-level vector rebuilds + +### `warehouse/` +- `warehouse_access_tools.py`: schema lookups, distincts, row counts, SQL execution +- `sql_guard.py`: allowlist enforcement and SQL safety checks + +## Agents vs Nodes + +These are different layers: + +- `agents/` are LLM client adapters + - classify intent + - run the 
tool loop + - compose final answer text + - compose small-talk text + +- `orchestration/nodes/` are LangGraph workflow steps + - load context + - route the turn + - execute one route + - compose/finalize the response + +In short: +- `agents` = how the backend talks to the model +- `nodes` = how the workflow is structured + +## Checkpointing + +Checkpointing is wired in: +- `orchestration/checkpoints.py` +- `orchestration/orchestrator.py` + +`checkpoints.py`: +- builds the Postgres connection info +- creates the LangGraph `PostgresSaver` +- calls `setup()` so LangGraph manages its own checkpoint tables + +`orchestrator.py`: +- compiles a persistent graph with that saver +- uses `thread_id = session_id` +- exposes backend `resume(session_id, checkpoint_id=None)` + +The checkpointed state is durable in Postgres and is separate from the product transcript tables. + +## How Checkpoints Are Written + +LangGraph does not write a checkpoint “for every edge”. + +Edges are just routing links. The checkpointer persists state when the graph commits work for a step: +- after node execution updates state +- when LangGraph records checkpoint writes/versions for that step + +So the mental model is: +- node runs +- state updates are committed +- checkpoint is saved +- graph follows the next edge + +That is why checkpointing is meaningful at graph-step boundaries, not in the middle of a blocking OpenAI call or warehouse query. + +## Resume Behavior + +Backend resume is currently supported. + +What exists now: +- durable checkpointed state in Postgres +- `thread_id = session_id` +- backend `resume(session_id, checkpoint_id=None)` + +What does not exist yet: +- frontend resume UX +- mid-call interruption/resume inside a single blocking external call + +Resume works at graph-step boundaries, not inside an in-flight tool/LLM/database call. 
diff --git a/ddpui/core/dashboard_chat/agents/answer_formatting.py b/ddpui/core/dashboard_chat/agents/final_answer_formatting.py similarity index 100% rename from ddpui/core/dashboard_chat/agents/answer_formatting.py rename to ddpui/core/dashboard_chat/agents/final_answer_formatting.py diff --git a/ddpui/core/dashboard_chat/agents/interface.py b/ddpui/core/dashboard_chat/agents/llm_client_interface.py similarity index 100% rename from ddpui/core/dashboard_chat/agents/interface.py rename to ddpui/core/dashboard_chat/agents/llm_client_interface.py diff --git a/ddpui/core/dashboard_chat/agents/openai.py b/ddpui/core/dashboard_chat/agents/openai_llm_client.py similarity index 92% rename from ddpui/core/dashboard_chat/agents/openai.py rename to ddpui/core/dashboard_chat/agents/openai_llm_client.py index 28e4a411f..462f36307 100644 --- a/ddpui/core/dashboard_chat/agents/openai.py +++ b/ddpui/core/dashboard_chat/agents/openai_llm_client.py @@ -7,13 +7,13 @@ from openai import OpenAI -from ddpui.core.dashboard_chat.agents.answer_formatting import ( +from ddpui.core.dashboard_chat.agents.final_answer_formatting import ( TABLE_SUMMARY_JSON_INSTRUCTIONS, build_final_answer_context_payload, format_table_summary_markdown, serialize_final_answer_context_payload, ) -from ddpui.core.dashboard_chat.agents.prompt_store import DashboardChatPromptStore +from ddpui.core.dashboard_chat.agents.prompt_template_store import DashboardChatPromptStore from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatFollowUpContext, @@ -69,14 +69,24 @@ def classify_intent( ) -> DashboardChatIntentDecision: """Classify intent with prototype-style conversation awareness.""" system_prompt = self.prompt_store.get(DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION) - if conversation_context.last_sql_query or conversation_context.last_chart_ids: + if ( + conversation_context.last_sql_query + or conversation_context.last_chart_ids + or 
conversation_context.last_answer_text + or conversation_context.last_intent + ): system_prompt += ( "\n\nCONVERSATION CONTEXT:\n" f"- Previous SQL: {conversation_context.last_sql_query or 'None'}\n" f"- Previous tables: {', '.join(conversation_context.last_tables_used) or 'None'}\n" f"- Previous charts: {', '.join(conversation_context.last_chart_ids) or 'None'}\n" - f"- Last response type: {conversation_context.last_response_type or 'None'}\n\n" - "Use this context to detect follow-up queries that want to modify or expand on previous results." + f"- Last response type: {conversation_context.last_response_type or 'None'}\n" + f"- Last intent: {conversation_context.last_intent or 'None'}\n" + f"- Last answer text: {conversation_context.last_answer_text or 'None'}\n\n" + "Use this context to detect follow-up queries that want to modify or expand on previous results. " + "If the new query refers to entities returned in the immediately previous answer " + '(for example "these facilitators", "those students", "they", or "them"), ' + "treat that as follow-up context rather than as missing specificity." ) try: result = self._complete_json( diff --git a/ddpui/core/dashboard_chat/agents/prompt_store.py b/ddpui/core/dashboard_chat/agents/prompt_template_store.py similarity index 96% rename from ddpui/core/dashboard_chat/agents/prompt_store.py rename to ddpui/core/dashboard_chat/agents/prompt_template_store.py index 5b3b677db..55044271f 100644 --- a/ddpui/core/dashboard_chat/agents/prompt_store.py +++ b/ddpui/core/dashboard_chat/agents/prompt_template_store.py @@ -41,6 +41,9 @@ - "Filter to CGI donors only" (add filter) - "Same but for last quarter" (modify timeframe) - "Show weekly instead" (change aggregation) +- "Which districts are these facilitators from?" (use the facilitators returned in the previous result) +- "Which programs are those students in?" (expand the previously returned entity set with a new dimension) +- "Which states are they from?" 
(resolve the pronoun from the immediately previous result set) **follow_up_context** examples (requires previous context): - "Explain that metric" @@ -126,6 +129,8 @@ Use conversation history to: - Detect follow-up patterns - Understand context references ("that metric", "same query") +- Resolve referential follow-ups that point to the immediately previous result set + ("these facilitators", "those students", "they", "them", "that result") - Determine if SQL modification or explanation is needed - Extract reusable elements (tables, metrics, filters) from previous queries diff --git a/ddpui/core/dashboard_chat/config.py b/ddpui/core/dashboard_chat/config.py index 493bad3a1..14c0c8cbc 100644 --- a/ddpui/core/dashboard_chat/config.py +++ b/ddpui/core/dashboard_chat/config.py @@ -4,7 +4,7 @@ from dataclasses import dataclass import os -from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector.vector_documents import DashboardChatSourceType def _parse_bool(value: str | None, default: bool) -> bool: diff --git a/ddpui/core/dashboard_chat/context/allowlist.py b/ddpui/core/dashboard_chat/context/dashboard_table_allowlist.py similarity index 100% rename from ddpui/core/dashboard_chat/context/allowlist.py rename to ddpui/core/dashboard_chat/context/dashboard_table_allowlist.py diff --git a/ddpui/core/dashboard_chat/context/dbt_docs.py b/ddpui/core/dashboard_chat/context/dbt_docs_artifacts.py similarity index 100% rename from ddpui/core/dashboard_chat/context/dbt_docs.py rename to ddpui/core/dashboard_chat/context/dbt_docs_artifacts.py diff --git a/ddpui/core/dashboard_chat/contracts/__init__.py b/ddpui/core/dashboard_chat/contracts/__init__.py index 6f2fba813..421bb38f3 100644 --- a/ddpui/core/dashboard_chat/contracts/__init__.py +++ b/ddpui/core/dashboard_chat/contracts/__init__.py @@ -1,20 +1,20 @@ """Typed contracts for dashboard chat orchestration.""" -from ddpui.core.dashboard_chat.contracts.conversation import 
( +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, ) -from ddpui.core.dashboard_chat.contracts.intents import ( +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, ) -from ddpui.core.dashboard_chat.contracts.response import ( +from ddpui.core.dashboard_chat.contracts.response_contracts import ( DashboardChatCitation, DashboardChatResponse, ) -from ddpui.core.dashboard_chat.contracts.retrieval import ( +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import ( DashboardChatRetrievedDocument, DashboardChatSchemaSnippet, ) -from ddpui.core.dashboard_chat.contracts.sql import DashboardChatSqlValidationResult +from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult diff --git a/ddpui/core/dashboard_chat/contracts/conversation.py b/ddpui/core/dashboard_chat/contracts/conversation_contracts.py similarity index 100% rename from ddpui/core/dashboard_chat/contracts/conversation.py rename to ddpui/core/dashboard_chat/contracts/conversation_contracts.py diff --git a/ddpui/core/dashboard_chat/contracts/intents.py b/ddpui/core/dashboard_chat/contracts/intent_contracts.py similarity index 100% rename from ddpui/core/dashboard_chat/contracts/intents.py rename to ddpui/core/dashboard_chat/contracts/intent_contracts.py diff --git a/ddpui/core/dashboard_chat/contracts/response.py b/ddpui/core/dashboard_chat/contracts/response_contracts.py similarity index 95% rename from ddpui/core/dashboard_chat/contracts/response.py rename to ddpui/core/dashboard_chat/contracts/response_contracts.py index 30d85c700..2bfb28de4 100644 --- a/ddpui/core/dashboard_chat/contracts/response.py +++ b/ddpui/core/dashboard_chat/contracts/response_contracts.py @@ -6,7 +6,7 @@ from django.core.serializers.json import DjangoJSONEncoder -from 
ddpui.core.dashboard_chat.contracts.intents import DashboardChatIntent +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntent @dataclass(frozen=True) diff --git a/ddpui/core/dashboard_chat/contracts/retrieval.py b/ddpui/core/dashboard_chat/contracts/retrieval_contracts.py similarity index 100% rename from ddpui/core/dashboard_chat/contracts/retrieval.py rename to ddpui/core/dashboard_chat/contracts/retrieval_contracts.py diff --git a/ddpui/core/dashboard_chat/contracts/sql.py b/ddpui/core/dashboard_chat/contracts/sql_contracts.py similarity index 100% rename from ddpui/core/dashboard_chat/contracts/sql.py rename to ddpui/core/dashboard_chat/contracts/sql_contracts.py diff --git a/ddpui/core/dashboard_chat/orchestration/checkpoints.py b/ddpui/core/dashboard_chat/orchestration/checkpoints.py new file mode 100644 index 000000000..2a77a341e --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/checkpoints.py @@ -0,0 +1,71 @@ +"""Official LangGraph Postgres checkpoint wiring for dashboard chat.""" + +from dataclasses import dataclass +from functools import lru_cache +import os + +from django.conf import settings +from psycopg import Connection +from psycopg.conninfo import make_conninfo +from psycopg.rows import dict_row + +from langgraph.checkpoint.postgres import PostgresSaver + + +@dataclass(frozen=True) +class DashboardChatCheckpointConfig: + """Configuration for dashboard chat LangGraph checkpoint persistence.""" + + conninfo: str + + @classmethod + def from_env(cls) -> "DashboardChatCheckpointConfig": + """Build checkpoint configuration from env or Django DB settings.""" + conninfo = os.getenv("AI_DASHBOARD_CHAT_LANGGRAPH_CHECKPOINT_CONNINFO") + if conninfo: + return cls(conninfo=conninfo) + + default_db = settings.DATABASES["default"] + return cls( + conninfo=make_conninfo( + dbname=default_db.get("NAME") or "", + user=default_db.get("USER") or "", + password=default_db.get("PASSWORD") or "", + host=default_db.get("HOST") or 
"", + port=str(default_db.get("PORT") or ""), + ) + ) + + +class DashboardChatCheckpointer: + """Long-lived Postgres saver wrapper used by the shared dashboard chat runtime.""" + + def __init__(self, config: DashboardChatCheckpointConfig): + self.config = config + self.connection = Connection.connect( + config.conninfo, + autocommit=True, + prepare_threshold=0, + row_factory=dict_row, + ) + self.saver = PostgresSaver(self.connection) + # LangGraph owns its checkpoint schema on this path. + self.saver.setup() + + def close(self) -> None: + """Close the underlying Postgres connection when tests/process shutdown need it.""" + if not self.connection.closed: + self.connection.close() + + +@lru_cache(maxsize=1) +def get_dashboard_chat_checkpointer() -> DashboardChatCheckpointer: + """Return the shared checkpoint wrapper for dashboard chat runtime persistence.""" + return DashboardChatCheckpointer(DashboardChatCheckpointConfig.from_env()) + + +def reset_dashboard_chat_checkpointer() -> None: + """Tear down the shared checkpointer so tests do not leak DB sessions.""" + if get_dashboard_chat_checkpointer.cache_info().currsize: + get_dashboard_chat_checkpointer().close() + get_dashboard_chat_checkpointer.cache_clear() diff --git a/ddpui/core/dashboard_chat/orchestration/conversation.py b/ddpui/core/dashboard_chat/orchestration/conversation_context.py similarity index 90% rename from ddpui/core/dashboard_chat/orchestration/conversation.py rename to ddpui/core/dashboard_chat/orchestration/conversation_context.py index a46035643..84158d743 100644 --- a/ddpui/core/dashboard_chat/orchestration/conversation.py +++ b/ddpui/core/dashboard_chat/orchestration/conversation_context.py @@ -10,19 +10,25 @@ ) from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard -from ddpui.core.dashboard_chat.orchestration.source_identifiers import ( +from ddpui.core.dashboard_chat.orchestration.source_identifier_parsing import ( chart_id_from_source_identifier, ) def 
def normalize_conversation_history(
    conversation_history: "Sequence[DashboardChatConversationMessage | dict[str, Any]] | None",
) -> "list[dict[str, Any]]":
    """Normalize stored history into checkpoint-safe message payloads.

    Accepts a mix of typed runtime messages and already-serialized dicts and
    returns plain ``{"role", "content", "payload"}`` dicts so the result can be
    persisted in a LangGraph checkpoint without custom encoders.

    Args:
        conversation_history: Mixed sequence of typed messages and/or dicts,
            or None.

    Returns:
        One normalized dict per input item; ``[]`` when the input is None or
        empty. Role defaults to "user" and content to "" for falsy values.
    """
    normalized_messages = []
    for item in conversation_history or []:
        # Read the three fields from whichever shape this item has; the
        # coercion below is then applied uniformly (the previous version
        # defaulted role/content only for dict inputs, not typed messages).
        if isinstance(item, dict):
            role = item.get("role")
            content = item.get("content")
            payload = item.get("payload")
        else:
            # Typed DashboardChatConversationMessage (or any object exposing
            # the same attributes).
            role = item.role
            content = item.content
            payload = item.payload
        normalized_messages.append(
            {
                "role": str(role or "user"),
                "content": str(content or ""),
                "payload": payload or {},
            }
        )
    return normalized_messages
"""Graph intent-routing helpers for dashboard chat orchestration."""

from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState
from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision


def route_after_intent(state: DashboardChatGraphState) -> str:
    """Map the classified intent on *state* to the name of the next graph node.

    The routing contract is that each intent's enum value doubles as a node
    name in the LangGraph graph, so the lookup is a direct value read.
    """
    intent_decision = get_intent_decision(state)
    return intent_decision.intent.value
+""" diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/__init__.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/__init__.py new file mode 100644 index 000000000..608ad0f39 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/__init__.py @@ -0,0 +1 @@ +"""Concrete LLM-callable tool implementations for dashboard chat.""" diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/dbt_tools.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/dbt_tools.py new file mode 100644 index 000000000..61a9ea643 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/dbt_tools.py @@ -0,0 +1,91 @@ +"""Deterministic dbt lookup tool handlers.""" + +from typing import Any + +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import ( + DashboardChatTurnContext, + dbt_resources_by_unique_id, +) + + +def handle_search_dbt_models_tool( + args: dict[str, Any], + state: DashboardChatGraphState, + turn_context: DashboardChatTurnContext, +) -> dict[str, Any]: + """Search allowlisted dbt nodes by name, description, and column metadata.""" + del turn_context + query = str(args.get("query") or "").strip().lower() + limit = max(1, min(int(args.get("limit", 8)), 20)) + if not query: + return {"models": [], "count": 0} + + results: list[dict[str, Any]] = [] + for node in dbt_resources_by_unique_id(state).values(): + table_name = node.get("table") + haystacks = [ + str(node.get("name") or ""), + str(node.get("description") or ""), + str(table_name or ""), + ] + for column in node.get("columns") or []: + haystacks.append(str(column.get("name") or "")) + haystacks.append(str(column.get("description") or "")) + if query not in " ".join(haystacks).lower(): + continue + results.append( + { + "name": str(node.get("name") or ""), + "schema": 
str(node.get("schema") or ""), + "database": str(node.get("database") or ""), + "description": str(node.get("description") or ""), + "columns": [ + str(column.get("name") or "") for column in (node.get("columns") or []) + ][:20], + "table": table_name, + } + ) + if len(results) >= limit: + break + + return {"models": results, "count": len(results)} + + +def handle_get_dbt_model_info_tool( + args: dict[str, Any], + state: DashboardChatGraphState, + turn_context: DashboardChatTurnContext, +) -> dict[str, Any]: + """Return one dbt model's description, columns, and lineage.""" + del turn_context + model_name = str(args.get("model_name") or "").strip().lower() + if not model_name: + return {"error": "model_name is required"} + + matched_unique_id: str | None = None + matched_node: dict[str, Any] | None = None + for unique_id, node in dbt_resources_by_unique_id(state).items(): + table_name = node.get("table") + candidates = { + str(node.get("name") or "").lower(), + str(table_name or "").lower(), + } + if model_name not in candidates: + continue + matched_unique_id = unique_id + matched_node = node + break + + if matched_unique_id is None or matched_node is None: + return {"error": f"Model not found: {model_name}"} + + return { + "model": str(matched_node.get("name") or ""), + "schema": str(matched_node.get("schema") or ""), + "database": str(matched_node.get("database") or ""), + "description": str(matched_node.get("description") or ""), + "columns": list(matched_node.get("columns") or [])[:50], + "upstream": list(matched_node.get("upstream") or []), + "downstream": list(matched_node.get("downstream") or []), + } diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py new file mode 100644 index 000000000..8f0b4d9ee --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py @@ -0,0 +1,220 @@ +"""Schema and 
"""Schema and metadata lookup tool handlers."""

from typing import Any

from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard
from ddpui.utils.custom_logger import CustomLogger

from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState
from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_allowlist
from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import find_tables_with_column
from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import (
    DashboardChatTurnContext,
    get_or_load_schema_snippets,
    get_turn_warehouse_tools,
    record_validated_distinct_values,
)

logger = CustomLogger("dashboard_chat")


def handle_get_schema_snippets_tool(
    warehouse_tools_factory,
    args: dict[str, Any],
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any]:
    """Return schema snippets for allowlisted tables only.

    Tables requested by the model but not in the dashboard allowlist are
    silently dropped from the lookup and reported back under
    "filtered_tables" so the model can self-correct.
    """
    allowlist = get_runtime_allowlist(state)
    # Table names are normalized to lowercase before allowlist checks.
    requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []]
    allowed_tables = [
        table_name for table_name in requested_tables if allowlist.is_allowed(table_name)
    ]
    filtered_tables = sorted(set(requested_tables) - set(allowed_tables))
    # Loads (and presumably caches on turn_context) snippets for the allowed
    # tables only — TODO confirm caching semantics in turn_context.
    schema_snippets_by_table = get_or_load_schema_snippets(
        warehouse_tools_factory,
        state,
        turn_context,
        tables=allowed_tables,
    )
    tables_payload = [
        {"table": table_name, "columns": snippet.columns}
        for table_name, snippet in schema_snippets_by_table.items()
        if table_name in allowed_tables
    ]
    response: dict[str, Any] = {"tables": tables_payload}
    if filtered_tables:
        response["filtered_tables"] = filtered_tables
        response[
            "filter_note"
        ] = f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard."
    return response


def handle_get_distinct_values_tool(
    warehouse_tools_factory,
    args: dict[str, Any],
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any]:
    """Return distinct values and persist validated filter values for the session.

    Guards, in order: the table must be allowlisted, and (when a schema
    snippet is available) the column must exist on that table. Successful
    lookups are recorded on the turn context so later SQL validation can
    accept filters that use these values.
    """
    allowlist = get_runtime_allowlist(state)
    table_name = str(args.get("table") or "").lower()
    column_name = str(args.get("column") or "")
    # Clamp the requested row limit to 1..200.
    limit = max(1, min(int(args.get("limit", 50)), 200))
    if not allowlist.is_allowed(table_name):
        return {
            "error": "table_not_allowed",
            "table": table_name,
            "message": (f"Table {table_name} is not accessible in the current dashboard context."),
        }

    schema_snippets_by_table = get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context)
    snippet = schema_snippets_by_table.get(table_name)
    normalized_column_name = column_name.lower()
    # If we have schema info and the column is absent, return a corrective
    # error listing other tables that do carry the column. When no snippet is
    # cached (snippet is None) the check is skipped and the warehouse call
    # proceeds — the warehouse itself will reject a bad column.
    if snippet is not None and normalized_column_name not in {
        str(column.get("name") or "").lower() for column in snippet.columns
    }:
        candidates = find_tables_with_column(normalized_column_name, schema_snippets_by_table)
        return {
            "error": "column_not_in_table",
            "table": table_name,
            "column": column_name,
            "candidates": candidates,
            "message": (
                f"Column {column_name} is not available on {table_name}. "
                "Use a table that contains it, inspect that schema, and retry the lookup."
            ),
        }

    values = get_turn_warehouse_tools(
        warehouse_tools_factory,
        turn_context,
        state,
    ).get_distinct_values(
        table_name=table_name,
        column_name=column_name,
        limit=limit,
    )
    # Remember these values so SQL using them passes the distinct-filter gate.
    record_validated_distinct_values(
        turn_context=turn_context,
        table_name=table_name,
        column_name=column_name,
        values=values,
    )
    return {
        "table": table_name,
        "column": column_name,
        "values": values,
        "count": len(values),
    }


def handle_list_tables_by_keyword_tool(
    warehouse_tools_factory,
    args: dict[str, Any],
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any]:
    """Search allowlisted tables by table name or column name.

    Two passes: first tables whose name contains the keyword, then tables
    with a matching column name. Schema loading failures degrade to
    name-only matching rather than failing the tool call.
    """
    allowlist = get_runtime_allowlist(state)
    keyword = str(args.get("keyword") or "").strip().lower()
    # Clamp the result limit to 1..50.
    limit = max(1, min(int(args.get("limit", 15)), 50))
    if not keyword:
        return {"tables": []}

    # Prefer the allowlist's own prioritized ordering when it provides one.
    allowlist_tables_source = allowlist.prioritized_tables() or sorted(allowlist.allowed_tables)
    # dict.fromkeys de-duplicates while preserving order.
    allowlisted_tables = list(
        dict.fromkeys(table_name.lower() for table_name in allowlist_tables_source)
    )
    # Match on the full (possibly schema-qualified) name or just the bare
    # table name after the last ".".
    direct_match_tables = [
        table_name
        for table_name in allowlisted_tables
        if keyword in table_name or keyword in table_name.rsplit(".", 1)[-1]
    ]

    schema_snippets_by_table: dict[str, Any] = {}
    # Load schemas for the direct matches, or for every allowlisted table when
    # there were none (needed for the column-name pass below).
    lookup_tables = direct_match_tables or allowlisted_tables
    if lookup_tables:
        try:
            schema_snippets_by_table = get_or_load_schema_snippets(
                warehouse_tools_factory,
                state,
                turn_context,
                tables=lookup_tables,
            )
        except Exception as error:
            # Best-effort: fall back to name-only matches and surface the
            # failure as a turn warning instead of aborting the tool call.
            logger.warning("Dashboard chat keyword table lookup fell back to names only: %s", error)
            turn_context.warnings.append(str(error))

    matches: list[dict[str, Any]] = []
    seen_tables: set[str] = set()

    # Pass 1: tables whose name matched the keyword.
    for table_name in direct_match_tables:
        column_names = [
            str(column.get("name") or "")
            for column in getattr(schema_snippets_by_table.get(table_name), "columns", [])
        ]
        matches.append({"table": table_name, "columns": column_names[:40]})
        seen_tables.add(table_name)
        if len(matches) >= limit:
            break

    # Pass 2: remaining tables that contain a column matching the keyword.
    for table_name, snippet in schema_snippets_by_table.items():
        if table_name in seen_tables:
            continue
        column_names = [str(column.get("name") or "") for column in snippet.columns]
        if not any(keyword in column_name.lower() for column_name in column_names):
            continue
        matches.append({"table": table_name, "columns": column_names[:40]})
        if len(matches) >= limit:
            break

    if matches:
        return {
            "tables": matches,
            "hint": (
                f"Found {len(matches)} allowlisted tables. Check schema before assuming table structure."
            ),
        }
    return {
        "tables": [],
        "hint": (
            f"No allowlisted tables matched '{keyword}'. Try a broader keyword or retrieve chart docs first."
        ),
    }


def handle_check_table_row_count_tool(
    warehouse_tools_factory,
    args: dict[str, Any],
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any]:
    """Count rows in one allowlisted table.

    The COUNT(*) statement is still passed through DashboardChatSqlGuard so
    the same allowlist/sanitization rules apply as for model-authored SQL.
    """
    allowlist = get_runtime_allowlist(state)
    table_name = str(args.get("table") or "").lower()
    if not allowlist.is_allowed(table_name):
        return {
            "error": "table_not_allowed",
            "table": table_name,
            "message": (f"Table {table_name} is not accessible in the current dashboard context."),
        }

    # table_name is interpolated into SQL, but only after the allowlist check
    # above and the guard validation below.
    sql = f"SELECT COUNT(*) AS row_count FROM {table_name} LIMIT 1"
    validation = DashboardChatSqlGuard(
        allowlist=allowlist,
        max_rows=1,
    ).validate(sql)
    if not validation.is_valid or not validation.sanitized_sql:
        return {"error": "sql_validation_failed", "issues": validation.errors}

    rows = get_turn_warehouse_tools(
        warehouse_tools_factory,
        turn_context,
        state,
    ).execute_sql(validation.sanitized_sql)
    row_count = 0
    if rows:
        row_count = int(rows[0].get("row_count") or 0)
    return {"table": table_name, "row_count": row_count, "has_data": row_count > 0}
"""Structured SQL correction helpers for model-authored warehouse queries."""

import re
from typing import Any

from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState
from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import (
    best_table_for_missing_columns,
    find_tables_with_column,
    primary_table_name,
    referenced_sql_identifier_refs,
    resolve_identifier_table,
    resolve_table_qualifier,
    table_references,
    tables_with_column,
)
from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import (
    DashboardChatTurnContext,
    get_or_load_schema_snippets,
)


def missing_columns_in_primary_table(
    warehouse_tools_factory,
    *,
    sql: str,
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any] | None:
    """Return a corrective tool error when SQL references absent columns.

    Cross-checks every column identifier in *sql* against cached schema
    snippets for the tables the query references. Returns None when nothing
    is missing (or the query references no tables); otherwise returns a
    structured "column_not_in_table" payload the tool loop feeds back to the
    model for self-correction.
    """
    table_refs = table_references(sql)
    referenced_tables = [
        reference["table_name"] for reference in table_refs if reference.get("table_name")
    ]
    if not referenced_tables:
        return None

    # Snippets for the tables actually named in the query...
    schema_snippets_by_table = get_or_load_schema_snippets(
        warehouse_tools_factory,
        state,
        turn_context,
        tables=referenced_tables,
    )
    # ...and the full allowlisted set, used to suggest candidate tables.
    all_schema_snippets_by_table = get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context)
    missing_columns_by_table: dict[str, set[str]] = {}
    candidate_tables_by_column: dict[str, list[str]] = {}
    # De-duplicated, order-preserving list of referenced tables.
    tables_in_query = list(dict.fromkeys(referenced_tables))

    for qualifier, column_name in referenced_sql_identifier_refs(sql):
        resolved_table = resolve_identifier_table(
            qualifier=qualifier,
            column_name=column_name,
            table_refs=table_refs,
            schema_snippets_by_table=schema_snippets_by_table,
        )
        if resolved_table is not None:
            # Identifier resolves cleanly to a table that has the column.
            continue

        if qualifier is not None:
            # Qualified but unresolved: attribute the miss to the qualifier's
            # table if known, else fall back to the query's primary table.
            target_table = (
                resolve_table_qualifier(qualifier, table_refs)
                or primary_table_name(sql)
                or tables_in_query[0]
            )
        else:
            # Unqualified: if more than one query table carries the column the
            # reference is merely ambiguous, not missing — skip it.
            matching_tables = tables_with_column(column_name, tables_in_query, schema_snippets_by_table)
            if len(matching_tables) > 1:
                continue
            target_table = primary_table_name(sql) or tables_in_query[0]

        missing_columns_by_table.setdefault(target_table, set()).add(column_name)
        candidate_tables_by_column[column_name] = find_tables_with_column(
            column_name,
            all_schema_snippets_by_table,
        )

    missing_columns = sorted(
        {column_name for columns in missing_columns_by_table.values() for column_name in columns}
    )
    if not missing_columns:
        return None

    primary = primary_table_name(sql) or tables_in_query[0]
    # Report the single offending table when there is exactly one; otherwise
    # attribute the error to the primary table.
    target_table = (
        next(iter(missing_columns_by_table)) if len(missing_columns_by_table) == 1 else primary
    )
    best_table = best_table_for_missing_columns(missing_columns, all_schema_snippets_by_table)
    message = (
        f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. "
        "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table."
    )
    if best_table:
        message += f" Best candidate table: {best_table}."
    result = {
        "error": "column_not_in_table",
        "table": target_table,
        "missing_columns": missing_columns,
        "candidate_tables": candidate_tables_by_column,
        "best_table": best_table,
        "message": message,
    }
    # Keep the singular "column"/"candidates" keys for consumers that expect
    # the one-column shape of this error.
    if len(missing_columns) == 1:
        column_name = missing_columns[0]
        result["column"] = column_name
        result["candidates"] = candidate_tables_by_column.get(column_name, [])
    return result


def structured_sql_execution_error(
    warehouse_tools_factory,
    *,
    sql: str,
    error: Exception,
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any] | None:
    """Convert warehouse execution errors into structured corrective feedback.

    Currently recognizes only the missing-column error text; returns None for
    anything else so the caller falls back to a generic failure payload.
    """
    error_text = str(error)
    # Matches Postgres-style 'column "x" does not exist' messages, optionally
    # with a table/alias qualifier — NOTE(review): other warehouse engines may
    # word this differently; confirm coverage per supported warehouse.
    missing_column_match = re.search(
        r'column "(?:[\w]+\.)?([^"]+)" does not exist',
        error_text,
        flags=re.IGNORECASE,
    )
    if missing_column_match:
        missing_column = missing_column_match.group(1).lower()
        schema_snippets_by_table = get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context)
        candidate_tables = find_tables_with_column(missing_column, schema_snippets_by_table)
        return {
            "error": "column_not_in_table",
            "table": primary_table_name(sql),
            "column": missing_column,
            "missing_columns": [missing_column],
            "candidates": candidate_tables,
            "candidate_tables": {missing_column: candidate_tables},
            "best_table": candidate_tables[0] if candidate_tables else None,
            "message": (
                f"Column {missing_column} is not available on the current table. "
                "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns."
            ),
            "sql_used": sql,
        }
    return None
"""SQL execution orchestration for the LLM tool loop."""

import json
from typing import Any

from django.core.serializers.json import DjangoJSONEncoder

from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard

from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState
from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_allowlist
from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_corrections import (
    missing_columns_in_primary_table,
    structured_sql_execution_error,
)
from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_validation import (
    find_missing_distinct_filters,
    validate_follow_up_dimension_usage,
    validate_sql_allowlist,
)
from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import (
    DashboardChatTurnContext,
    get_turn_warehouse_tools,
    record_validated_filters_from_sql,
)


def handle_run_sql_query_tool(
    warehouse_tools_factory,
    runtime_config,
    args: dict[str, Any],
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any]:
    """Validate SQL and let the tool loop self-correct on structured failures.

    Gates run in a deliberate order — allowlist, follow-up-dimension check,
    distinct-value check, guard sanitization, missing-column check — each
    returning a structured error payload the model can act on before any
    warehouse round-trip happens.
    """
    allowlist = get_runtime_allowlist(state)
    sql = str(args.get("sql") or "").strip()
    if not sql:
        return {"error": "sql_missing", "message": "SQL is required"}

    # Gate 1: every referenced table must be dashboard-allowlisted.
    allowlist_validation = validate_sql_allowlist(sql, allowlist)
    if not allowlist_validation["valid"]:
        return {
            "error": "table_not_allowed",
            "invalid_tables": allowlist_validation["invalid_tables"],
            "message": allowlist_validation["message"],
        }

    # Gate 2: add-dimension follow-ups must actually change query granularity.
    follow_up_dimension_validation = validate_follow_up_dimension_usage(
        warehouse_tools_factory,
        sql=sql,
        state=state,
        turn_context=turn_context,
    )
    if follow_up_dimension_validation is not None:
        return follow_up_dimension_validation

    # Gate 3: text filters must use values confirmed via get_distinct_values.
    missing_distinct = find_missing_distinct_filters(
        warehouse_tools_factory,
        sql,
        state,
        turn_context,
    )
    if missing_distinct:
        return {
            "error": "must_fetch_distinct_values",
            "missing": missing_distinct,
            "message": (
                "Call get_distinct_values for these columns, then regenerate the SQL using one of the returned values."
            ),
        }

    # Gate 4: guard-level sanitization (row cap, statement shape, etc.).
    validation = DashboardChatSqlGuard(
        allowlist=allowlist,
        max_rows=runtime_config.max_query_rows,
    ).validate(sql)
    # Record the validation outcome on the turn even when it fails.
    turn_context.last_sql_validation = validation
    if not validation.is_valid or not validation.sanitized_sql:
        return {
            "error": "sql_validation_failed",
            "issues": validation.errors,
            "warnings": validation.warnings,
        }

    # Gate 5: schema-level check that every referenced column exists.
    missing_columns = missing_columns_in_primary_table(
        warehouse_tools_factory,
        sql=validation.sanitized_sql,
        state=state,
        turn_context=turn_context,
    )
    if missing_columns is not None:
        return missing_columns

    turn_context.last_sql = validation.sanitized_sql
    try:
        rows = get_turn_warehouse_tools(
            warehouse_tools_factory,
            turn_context,
            state,
        ).execute_sql(validation.sanitized_sql)
    except Exception as error:
        # Prefer a structured corrective payload (e.g. missing column) so the
        # tool loop can retry; otherwise surface the raw error text.
        structured_error = structured_sql_execution_error(
            warehouse_tools_factory,
            sql=validation.sanitized_sql,
            error=error,
            state=state,
            turn_context=turn_context,
        )
        if structured_error is not None:
            return structured_error
        return {
            "success": False,
            "error": str(error),
            "sql_used": validation.sanitized_sql,
        }

    # Round-trip through DjangoJSONEncoder so dates/decimals from the
    # warehouse become plain JSON-safe values before checkpointing.
    serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder))
    turn_context.last_sql_results = serialized_rows
    record_validated_filters_from_sql(
        turn_context=turn_context,
        sql=validation.sanitized_sql,
    )
    return {
        "success": True,
        "row_count": len(serialized_rows),
        "error": None,
        "sql_used": validation.sanitized_sql,
        "columns": list(serialized_rows[0].keys()) if serialized_rows else [],
        "rows": serialized_rows,
    }
-88,21 +88,21 @@ def resolve_identifier_table( qualifier: str | None, column_name: str, table_refs: Sequence[dict[str, str | None]], - schema_cache: dict[str, Any], + schema_snippets_by_table: dict[str, Any], ) -> str | None: """Resolve one referenced column to a concrete query table when it is unambiguous.""" if qualifier is not None: resolved_table = resolve_table_qualifier(qualifier, table_refs) if not resolved_table: return None - if column_name.lower() in table_columns(schema_cache.get(resolved_table)): + if column_name.lower() in table_columns(schema_snippets_by_table.get(resolved_table)): return resolved_table return None query_tables = [ str(reference["table_name"]) for reference in table_refs if reference.get("table_name") ] - matching_tables = tables_with_column(column_name, query_tables, schema_cache) + matching_tables = tables_with_column(column_name, query_tables, schema_snippets_by_table) if len(matching_tables) == 1: return matching_tables[0] return None @@ -233,11 +233,11 @@ def referenced_sql_identifier_refs(sql: str) -> list[tuple[str | None, str]]: def best_table_for_missing_columns( missing_columns: Sequence[str], - schema_cache: dict[str, Any], + schema_snippets_by_table: dict[str, Any], ) -> str | None: """Return the first allowlisted table that covers all missing columns.""" wanted_columns = {column_name.lower() for column_name in missing_columns} - for table_name, snippet in schema_cache.items(): + for table_name, snippet in schema_snippets_by_table.items(): available_columns = {str(column.get("name") or "").lower() for column in snippet.columns} if wanted_columns.issubset(available_columns): return table_name @@ -268,13 +268,13 @@ def extract_text_filter_values(where_clause: str) -> list[tuple[str | None, str, def find_tables_with_column( column_name: str, - schema_cache: dict[str, Any], + schema_snippets_by_table: dict[str, Any], limit: int = 10, ) -> list[str]: """Find allowlisted tables that contain one column.""" matches: list[str] = [] 
"""Validation helpers that gate LLM-authored SQL before execution."""

import re
from typing import Any

from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist
from ddpui.core.dashboard_chat.contracts import DashboardChatIntent

from ddpui.core.dashboard_chat.orchestration.conversation_context import (
    extract_requested_follow_up_dimension,
)
from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState
from ddpui.core.dashboard_chat.orchestration.state.accessors import (
    get_conversation_context,
    get_intent_decision,
)
from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import (
    extract_text_filter_values,
    find_tables_with_column,
    normalize_dimension_name,
    primary_table_name,
    resolve_identifier_table,
    structural_dimensions_from_sql,
    table_references,
    tables_with_column,
)
from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import (
    DashboardChatTurnContext,
    get_or_load_schema_snippets,
    has_validated_distinct_value,
    is_text_type,
)
from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard


def validate_sql_allowlist(
    sql: str,
    allowlist: DashboardChatAllowlist,
) -> dict[str, Any]:
    """Validate that all referenced tables are in the dashboard allowlist.

    Returns {"valid", "invalid_tables", "message"}; "message" is empty on
    success.
    """
    # NOTE(review): reaches into DashboardChatSqlGuard's private
    # _extract_table_names — consider promoting it to a public helper.
    referenced_tables = DashboardChatSqlGuard._extract_table_names(sql)
    invalid_tables = [
        table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name)
    ]
    if invalid_tables:
        return {
            "valid": False,
            "invalid_tables": invalid_tables,
            "message": (
                "SQL references tables not available in the current dashboard: "
                + ", ".join(invalid_tables)
                + ". Use list_tables_by_keyword to find allowed tables."
            ),
        }
    return {"valid": True, "invalid_tables": [], "message": ""}


def validate_follow_up_dimension_usage(
    warehouse_tools_factory,
    *,
    sql: str,
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> dict[str, Any] | None:
    """Keep add-dimension follow-ups from succeeding without changing query granularity.

    Only applies when the classified intent is FOLLOW_UP_SQL with an
    "add_dimension" follow-up; returns None (no objection) in every other
    case, or a structured "requested_dimension_missing" error when the new
    SQL does not actually introduce the requested dimension.
    """
    intent_decision = get_intent_decision(state)
    if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL:
        return None
    if intent_decision.follow_up_context.follow_up_type != "add_dimension":
        return None

    # Fall back to the raw user query when no explicit instruction was parsed.
    requested_dimension = extract_requested_follow_up_dimension(
        intent_decision.follow_up_context.modification_instruction or state["user_query"]
    )
    if not requested_dimension:
        return None

    previous_sql = get_conversation_context(state).last_sql_query or ""
    current_dimensions = structural_dimensions_from_sql(sql)
    previous_dimensions = structural_dimensions_from_sql(previous_sql)
    normalized_requested_dimension = normalize_dimension_name(requested_dimension)
    # Pass only when the dimension is newly introduced by this SQL (present
    # now, absent from the previous query).
    if (
        normalized_requested_dimension in current_dimensions
        and normalized_requested_dimension not in previous_dimensions
    ):
        return None

    candidate_tables = find_tables_with_column(
        requested_dimension,
        get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context),
    )
    return {
        "error": "requested_dimension_missing",
        "requested_dimension": requested_dimension,
        "previous_dimensions": sorted(previous_dimensions),
        "current_dimensions": sorted(current_dimensions),
        "candidate_tables": candidate_tables,
        "message": (
            f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. "
            "Use the requested dimension exactly, or pick a table that contains it."
        ),
    }


def find_missing_distinct_filters(
    warehouse_tools_factory,
    sql: str,
    state: DashboardChatGraphState,
    turn_context: DashboardChatTurnContext,
) -> list[dict[str, Any]]:
    """Detect text filters that require a prior distinct-values call.

    Scans the WHERE clause for string-literal comparisons on text-typed
    columns and reports any whose value has not been validated via
    get_distinct_values during this turn. Returns [] when there is nothing
    to flag.
    """
    # Grab the WHERE clause up to GROUP/ORDER/LIMIT (or end of statement).
    where_match = re.search(
        r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)",
        sql,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if not where_match:
        return []

    table_refs = table_references(sql)
    query_tables = [
        reference["table_name"] for reference in table_refs if reference.get("table_name")
    ]
    if not query_tables:
        return []
    primary = primary_table_name(sql) or query_tables[0]

    # Snippets for the query's own tables, plus the full allowlisted set for
    # candidate suggestions.
    full_schema_snippets_by_table = get_or_load_schema_snippets(
        warehouse_tools_factory,
        state,
        turn_context,
        tables=query_tables,
    )
    all_schema_snippets_by_table = get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context)

    # Per-table map of lowercased column name -> lowercased type string.
    column_types = {
        table_name: {
            str(column.get("name") or "")
            .lower(): str(column.get("data_type") or column.get("type") or "")
            .lower()
            for column in getattr(snippet, "columns", [])
        }
        for table_name, snippet in full_schema_snippets_by_table.items()
    }
    missing: list[dict[str, Any]] = []
    for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)):
        normalized_column = column_name.lower()
        resolved_table = resolve_identifier_table(
            qualifier=qualifier,
            column_name=normalized_column,
            table_refs=table_refs,
            schema_snippets_by_table=full_schema_snippets_by_table,
        )
        # Ambiguous unqualified reference (several query tables carry the
        # column): skip rather than guess.
        if resolved_table is None and qualifier is None:
            matching_tables = tables_with_column(normalized_column, query_tables, full_schema_snippets_by_table)
            if len(matching_tables) > 1:
                continue
        if resolved_table is None:
            candidate_tables = find_tables_with_column(normalized_column, all_schema_snippets_by_table)
            # An unqualified column that exists elsewhere in the allowlist is
            # handled by the missing-column gate, not this one.
            if qualifier is None and candidate_tables:
                continue
            missing.append(
                {
                    "table": primary,
                    "column": column_name,
                    "error": "column_not_in_table",
                    "candidates": candidate_tables,
                }
            )
            continue
        data_type = column_types.get(resolved_table, {}).get(normalized_column, "")
        # Only text-typed columns need distinct-value validation.
        if not data_type or not is_text_type(data_type):
            continue
        if not has_validated_distinct_value(
            turn_context.validated_distinct_values,
            table_name=resolved_table,
            column_name=normalized_column,
            value=value,
        ):
            missing.append({"table": resolved_table, "column": column_name, "value": value})
    return missing
Any]: + """Retrieve current-dashboard, org, and dbt context using the tool contract.""" + org = get_runtime_org(state) + allowlist = get_runtime_allowlist(state) + dashboard_export = state.get("dashboard_export_payload") or {} + query = str(args.get("query") or state["user_query"]).strip() + limit = max(1, min(int(args.get("limit", 8)), 20)) + requested_types = [ + str(doc_type) + for doc_type in (args.get("types") or ["chart", "dataset", "context", "dbt_model"]) + ] + retrieved_documents = [] + cached_embedding = get_or_embed_query( + vector_store, + query, + turn_context.query_embeddings, + ) + + if "chart" in requested_types: + retrieved_documents.extend( + retrieve_vector_documents( + vector_store, + runtime_config, + org=org, + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=source_config.filter_enabled( + [DashboardChatSourceType.DASHBOARD_EXPORT] + ), + dashboard_id=state["dashboard_id"], + query_embedding=cached_embedding, + ) + ) + if "context" in requested_types: + retrieved_documents.extend( + retrieve_vector_documents( + vector_store, + runtime_config, + org=org, + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=source_config.filter_enabled( + [DashboardChatSourceType.DASHBOARD_CONTEXT] + ), + dashboard_id=state["dashboard_id"], + query_embedding=cached_embedding, + ) + ) + retrieved_documents.extend( + retrieve_vector_documents( + vector_store, + runtime_config, + org=org, + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=source_config.filter_enabled([DashboardChatSourceType.ORG_CONTEXT]), + query_embedding=cached_embedding, + ) + ) + if "dataset" in requested_types or "dbt_model" in requested_types: + dbt_results = retrieve_vector_documents( + vector_store, + runtime_config, + org=org, + collection_name=state.get("vector_collection_name"), + query_text=query, + source_types=source_config.filter_enabled( + [ + 
DashboardChatSourceType.DBT_MANIFEST, + DashboardChatSourceType.DBT_CATALOG, + ] + ), + query_embedding=cached_embedding, + ) + retrieved_documents.extend(filter_allowlisted_dbt_results(dbt_results, allowlist)) + + merged_results = dedupe_retrieved_documents(retrieved_documents)[:limit] + for document in merged_results: + if document.document_id in turn_context.retrieved_document_ids: + continue + turn_context.retrieved_document_ids.add(document.document_id) + turn_context.retrieved_documents.append(document) + + docs = [ + build_tool_document_payload(document, allowlist, dashboard_export) + for document in merged_results + ] + return {"docs": docs, "count": len(docs)} diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/__init__.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/__init__.py new file mode 100644 index 000000000..637b7e6a5 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/__init__.py @@ -0,0 +1 @@ +"""Tool-loop runtime helpers for dashboard chat.""" diff --git a/ddpui/core/dashboard_chat/orchestration/tools/loop.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py similarity index 68% rename from ddpui/core/dashboard_chat/orchestration/tools/loop.py rename to ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py index a66496f48..dcdbe7a65 100644 --- a/ddpui/core/dashboard_chat/orchestration/tools/loop.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py @@ -6,29 +6,39 @@ from django.core.serializers.json import DjangoJSONEncoder -from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseToolsError +from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseToolsError from ddpui.utils.custom_logger import CustomLogger -from ddpui.core.dashboard_chat.orchestration.presentation import ( +from ddpui.core.dashboard_chat.orchestration.response_composer import ( 
serialize_tool_result, summarize_tool_call, max_turns_message, fallback_answer_text, ) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState -from ddpui.core.dashboard_chat.orchestration.tools.cache import ( - seed_distinct_cache_from_previous_sql, +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.dbt_tools import ( + handle_get_dbt_model_info_tool, + handle_search_dbt_models_tool, ) -from ddpui.core.dashboard_chat.orchestration.tools.handlers import ( +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.vector_retrieval_tool import ( handle_retrieve_docs_tool, - handle_get_schema_snippets_tool, - handle_search_dbt_models_tool, - handle_get_dbt_model_info_tool, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.schema_tools import ( + handle_check_table_row_count_tool, handle_get_distinct_values_tool, + handle_get_schema_snippets_tool, handle_list_tables_by_keyword_tool, - handle_check_table_row_count_tool, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_execution_tools import ( handle_run_sql_query_tool, ) +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import ( + DashboardChatTurnContext, + current_validated_distinct_payloads, + current_schema_snippet_payloads, + seed_validated_distinct_values_from_previous_sql, +) logger = CustomLogger("dashboard_chat") @@ -41,31 +51,19 @@ def execute_tool_loop( runtime_config, tool_specifications, *, - state: DashboardChatRuntimeState, + state: DashboardChatGraphState, messages: list[dict[str, Any]], max_turns: int, - initial_embedding_cache: dict[str, list[float]] | None = None, + initial_query_embeddings: dict[str, list[float]] | None = None, ) -> dict[str, Any]: """Execute the prototype's iterative tool 
loop.""" - execution_context: dict[str, Any] = { - "distinct_cache": set(state.get("session_distinct_cache") or set()), - "embedding_cache": dict(initial_embedding_cache or {}), - "schema_cache": dict(state.get("session_schema_cache") or {}), - "retrieved_documents": [], - "retrieved_document_ids": set(), - "tool_calls": [], - "warnings": list(state.get("warnings", [])), - "warehouse_tools": None, - "last_sql": None, - "last_sql_results": None, - "last_sql_validation": None, - "timing_breakdown": { - "tool_calls_ms": list((state.get("timing_breakdown") or {}).get("tool_calls_ms") or []), - }, - } + turn_context = DashboardChatTurnContext.from_state( + state, + initial_query_embeddings=initial_query_embeddings, + ) tool_loop_started_at = perf_counter() - seed_distinct_cache_from_previous_sql(state, execution_context) - intent_decision = state["intent_decision"] + seed_validated_distinct_values_from_previous_sql(state, turn_context) + intent_decision = get_intent_decision(state) for turn_index in range(max_turns): tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" @@ -103,11 +101,11 @@ def execute_tool_loop( answer_text=( (ai_message.get("content") or "").strip() or fallback_answer_text( - execution_context["retrieved_documents"], - execution_context["last_sql_results"], + turn_context.retrieved_documents, + turn_context.last_sql_results, ) ), - execution_context=execution_context, + turn_context=turn_context, max_turns_reached=False, tool_loop_started_at=tool_loop_started_at, ) @@ -129,14 +127,14 @@ def execute_tool_loop( tool_name=str(tool_call.get("name") or ""), args=args, state=state, - execution_context=execution_context, + turn_context=turn_context, ) tool_duration_ms = round((perf_counter() - tool_started_at) * 1000, 2) tool_name = str(tool_call.get("name") or "") - execution_context["timing_breakdown"]["tool_calls_ms"].append( + turn_context.timing_breakdown["tool_calls_ms"].append( {"name": tool_name, "duration_ms": 
tool_duration_ms} ) - execution_context["tool_calls"].append( + turn_context.tool_calls.append( summarize_tool_call( tool_name=tool_name, args=args, @@ -157,7 +155,7 @@ def execute_tool_loop( if tool_name == "run_sql_query" and result.get("success"): return build_tool_loop_result( answer_text="", - execution_context=execution_context, + turn_context=turn_context, max_turns_reached=False, tool_loop_started_at=tool_loop_started_at, ) @@ -165,9 +163,9 @@ def execute_tool_loop( return build_tool_loop_result( answer_text=max_turns_message( state["user_query"], - execution_context["retrieved_documents"], + turn_context.retrieved_documents, ), - execution_context=execution_context, + turn_context=turn_context, max_turns_reached=True, tool_loop_started_at=tool_loop_started_at, ) @@ -181,70 +179,72 @@ def execute_tool_call( *, tool_name: str, args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], + state: DashboardChatGraphState, + turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Execute one prototype tool against the Dalgo runtime primitives.""" try: if tool_name == "retrieve_docs": return handle_retrieve_docs_tool( - vector_store, source_config, runtime_config, args, state, execution_context + vector_store, source_config, runtime_config, args, state, turn_context ) if tool_name == "get_schema_snippets": return handle_get_schema_snippets_tool( - warehouse_tools_factory, args, state, execution_context + warehouse_tools_factory, args, state, turn_context ) if tool_name == "search_dbt_models": - return handle_search_dbt_models_tool(args, state, execution_context) + return handle_search_dbt_models_tool(args, state, turn_context) if tool_name == "get_dbt_model_info": - return handle_get_dbt_model_info_tool(args, state, execution_context) + return handle_get_dbt_model_info_tool(args, state, turn_context) if tool_name == "get_distinct_values": return handle_get_distinct_values_tool( - warehouse_tools_factory, args, state, 
execution_context + warehouse_tools_factory, args, state, turn_context ) if tool_name == "run_sql_query": return handle_run_sql_query_tool( - warehouse_tools_factory, runtime_config, args, state, execution_context + warehouse_tools_factory, runtime_config, args, state, turn_context ) if tool_name == "list_tables_by_keyword": return handle_list_tables_by_keyword_tool( - warehouse_tools_factory, args, state, execution_context + warehouse_tools_factory, args, state, turn_context ) if tool_name == "check_table_row_count": return handle_check_table_row_count_tool( - warehouse_tools_factory, args, state, execution_context + warehouse_tools_factory, args, state, turn_context ) return {"error": f"Unknown tool: {tool_name}"} except DashboardChatWarehouseToolsError as error: logger.warning("Dashboard chat tool %s failed: %s", tool_name, error) - execution_context["warnings"].append(str(error)) + turn_context.warnings.append(str(error)) return {"error": str(error)} except Exception as error: logger.exception("Dashboard chat tool %s failed", tool_name) - execution_context["warnings"].append(str(error)) + turn_context.warnings.append(str(error)) return {"error": str(error)} def build_tool_loop_result( *, answer_text: str, - execution_context: dict[str, Any], + turn_context: DashboardChatTurnContext, max_turns_reached: bool, tool_loop_started_at: float, ) -> dict[str, Any]: """Normalize tool-loop state into one runtime response payload.""" if max_turns_reached: - execution_context["tool_calls"].append({"name": "max_turns_reached"}) - warnings = list(dict.fromkeys(execution_context["warnings"])) - timing_breakdown = dict(execution_context.get("timing_breakdown") or {}) + turn_context.tool_calls.append({"name": "max_turns_reached"}) + warnings = list(dict.fromkeys(turn_context.warnings)) + timing_breakdown = dict(turn_context.timing_breakdown) timing_breakdown["tool_loop_ms"] = round((perf_counter() - tool_loop_started_at) * 1000, 2) return { "answer_text": answer_text.strip(), - 
"retrieved_documents": execution_context["retrieved_documents"], - "tool_calls": execution_context["tool_calls"], + "retrieved_documents": turn_context.retrieved_documents, + "tool_calls": turn_context.tool_calls, "timing_breakdown": timing_breakdown, - "sql": execution_context["last_sql"], - "sql_validation": execution_context["last_sql_validation"], - "sql_results": execution_context["last_sql_results"], + "schema_snippet_payloads": current_schema_snippet_payloads(turn_context), + "validated_distinct_payloads": current_validated_distinct_payloads(turn_context), + "sql": turn_context.last_sql, + "sql_validation": turn_context.last_sql_validation, + "sql_results": turn_context.last_sql_results, "warnings": warnings, } diff --git a/ddpui/core/dashboard_chat/orchestration/tools/specifications.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_specifications.py similarity index 100% rename from ddpui/core/dashboard_chat/orchestration/tools/specifications.py rename to ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_specifications.py diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py new file mode 100644 index 000000000..ac326776d --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py @@ -0,0 +1,264 @@ +"""Turn-scoped state helpers for dashboard chat tool execution.""" + +from collections.abc import Sequence +from dataclasses import dataclass, field +import re +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatRetrievedDocument +from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult +from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseTools +from ddpui.utils.custom_logger import CustomLogger + +from ddpui.core.dashboard_chat.orchestration.retrieval_support import 
get_or_embed_query +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + deserialize_distinct_payloads, + deserialize_schema_snippets, + serialize_distinct_payloads, + serialize_schema_snippets, +) +from ddpui.core.dashboard_chat.orchestration.state.accessors import ( + get_conversation_context, + get_runtime_allowlist, + get_runtime_org, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import ( + extract_text_filter_values, + resolve_identifier_table, + table_references, + tables_with_column, +) + +logger = CustomLogger("dashboard_chat") + + +@dataclass +class DashboardChatTurnContext: + """Ephemeral per-turn execution context kept outside checkpointed graph state.""" + + validated_distinct_values: set[tuple[str, str, str]] + query_embeddings: dict[str, list[float]] + schema_snippets_by_table: dict[str, Any] + retrieved_documents: list[DashboardChatRetrievedDocument] = field(default_factory=list) + retrieved_document_ids: set[str] = field(default_factory=set) + tool_calls: list[dict[str, Any]] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + warehouse_tools: DashboardChatWarehouseTools | None = None + last_sql: str | None = None + last_sql_results: list[dict[str, Any]] | None = None + last_sql_validation: DashboardChatSqlValidationResult | None = None + timing_breakdown: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_state( + cls, + state: DashboardChatGraphState, + *, + initial_query_embeddings: dict[str, list[float]] | None = None, + ) -> "DashboardChatTurnContext": + """Build a fresh turn context from checkpointed state plus per-run inputs.""" + return cls( + validated_distinct_values=hydrate_validated_distinct_values(state), + query_embeddings=dict(initial_query_embeddings or {}), + schema_snippets_by_table=hydrate_schema_snippets_by_table(state), + 
warnings=list(state.get("warnings", [])), + timing_breakdown={ + "tool_calls_ms": list( + (state.get("timing_breakdown") or {}).get("tool_calls_ms") or [] + ), + }, + ) + + +def get_turn_warehouse_tools( + warehouse_tools_factory, + turn_context: DashboardChatTurnContext, + state: DashboardChatGraphState, +) -> DashboardChatWarehouseTools: + """Build the warehouse tool helper lazily for the turn.""" + warehouse_tools = turn_context.warehouse_tools + if warehouse_tools is None: + warehouse_tools = warehouse_tools_factory(get_runtime_org(state)) + turn_context.warehouse_tools = warehouse_tools + return warehouse_tools + + +def hydrate_schema_snippets_by_table(state: DashboardChatGraphState) -> dict[str, Any]: + """Rebuild schema snippets from checkpoint payloads for one turn.""" + return deserialize_schema_snippets(state.get("schema_snippet_payloads")) + + +def hydrate_validated_distinct_values(state: DashboardChatGraphState) -> set[tuple[str, str, str]]: + """Rebuild validated distinct values from checkpoint payloads for one turn.""" + return deserialize_distinct_payloads(state.get("validated_distinct_payloads")) + + +def current_schema_snippet_payloads(turn_context: DashboardChatTurnContext) -> dict[str, Any]: + """Serialize the current turn's schema snippets back into checkpoint-safe payloads.""" + return serialize_schema_snippets(turn_context.schema_snippets_by_table) + + +def current_validated_distinct_payloads(turn_context: DashboardChatTurnContext) -> dict[str, Any]: + """Serialize the current turn's validated distinct values back into checkpoint-safe payloads.""" + return serialize_distinct_payloads(turn_context.validated_distinct_values) + + +def get_or_load_schema_snippets( + warehouse_tools_factory, + state: DashboardChatGraphState, + turn_context: DashboardChatTurnContext, + tables: Sequence[str] | None = None, +) -> dict[str, Any]: + """Load and keep schema snippets in the current turn state.""" + allowlist = get_runtime_allowlist(state) + 
requested_tables = [ + table_name.lower() + for table_name in (tables if tables is not None else allowlist.prioritized_tables()) + if allowlist.is_allowed(table_name) + ] + cache = turn_context.schema_snippets_by_table + missing_tables = [table_name for table_name in requested_tables if table_name not in cache] + if missing_tables: + snippets = get_turn_warehouse_tools( + warehouse_tools_factory, + turn_context, + state, + ).get_schema_snippets(missing_tables) + for table_name, snippet in snippets.items(): + cache[table_name.lower()] = snippet + if tables is None: + return cache + return {table_name: cache[table_name] for table_name in requested_tables if table_name in cache} + + +def normalize_distinct_value(value: Any) -> str: + """Normalize one distinct value for exact cache lookups.""" + return str(value).strip().lower() + + +def has_validated_distinct_value( + validated_distinct_values: set[tuple[Any, ...]], + *, + table_name: str, + column_name: str, + value: Any, +) -> bool: + """Return whether this exact text filter value was already validated in-session.""" + normalized_value = normalize_distinct_value(value) + normalized_column = column_name.lower() + normalized_table = table_name.lower() + return ( + (normalized_table, normalized_column, normalized_value) in validated_distinct_values + or ("*", normalized_column, normalized_value) in validated_distinct_values + or (normalized_table, normalized_column) in validated_distinct_values + or ("*", normalized_column) in validated_distinct_values + ) + + +def is_text_type(data_type: str) -> bool: + """Treat common string-like warehouse types as requiring distinct-value lookup.""" + return any(token in data_type for token in ["char", "text", "string", "varchar"]) + + +def record_validated_distinct_values( + *, + turn_context: DashboardChatTurnContext, + table_name: str, + column_name: str, + values: Sequence[Any], +) -> None: + """Persist exact validated filter values into the current turn state.""" + 
normalized_table = table_name.lower() + normalized_column = column_name.lower() + validated_distinct_values = turn_context.validated_distinct_values + for value in values: + normalized_value = normalize_distinct_value(value) + validated_distinct_values.add((normalized_table, normalized_column, normalized_value)) + validated_distinct_values.add(("*", normalized_column, normalized_value)) + + +def record_validated_filters_from_sql( + *, + turn_context: DashboardChatTurnContext, + sql: str, +) -> None: + """Seed exact validated filter values from a successful SQL statement.""" + table_refs = table_references(sql) + if not table_refs: + return + where_match = re.search( + r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", + sql, + flags=re.IGNORECASE | re.DOTALL, + ) + if not where_match: + return + + query_tables = [ + reference["table_name"] for reference in table_refs if reference.get("table_name") + ] + schema_snippets_by_table = dict(turn_context.schema_snippets_by_table) + values_by_target: dict[tuple[str, str], list[str]] = {} + for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): + normalized_column = column_name.lower() + resolved_table = resolve_identifier_table( + qualifier=qualifier, + column_name=normalized_column, + table_refs=table_refs, + schema_snippets_by_table=schema_snippets_by_table, + ) + if resolved_table is None and qualifier is None: + if schema_snippets_by_table: + matching = tables_with_column(normalized_column, query_tables, schema_snippets_by_table) + if len(matching) == 1: + resolved_table = matching[0] + elif len(query_tables) == 1: + resolved_table = query_tables[0] + values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) + + for (tbl, col), vals in values_by_target.items(): + record_validated_distinct_values( + turn_context=turn_context, + table_name=tbl, + column_name=col, + values=vals, + ) + + +def seed_validated_distinct_values_from_previous_sql( + state: 
DashboardChatGraphState, + turn_context: DashboardChatTurnContext, +) -> None: + """Treat text filters from the previous successful SQL as already validated for follow-ups.""" + previous_sql = get_conversation_context(state).last_sql_query + if not previous_sql: + return + record_validated_filters_from_sql( + turn_context=turn_context, + sql=previous_sql, + ) + + +def dbt_resources_by_unique_id(state: DashboardChatGraphState) -> dict[str, dict[str, Any]]: + """Return the allowlisted dbt index built at session start.""" + dbt_index = state.get("dbt_index") or {} + return dict(dbt_index.get("resources_by_unique_id") or {}) + + +__all__ = [ + "DashboardChatTurnContext", + "current_validated_distinct_payloads", + "current_schema_snippet_payloads", + "dbt_resources_by_unique_id", + "get_or_embed_query", + "get_or_load_schema_snippets", + "get_turn_warehouse_tools", + "has_validated_distinct_value", + "hydrate_validated_distinct_values", + "hydrate_schema_snippets_by_table", + "is_text_type", + "record_validated_distinct_values", + "record_validated_filters_from_sql", + "seed_validated_distinct_values_from_previous_sql", +] diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py b/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py new file mode 100644 index 000000000..116705daf --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py @@ -0,0 +1,93 @@ +"""Compose-response node for dashboard chat graph.""" + +from typing import Any + +from ddpui.core.dashboard_chat.contracts import DashboardChatResponse + +from ddpui.core.dashboard_chat.orchestration.response_composer import ( + build_usage_summary, + compose_final_answer_text, + determine_response_format, + sql_result_columns, +) +from ddpui.core.dashboard_chat.orchestration.retrieval_support import build_citations +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from 
ddpui.core.dashboard_chat.orchestration.state.accessors import ( + get_intent_decision, + get_retrieved_documents, + get_runtime_allowlist, + get_runtime_response, +) +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_citations, + serialize_response, +) + + +def compose_response_node( + state: DashboardChatGraphState, + llm_client, + vector_store, +) -> dict[str, Any]: + """Compose the final dashboard-chat response from state accumulated by prior nodes.""" + if state.get("response") is not None: + response = get_runtime_response(state) + return { + "response": serialize_response( + DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=response.citations, + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response.metadata, + ) + ) + } + + allowlist = get_runtime_allowlist(state) + retrieved_documents = get_retrieved_documents(state) + citations = build_citations( + retrieved_documents=retrieved_documents, + dashboard_export=state.get("dashboard_export_payload") or {}, + allowlist=allowlist, + ) + response_format = determine_response_format( + user_query=state["user_query"], + sql_results=state.get("sql_results"), + ) + execution_result = { + "answer_text": state.get("draft_answer_text"), + "retrieved_documents": retrieved_documents, + "sql": state.get("sql"), + "sql_results": state.get("sql_results"), + "warnings": list(state.get("warnings") or []), + "tool_calls": list(state.get("tool_calls") or []), + } + return { + "citations": serialize_citations(citations), + "response": serialize_response( + DashboardChatResponse( + answer_text=compose_final_answer_text( + llm_client, + state, + execution_result, + response_format=response_format, + ), + intent=get_intent_decision(state).intent, + citations=citations, + warnings=list(state.get("warnings") or []), + sql=state.get("sql"), + 
sql_results=state.get("sql_results"), + usage=build_usage_summary(llm_client, vector_store), + tool_calls=list(state.get("tool_calls") or []), + metadata={ + "response_format": response_format, + "table_columns": sql_result_columns(state.get("sql_results")), + }, + ) + ), + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py index e923e89f3..52fea65d5 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -2,17 +2,24 @@ from typing import Any -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist from ddpui.core.dashboard_chat.contracts import DashboardChatCitation, DashboardChatResponse -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.accessors import ( + get_intent_decision, + get_retrieved_documents, + get_runtime_allowlist, + get_runtime_response, + get_sql_validation_result, +) -def finalize_node(state: DashboardChatRuntimeState) -> dict[str, Any]: +def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: """Attach warehouse citations and metadata to the finished response.""" - response = state["response"] + response = get_runtime_response(state) citations = list(response.citations) - sql_validation = state.get("sql_validation") + sql_validation = get_sql_validation_result(state) if ( sql_validation is not None and sql_validation.is_valid @@ -30,31 +37,31 @@ def finalize_node(state: DashboardChatRuntimeState) -> dict[str, Any]: if table_name ) - allowlist = state.get("allowlist") or DashboardChatAllowlist() + allowlist = get_runtime_allowlist(state) response_metadata = dict(response.metadata) 
response_metadata.update( { "dashboard_id": state["dashboard_id"], - "retrieved_document_ids": [ - document.document_id for document in state.get("retrieved_documents") or [] - ], + "retrieved_document_ids": [document.document_id for document in get_retrieved_documents(state)], "allowlisted_tables": sorted(allowlist.allowed_tables), "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], - "intent_reason": state["intent_decision"].reason, - "missing_info": state["intent_decision"].missing_info, - "follow_up_type": state["intent_decision"].follow_up_context.follow_up_type, + "intent_reason": get_intent_decision(state).reason, + "missing_info": get_intent_decision(state).missing_info, + "follow_up_type": get_intent_decision(state).follow_up_context.follow_up_type, } ) return { - "response": DashboardChatResponse( - answer_text=response.answer_text, - intent=response.intent, - citations=list(dict.fromkeys(citations)), - warnings=response.warnings, - sql=response.sql, - sql_results=response.sql_results, - usage=response.usage, - tool_calls=response.tool_calls, - metadata=response_metadata, + "response": serialize_response( + DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=list(dict.fromkeys(citations)), + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response_metadata, + ) ) } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py deleted file mode 100644 index acc60dc2b..000000000 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_data_query.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Handle-data-query node for dashboard chat graph (covers query_with_sql and query_without_sql).""" - -from typing import Any - -from ddpui.core.dashboard_chat.contracts import DashboardChatResponse - -from 
ddpui.core.dashboard_chat.orchestration.message_stack import build_new_query_messages -from ddpui.core.dashboard_chat.orchestration.presentation import ( - build_usage_summary, - compose_final_answer_text, - determine_response_format, - sql_result_columns, -) -from ddpui.core.dashboard_chat.orchestration.retrieval import ( - build_citations, - get_cached_query_embedding, -) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState -from ddpui.core.dashboard_chat.orchestration.tools.loop import execute_tool_loop -from ddpui.core.dashboard_chat.orchestration.nodes.helpers import merge_tool_loop_timing - - -def handle_data_query_node( - state: DashboardChatRuntimeState, - llm_client, - vector_store, - warehouse_tools_factory, - runtime_config, - source_config, - tool_specifications, -) -> dict[str, Any]: - """Execute the new-query tool loop for SQL and context-only questions.""" - allowlist = state["allowlist"] - query_embedding = get_cached_query_embedding( - vector_store, state["user_query"], embedding_cache={} - ) - messages = build_new_query_messages(llm_client, state) - - execution_result = execute_tool_loop( - llm_client, - warehouse_tools_factory, - vector_store, - source_config, - runtime_config, - tool_specifications, - state=state, - messages=messages, - max_turns=15, - initial_embedding_cache={state["user_query"]: query_embedding}, - ) - - citations = build_citations( - retrieved_documents=execution_result["retrieved_documents"], - dashboard_export=state["dashboard_export"], - allowlist=allowlist, - ) - response_format = determine_response_format( - user_query=state["user_query"], - sql_results=execution_result["sql_results"], - ) - - return { - "retrieved_documents": execution_result["retrieved_documents"], - "citations": citations, - "tool_calls": execution_result["tool_calls"], - "sql": execution_result["sql"], - "sql_validation": execution_result["sql_validation"], - "sql_results": execution_result["sql_results"], - 
"warnings": execution_result["warnings"], - "timing_breakdown": merge_tool_loop_timing(state, execution_result), - "response": DashboardChatResponse( - answer_text=compose_final_answer_text( - llm_client, state, execution_result, response_format=response_format - ), - intent=state["intent_decision"].intent, - citations=citations, - warnings=execution_result["warnings"], - sql=execution_result["sql"], - sql_results=execution_result["sql_results"], - usage=build_usage_summary(llm_client, vector_store), - tool_calls=execution_result["tool_calls"], - metadata={ - "response_format": response_format, - "table_columns": sql_result_columns(execution_result["sql_results"]), - }, - ), - } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py deleted file mode 100644 index 8cb90093a..000000000 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Handle-follow-up node for dashboard chat graph (covers follow_up_sql and follow_up_context).""" - -from typing import Any - -from ddpui.core.dashboard_chat.contracts import DashboardChatResponse - -from ddpui.core.dashboard_chat.orchestration.message_stack import build_follow_up_messages -from ddpui.core.dashboard_chat.orchestration.presentation import ( - build_usage_summary, - compose_final_answer_text, - determine_response_format, - sql_result_columns, -) -from ddpui.core.dashboard_chat.orchestration.retrieval import ( - build_citations, - get_cached_query_embedding, -) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState -from ddpui.core.dashboard_chat.orchestration.tools.loop import execute_tool_loop -from ddpui.core.dashboard_chat.orchestration.nodes.helpers import merge_tool_loop_timing - - -def handle_follow_up_node( - state: DashboardChatRuntimeState, - llm_client, - vector_store, - warehouse_tools_factory, - runtime_config, - source_config, - 
tool_specifications, -) -> dict[str, Any]: - """Execute the follow-up tool loop for SQL-modifying and explanatory follow-up questions.""" - allowlist = state["allowlist"] - query_embedding = get_cached_query_embedding( - vector_store, state["user_query"], embedding_cache={} - ) - messages = build_follow_up_messages(llm_client, state) - - execution_result = execute_tool_loop( - llm_client, - warehouse_tools_factory, - vector_store, - source_config, - runtime_config, - tool_specifications, - state=state, - messages=messages, - max_turns=6, - initial_embedding_cache={state["user_query"]: query_embedding}, - ) - - citations = build_citations( - retrieved_documents=execution_result["retrieved_documents"], - dashboard_export=state["dashboard_export"], - allowlist=allowlist, - ) - response_format = determine_response_format( - user_query=state["user_query"], - sql_results=execution_result["sql_results"], - ) - - return { - "retrieved_documents": execution_result["retrieved_documents"], - "citations": citations, - "tool_calls": execution_result["tool_calls"], - "sql": execution_result["sql"], - "sql_validation": execution_result["sql_validation"], - "sql_results": execution_result["sql_results"], - "warnings": execution_result["warnings"], - "timing_breakdown": merge_tool_loop_timing(state, execution_result), - "response": DashboardChatResponse( - answer_text=compose_final_answer_text( - llm_client, state, execution_result, response_format=response_format - ), - intent=state["intent_decision"].intent, - citations=citations, - warnings=execution_result["warnings"], - sql=execution_result["sql"], - sql_results=execution_result["sql_results"], - usage=build_usage_summary(llm_client, vector_store), - tool_calls=execution_result["tool_calls"], - metadata={ - "response_format": response_format, - "table_columns": sql_result_columns(execution_result["sql_results"]), - }, - ), - } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py 
b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py new file mode 100644 index 000000000..ac29e3687 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py @@ -0,0 +1,57 @@ +"""Node for context-only dashboard-chat follow-up questions.""" + +from typing import Any + +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_follow_up_messages +from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_retrieved_documents, + serialize_sql_validation_result, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop +from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing + + +def handle_follow_up_context_node( + state: DashboardChatGraphState, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, +) -> dict[str, Any]: + """Handle follow-ups that continue explanation without requiring new SQL.""" + query_embedding = get_or_embed_query( + vector_store, state["user_query"], query_embeddings={} + ) + messages = build_follow_up_messages(llm_client, state) + + execution_result = execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + tool_specifications, + state=state, + messages=messages, + max_turns=6, + initial_query_embeddings={state["user_query"]: query_embedding}, + ) + + return { + "retrieved_documents": serialize_retrieved_documents( + execution_result["retrieved_documents"] + ), + "tool_calls": execution_result["tool_calls"], + "draft_answer_text": execution_result["answer_text"], + "sql": execution_result["sql"], + "sql_validation": 
serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_results": execution_result["sql_results"], + "warnings": execution_result["warnings"], + "timing_breakdown": merge_tool_loop_timing(state, execution_result), + "schema_snippet_payloads": execution_result["schema_snippet_payloads"], + "validated_distinct_payloads": execution_result["validated_distinct_payloads"], + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py new file mode 100644 index 000000000..52919f0fb --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py @@ -0,0 +1,57 @@ +"""Node for SQL-modifying dashboard-chat follow-up questions.""" + +from typing import Any + +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_follow_up_messages +from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_retrieved_documents, + serialize_sql_validation_result, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop +from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing + + +def handle_follow_up_sql_node( + state: DashboardChatGraphState, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, +) -> dict[str, Any]: + """Handle follow-ups that are expected to modify or rerun SQL.""" + query_embedding = get_or_embed_query( + vector_store, state["user_query"], query_embeddings={} + ) + messages = build_follow_up_messages(llm_client, state) + + execution_result = execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + 
tool_specifications, + state=state, + messages=messages, + max_turns=6, + initial_query_embeddings={state["user_query"]: query_embedding}, + ) + + return { + "retrieved_documents": serialize_retrieved_documents( + execution_result["retrieved_documents"] + ), + "tool_calls": execution_result["tool_calls"], + "draft_answer_text": execution_result["answer_text"], + "sql": execution_result["sql"], + "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_results": execution_result["sql_results"], + "warnings": execution_result["warnings"], + "timing_breakdown": merge_tool_loop_timing(state, execution_result), + "schema_snippet_payloads": execution_result["schema_snippet_payloads"], + "validated_distinct_payloads": execution_result["validated_distinct_payloads"], + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py index f9f18d0aa..0c79281f2 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py @@ -4,20 +4,23 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse -from ddpui.core.dashboard_chat.orchestration.presentation import build_usage_summary -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.response_composer import build_usage_summary +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState def handle_irrelevant_node( - state: DashboardChatRuntimeState, llm_client, vector_store + state: DashboardChatGraphState, llm_client, vector_store ) -> dict[str, Any]: """Handle questions outside dashboard chat scope.""" return { - "response": DashboardChatResponse( - answer_text=( - "I can only answer 
questions about this dashboard, its charts, and the data behind them." - ), - intent=DashboardChatIntent.IRRELEVANT, - usage=build_usage_summary(llm_client, vector_store), + "response": serialize_response( + DashboardChatResponse( + answer_text=( + "I can only answer questions about this dashboard, its charts, and the data behind them." + ), + intent=DashboardChatIntent.IRRELEVANT, + usage=build_usage_summary(llm_client, vector_store), + ) ) } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py index 7d65e45c9..67636ec06 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py @@ -4,25 +4,29 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse -from ddpui.core.dashboard_chat.orchestration.presentation import ( +from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, clarification_fallback, ) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response +from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision def handle_needs_clarification_node( - state: DashboardChatRuntimeState, llm_client, vector_store + state: DashboardChatGraphState, llm_client, vector_store ) -> dict[str, Any]: """Ask for clarification when the router says the query is underspecified.""" - intent_decision = state["intent_decision"] + intent_decision = get_intent_decision(state) return { - "response": DashboardChatResponse( - answer_text=( - intent_decision.clarification_question - or clarification_fallback(intent_decision.missing_info) - ), - 
intent=DashboardChatIntent.NEEDS_CLARIFICATION, - usage=build_usage_summary(llm_client, vector_store), + "response": serialize_response( + DashboardChatResponse( + answer_text=( + intent_decision.clarification_question + or clarification_fallback(intent_decision.missing_info) + ), + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + usage=build_usage_summary(llm_client, vector_store), + ) ) } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py new file mode 100644 index 000000000..91950cf65 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py @@ -0,0 +1,57 @@ +"""Node for SQL-backed new dashboard-chat questions.""" + +from typing import Any + +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_new_query_messages +from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_retrieved_documents, + serialize_sql_validation_result, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop +from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing + + +def handle_query_with_sql_node( + state: DashboardChatGraphState, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, +) -> dict[str, Any]: + """Handle new questions that are expected to produce SQL-backed answers.""" + query_embedding = get_or_embed_query( + vector_store, state["user_query"], query_embeddings={} + ) + messages = build_new_query_messages(llm_client, state) + + execution_result = execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + 
tool_specifications, + state=state, + messages=messages, + max_turns=15, + initial_query_embeddings={state["user_query"]: query_embedding}, + ) + + return { + "retrieved_documents": serialize_retrieved_documents( + execution_result["retrieved_documents"] + ), + "tool_calls": execution_result["tool_calls"], + "draft_answer_text": execution_result["answer_text"], + "sql": execution_result["sql"], + "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_results": execution_result["sql_results"], + "warnings": execution_result["warnings"], + "timing_breakdown": merge_tool_loop_timing(state, execution_result), + "schema_snippet_payloads": execution_result["schema_snippet_payloads"], + "validated_distinct_payloads": execution_result["validated_distinct_payloads"], + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py new file mode 100644 index 000000000..1c4bc7056 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py @@ -0,0 +1,57 @@ +"""Node for context-only new dashboard-chat questions.""" + +from typing import Any + +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_new_query_messages +from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_retrieved_documents, + serialize_sql_validation_result, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop +from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing + + +def handle_query_without_sql_node( + state: DashboardChatGraphState, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + 
source_config, + tool_specifications, +) -> dict[str, Any]: + """Handle new questions that continue with retrieved context but no SQL requirement.""" + query_embedding = get_or_embed_query( + vector_store, state["user_query"], query_embeddings={} + ) + messages = build_new_query_messages(llm_client, state) + + execution_result = execute_tool_loop( + llm_client, + warehouse_tools_factory, + vector_store, + source_config, + runtime_config, + tool_specifications, + state=state, + messages=messages, + max_turns=15, + initial_query_embeddings={state["user_query"]: query_embedding}, + ) + + return { + "retrieved_documents": serialize_retrieved_documents( + execution_result["retrieved_documents"] + ), + "tool_calls": execution_result["tool_calls"], + "draft_answer_text": execution_result["answer_text"], + "sql": execution_result["sql"], + "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_results": execution_result["sql_results"], + "warnings": execution_result["warnings"], + "timing_breakdown": merge_tool_loop_timing(state, execution_result), + "schema_snippet_payloads": execution_result["schema_snippet_payloads"], + "validated_distinct_payloads": execution_result["validated_distinct_payloads"], + } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py index 046653ca2..2ce1ea3a8 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py @@ -4,24 +4,27 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse -from ddpui.core.dashboard_chat.orchestration.presentation import ( +from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, compose_small_talk_response, ) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from 
ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState def handle_small_talk_node( - state: DashboardChatRuntimeState, llm_client, vector_store + state: DashboardChatGraphState, llm_client, vector_store ) -> dict[str, Any]: """Handle simple social turns without any tool use.""" return { - "response": DashboardChatResponse( - answer_text=( - state.get("small_talk_response") - or compose_small_talk_response(llm_client, state["user_query"]) - ), - intent=DashboardChatIntent.SMALL_TALK, - usage=build_usage_summary(llm_client, vector_store), + "response": serialize_response( + DashboardChatResponse( + answer_text=( + state.get("small_talk_response") + or compose_small_talk_response(llm_client, state["user_query"]) + ), + intent=DashboardChatIntent.SMALL_TALK, + usage=build_usage_summary(llm_client, vector_store), + ) ) } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py index 45c3ab632..2d372e973 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py @@ -2,17 +2,40 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.session_snapshot import load_session_snapshot -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlistBuilder +from ddpui.services.dashboard_service import DashboardService +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_allowlist +from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_org -def load_context_node(state: DashboardChatRuntimeState) -> dict[str, Any]: - """Load or reuse the 
session-stable dashboard context snapshot.""" - snapshot = load_session_snapshot(state) + +def load_context_node(state: DashboardChatGraphState) -> dict[str, Any]: + """Bootstrap or reuse the session-stable checkpointed dashboard context.""" + if ( + state.get("dashboard_export_payload") is not None + and state.get("allowlist_payload") is not None + and state.get("dbt_index") is not None + ): + return { + "schema_snippet_payloads": dict(state.get("schema_snippet_payloads") or {}), + "validated_distinct_payloads": dict(state.get("validated_distinct_payloads") or {}), + } + + org = get_runtime_org(state) + dashboard_export = DashboardService.export_dashboard_context( + state["dashboard_id"], + org, + ) + manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(org.dbt) + allowlist = DashboardChatAllowlistBuilder.build( + dashboard_export, + manifest_json=manifest_json, + ) return { - "dashboard_export": snapshot["dashboard_export"], - "dbt_index": snapshot["dbt_index"], - "allowlist": snapshot["allowlist"], - "session_schema_cache": snapshot["schema_cache"], - "session_distinct_cache": snapshot["distinct_cache"], + "dashboard_export_payload": dashboard_export, + "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index(manifest_json, allowlist), + "allowlist_payload": serialize_allowlist(allowlist), + "schema_snippet_payloads": dict(state.get("schema_snippet_payloads") or {}), + "validated_distinct_payloads": dict(state.get("validated_distinct_payloads") or {}), } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py index b7f23560d..c191923e6 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py @@ -2,22 +2,26 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.conversation import extract_conversation_context -from ddpui.core.dashboard_chat.orchestration.presentation 
import ( +from ddpui.core.dashboard_chat.orchestration.conversation_context import extract_conversation_context +from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_fast_path_intent, build_fast_path_small_talk_response, ) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_conversation_context, + serialize_intent_decision, +) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -def route_intent_node(state: DashboardChatRuntimeState, llm_client) -> dict[str, Any]: +def route_intent_node(state: DashboardChatGraphState, llm_client) -> dict[str, Any]: """Use the prototype router prompt for all non-trivial routing.""" conversation_context = extract_conversation_context(state["conversation_history"]) fast_path_intent = build_fast_path_intent(state["user_query"]) if fast_path_intent is not None: return { - "conversation_context": conversation_context, - "intent_decision": fast_path_intent, + "conversation_context": serialize_conversation_context(conversation_context), + "intent_decision": serialize_intent_decision(fast_path_intent), "small_talk_response": build_fast_path_small_talk_response(state["user_query"]), } intent_decision = llm_client.classify_intent( @@ -25,6 +29,6 @@ def route_intent_node(state: DashboardChatRuntimeState, llm_client) -> dict[str, conversation_context=conversation_context, ) return { - "conversation_context": conversation_context, - "intent_decision": intent_decision, + "conversation_context": serialize_conversation_context(conversation_context), + "intent_decision": serialize_intent_decision(intent_decision), } diff --git a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index 796b0a9b1..aea961a06 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ 
b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -8,30 +8,42 @@ from langgraph.graph import END, START, StateGraph from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig -from ddpui.core.dashboard_chat.agents.interface import DashboardChatLlmClient -from ddpui.core.dashboard_chat.agents.openai import OpenAIDashboardChatLlmClient +from ddpui.core.dashboard_chat.agents.llm_client_interface import DashboardChatLlmClient +from ddpui.core.dashboard_chat.agents.openai_llm_client import OpenAIDashboardChatLlmClient from ddpui.core.dashboard_chat.contracts import DashboardChatResponse -from ddpui.core.dashboard_chat.vector.store import OrgVectorStore -from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools +from ddpui.core.dashboard_chat.vector.org_vector_store import OrgVectorStore +from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseTools from ddpui.models.org import Org -from ddpui.core.dashboard_chat.orchestration.conversation import normalize_conversation_history +from ddpui.core.dashboard_chat.orchestration.conversation_context import normalize_conversation_history +from ddpui.core.dashboard_chat.orchestration.checkpoints import get_dashboard_chat_checkpointer +from ddpui.core.dashboard_chat.orchestration.nodes.compose_response import compose_response_node from ddpui.core.dashboard_chat.orchestration.nodes.finalize import finalize_node -from ddpui.core.dashboard_chat.orchestration.nodes.handle_data_query import handle_data_query_node -from ddpui.core.dashboard_chat.orchestration.nodes.handle_follow_up import handle_follow_up_node +from ddpui.core.dashboard_chat.orchestration.nodes.handle_query_with_sql import ( + handle_query_with_sql_node, +) +from ddpui.core.dashboard_chat.orchestration.nodes.handle_query_without_sql import ( + handle_query_without_sql_node, +) +from ddpui.core.dashboard_chat.orchestration.nodes.handle_follow_up_context import 
( + handle_follow_up_context_node, +) +from ddpui.core.dashboard_chat.orchestration.nodes.handle_follow_up_sql import ( + handle_follow_up_sql_node, +) from ddpui.core.dashboard_chat.orchestration.nodes.handle_irrelevant import handle_irrelevant_node from ddpui.core.dashboard_chat.orchestration.nodes.handle_needs_clarification import ( handle_needs_clarification_node, ) from ddpui.core.dashboard_chat.orchestration.nodes.handle_small_talk import handle_small_talk_node -from ddpui.core.dashboard_chat.orchestration.nodes.helpers import route_after_intent +from ddpui.core.dashboard_chat.orchestration.intent_routing import route_after_intent from ddpui.core.dashboard_chat.orchestration.nodes.load_context import load_context_node from ddpui.core.dashboard_chat.orchestration.nodes.route_intent import route_intent_node from ddpui.core.dashboard_chat.orchestration.state import ( - DashboardChatRuntimeState, - SMALL_TALK_FAST_PATH_PATTERN, + DashboardChatGraphState, ) -from ddpui.core.dashboard_chat.orchestration.tools.specifications import ( +from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_response +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_specifications import ( DASHBOARD_CHAT_TOOL_SPECIFICATIONS, ) @@ -39,7 +51,7 @@ def _timed_node(node_name: str, handler): """Wrap one graph node so per-node duration is recorded in timing_breakdown.""" - def wrapped(state: DashboardChatRuntimeState) -> dict: + def wrapped(state: DashboardChatGraphState) -> dict: started_at = perf_counter() updates = handler(state) elapsed_ms = round((perf_counter() - started_at) * 1000, 2) @@ -61,6 +73,8 @@ def _build_graph( runtime_config, source_config, tool_specifications, + *, + checkpointer=None, ): """Build the intent-routing graph with all deps injected via closures.""" @@ -79,8 +93,30 @@ def _handle_irrelevant(state): def _handle_needs_clarification(state): return handle_needs_clarification_node(state, llm_client, vector_store) - def 
_handle_data_query(state): - return handle_data_query_node( + def _handle_query_with_sql(state): + return handle_query_with_sql_node( + state, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, + ) + + def _handle_query_without_sql(state): + return handle_query_without_sql_node( + state, + llm_client, + vector_store, + warehouse_tools_factory, + runtime_config, + source_config, + tool_specifications, + ) + + def _handle_follow_up_sql(state): + return handle_follow_up_sql_node( state, llm_client, vector_store, @@ -90,8 +126,8 @@ def _handle_data_query(state): tool_specifications, ) - def _handle_follow_up(state): - return handle_follow_up_node( + def _handle_follow_up_context(state): + return handle_follow_up_context_node( state, llm_client, vector_store, @@ -104,7 +140,10 @@ def _handle_follow_up(state): def _finalize(state): return finalize_node(state) - graph = StateGraph(DashboardChatRuntimeState) + def _compose_response(state): + return compose_response_node(state, llm_client, vector_store) + + graph = StateGraph(DashboardChatGraphState) graph.add_node("load_context", _timed_node("load_context", _load_context)) graph.add_node("route_intent", _timed_node("route_intent", _route_intent)) graph.add_node("handle_small_talk", _timed_node("handle_small_talk", _handle_small_talk)) @@ -113,8 +152,23 @@ def _finalize(state): "handle_needs_clarification", _timed_node("handle_needs_clarification", _handle_needs_clarification), ) - graph.add_node("handle_data_query", _timed_node("handle_data_query", _handle_data_query)) - graph.add_node("handle_follow_up", _timed_node("handle_follow_up", _handle_follow_up)) + graph.add_node( + "handle_query_with_sql", + _timed_node("handle_query_with_sql", _handle_query_with_sql), + ) + graph.add_node( + "handle_query_without_sql", + _timed_node("handle_query_without_sql", _handle_query_without_sql), + ) + graph.add_node( + "handle_follow_up_sql", + 
_timed_node("handle_follow_up_sql", _handle_follow_up_sql), + ) + graph.add_node( + "handle_follow_up_context", + _timed_node("handle_follow_up_context", _handle_follow_up_context), + ) + graph.add_node("compose_response", _timed_node("compose_response", _compose_response)) graph.add_node("finalize", _timed_node("finalize", _finalize)) graph.add_edge(START, "load_context") @@ -126,18 +180,23 @@ def _finalize(state): "small_talk": "handle_small_talk", "irrelevant": "handle_irrelevant", "needs_clarification": "handle_needs_clarification", - "query_with_sql": "handle_data_query", - "query_without_sql": "handle_data_query", - "follow_up_sql": "handle_follow_up", - "follow_up_context": "handle_follow_up", + "query_with_sql": "handle_query_with_sql", + "query_without_sql": "handle_query_without_sql", + "follow_up_sql": "handle_follow_up_sql", + "follow_up_context": "handle_follow_up_context", }, ) - graph.add_edge("handle_small_talk", "finalize") - graph.add_edge("handle_irrelevant", "finalize") - graph.add_edge("handle_needs_clarification", "finalize") - graph.add_edge("handle_data_query", "finalize") - graph.add_edge("handle_follow_up", "finalize") + graph.add_edge("handle_small_talk", "compose_response") + graph.add_edge("handle_irrelevant", "compose_response") + graph.add_edge("handle_needs_clarification", "compose_response") + graph.add_edge("handle_query_with_sql", "compose_response") + graph.add_edge("handle_query_without_sql", "compose_response") + graph.add_edge("handle_follow_up_sql", "compose_response") + graph.add_edge("handle_follow_up_context", "compose_response") + graph.add_edge("compose_response", "finalize") graph.add_edge("finalize", END) + if checkpointer is not None: + return graph.compile(checkpointer=checkpointer) return graph.compile() @@ -151,6 +210,7 @@ def __init__( warehouse_tools_factory: Union[Callable[[Org], DashboardChatWarehouseTools], None] = None, runtime_config: Union[DashboardChatRuntimeConfig, None] = None, source_config: 
Union[DashboardChatSourceConfig, None] = None, + checkpointer=None, ): self.runtime_config = runtime_config or DashboardChatRuntimeConfig.from_env() self.source_config = source_config or DashboardChatSourceConfig.from_env() @@ -174,6 +234,57 @@ def __init__( self.source_config, DASHBOARD_CHAT_TOOL_SPECIFICATIONS, ) + self.persistent_graph = ( + _build_graph( + self.llm_client, + self.vector_store, + self.warehouse_tools_factory, + self.runtime_config, + self.source_config, + DASHBOARD_CHAT_TOOL_SPECIFICATIONS, + checkpointer=checkpointer, + ) + if checkpointer is not None + else None + ) + + @staticmethod + def _thread_config( + session_id: str, + checkpoint_id: str | None = None, + ) -> dict[str, Any]: + """Build the top-level LangGraph thread config for one dashboard-chat session. + + LangGraph uses ``checkpoint_ns`` for graph/subgraph namespaces. Dashboard chat runs + as one top-level graph, so we keep that namespace empty and scope persistence + through ``thread_id=session_id``. + """ + configurable: dict[str, Any] = { + "thread_id": session_id, + "checkpoint_ns": "", + } + if checkpoint_id: + configurable["checkpoint_id"] = checkpoint_id + return {"configurable": configurable} + + def _persistent_graph_or_raise(self): + """Return the persistent graph or fail loudly if checkpointing is unavailable.""" + if self.persistent_graph is None: + raise RuntimeError("Dashboard chat runtime was not initialized with checkpointing") + return self.persistent_graph + + def get_state_snapshot( + self, + session_id: str, + checkpoint_id: str | None = None, + ): + """Return the current persisted LangGraph state snapshot for one chat session.""" + try: + return self._persistent_graph_or_raise().get_state( + self._thread_config(session_id, checkpoint_id) + ) + except ValueError: + return None def run( self, @@ -183,30 +294,101 @@ def run( session_id: str | None = None, vector_collection_name: str | None = None, conversation_history: Sequence[dict[str, Any]] | None = None, + 
interrupt_before: Sequence[str] | None = None, + interrupt_after: Sequence[str] | None = None, ) -> DashboardChatResponse: """Run one dashboard chat turn.""" if hasattr(self.llm_client, "reset_usage"): self.llm_client.reset_usage() if hasattr(self.vector_store, "reset_usage"): self.vector_store.reset_usage() - initial_state: DashboardChatRuntimeState = { - "org": org, + initial_state: DashboardChatGraphState = { + "org_id": org.id, "dashboard_id": dashboard_id, "session_id": session_id, "vector_collection_name": vector_collection_name, "user_query": user_query, "conversation_history": normalize_conversation_history(conversation_history), + "small_talk_response": None, + "retrieved_documents": [], + "citations": [], + "tool_calls": [], + "sql": None, + "sql_validation": None, + "sql_results": None, "timing_breakdown": { "graph_nodes_ms": {}, "tool_calls_ms": [], }, + "draft_answer_text": None, "warnings": [], "usage": {}, + # The checkpointed graph persists terminal response state across turns. + # Each new user turn must clear that field so compose_response rebuilds + # a fresh answer instead of short-circuiting on the previous turn's reply. + "response": None, } runtime_started_at = perf_counter() - final_state = self.graph.invoke(initial_state) + invocation_config = None + graph = self.graph + if session_id and self.persistent_graph is not None: + graph = self.persistent_graph + invocation_config = self._thread_config(session_id) + final_state = graph.invoke( + initial_state, + config=invocation_config, + interrupt_before=interrupt_before, + interrupt_after=interrupt_after, + ) + if final_state.get("response") is None: + raise RuntimeError( + "Dashboard chat run was interrupted before a final response was produced. " + "Resume the session from its persisted checkpoint." 
+ ) + runtime_total_ms = round((perf_counter() - runtime_started_at) * 1000, 2) + response = get_runtime_response(final_state) + timing_breakdown = dict(final_state.get("timing_breakdown") or {}) + timing_breakdown["runtime_total_ms"] = runtime_total_ms + response_metadata = dict(response.metadata) + response_metadata["timing_breakdown"] = timing_breakdown + return DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=response.citations, + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response_metadata, + ) + + def resume( + self, + session_id: str, + checkpoint_id: str | None = None, + ) -> DashboardChatResponse: + """Resume a previously interrupted dashboard-chat run from persisted LangGraph state. + + This operates at graph-step boundaries only. If the checkpointed session is already + complete, the latest stored response is returned directly. 
+ """ + graph = self._persistent_graph_or_raise() + config = self._thread_config(session_id, checkpoint_id) + state_snapshot = graph.get_state(config) + if state_snapshot is None: + raise ValueError(f"No checkpointed dashboard chat state found for session {session_id}") + + persisted_state = state_snapshot.values or {} + if not state_snapshot.next: + if persisted_state.get("response") is None: + raise ValueError(f"Session {session_id} has no resumable or completed response") + return get_runtime_response(persisted_state) + + runtime_started_at = perf_counter() + final_state = graph.invoke(None, config=config) runtime_total_ms = round((perf_counter() - runtime_started_at) * 1000, 2) - response = final_state["response"] + response = get_runtime_response(final_state) timing_breakdown = dict(final_state.get("timing_breakdown") or {}) timing_breakdown["runtime_total_ms"] = runtime_total_ms response_metadata = dict(response.metadata) @@ -227,4 +409,9 @@ def run( @lru_cache(maxsize=1) def get_dashboard_chat_runtime() -> DashboardChatRuntime: """Return the shared dashboard chat runtime used by live chat turns.""" - return DashboardChatRuntime() + return DashboardChatRuntime(checkpointer=get_dashboard_chat_checkpointer().saver) + + +def reset_dashboard_chat_runtime() -> None: + """Clear the shared runtime cache so tests can release checkpoint resources cleanly.""" + get_dashboard_chat_runtime.cache_clear() diff --git a/ddpui/core/dashboard_chat/orchestration/presentation.py b/ddpui/core/dashboard_chat/orchestration/response_composer.py similarity index 96% rename from ddpui/core/dashboard_chat/orchestration/presentation.py rename to ddpui/core/dashboard_chat/orchestration/response_composer.py index 487553929..b9aaa0eb5 100644 --- a/ddpui/core/dashboard_chat/orchestration/presentation.py +++ b/ddpui/core/dashboard_chat/orchestration/response_composer.py @@ -11,13 +11,16 @@ ) from ddpui.utils.custom_logger import CustomLogger -from 
ddpui.core.dashboard_chat.orchestration.state import ( - DashboardChatRuntimeState, - SMALL_TALK_FAST_PATH_PATTERN, -) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision logger = CustomLogger("dashboard_chat") +SMALL_TALK_FAST_PATH_PATTERN = re.compile( + r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you|what\s+can\s+you\s+do|who\s+are\s+you)\b[\s!.?]*$", + re.IGNORECASE, +) + def serialize_tool_result(result: dict[str, Any]) -> dict[str, Any]: """Trim large tool payloads before feeding them back into the model.""" @@ -94,7 +97,7 @@ def max_turns_message( def compose_final_answer_text( llm_client, - state: DashboardChatRuntimeState, + state: DashboardChatGraphState, execution_result: dict[str, Any], *, response_format: str, @@ -106,7 +109,7 @@ def compose_final_answer_text( try: answer_text = llm_client.compose_final_answer( user_query=state["user_query"], - intent=state["intent_decision"].intent, + intent=get_intent_decision(state).intent, response_format=response_format, draft_answer=draft_answer, retrieved_documents=list(execution_result.get("retrieved_documents") or []), @@ -250,7 +253,7 @@ def fallback_answer_text( draft_answer: str | None = None, ) -> str: """Fallback response when the model returns no final text.""" - from .retrieval import compact_snippet + from .retrieval_support import compact_snippet if draft_answer: return draft_answer diff --git a/ddpui/core/dashboard_chat/orchestration/retrieval.py b/ddpui/core/dashboard_chat/orchestration/retrieval_support.py similarity index 95% rename from ddpui/core/dashboard_chat/orchestration/retrieval.py rename to ddpui/core/dashboard_chat/orchestration/retrieval_support.py index fe7fd8661..cfc7f419e 100644 --- a/ddpui/core/dashboard_chat/orchestration/retrieval.py +++ b/ddpui/core/dashboard_chat/orchestration/retrieval_support.py @@ -3,17 +3,14 @@ from 
def get_or_embed_query(
    vector_store,
    query_text: str,
    query_embeddings: dict[str, list[float]],
) -> list[float]:
    """Return the embedding for *query_text*, computing it at most once per turn.

    The caller-owned ``query_embeddings`` dict acts as a per-turn memo: the
    first lookup for a given query string calls ``vector_store.embed_query``,
    and every later lookup for the same string reuses that stored vector.
    """
    try:
        # Fast path: this query string was already embedded during this turn.
        return query_embeddings[query_text]
    except KeyError:
        embedding = vector_store.embed_query(query_text)
        query_embeddings[query_text] = embedding
        return embedding
DashboardChatSchemaSnippet -from ddpui.core.dashboard_chat.sessions.cache import ( - DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, - build_dashboard_chat_session_snapshot_cache_key, - deserialize_allowlist, - deserialize_distinct_cache, - deserialize_schema_snippets, - serialize_allowlist, - serialize_distinct_cache, - serialize_schema_snippets, -) -from ddpui.services.dashboard_service import DashboardService - -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState - - -def load_session_snapshot(state: DashboardChatRuntimeState) -> dict[str, Any]: - """Return the current session's frozen dashboard context snapshot.""" - session_id = state.get("session_id") - if not session_id: - return build_session_snapshot(state) - - cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) - cached_snapshot = cache.get(cache_key) - if cached_snapshot is not None: - return { - "dashboard_export": dict(cached_snapshot["dashboard_export"]), - "dbt_index": dict(cached_snapshot.get("dbt_index") or {"resources_by_unique_id": {}}), - "allowlist": deserialize_allowlist(cached_snapshot.get("allowlist")), - "schema_cache": deserialize_schema_snippets(cached_snapshot.get("schema_cache")), - "distinct_cache": deserialize_distinct_cache(cached_snapshot.get("distinct_cache")), - } - - snapshot = build_session_snapshot(state) - cache.set( - cache_key, - { - "dashboard_export": snapshot["dashboard_export"], - "dbt_index": snapshot["dbt_index"], - "allowlist": serialize_allowlist(snapshot["allowlist"]), - "schema_cache": serialize_schema_snippets(snapshot["schema_cache"]), - "distinct_cache": serialize_distinct_cache(snapshot["distinct_cache"]), - }, - DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS, - ) - return snapshot - - -def build_session_snapshot(state: DashboardChatRuntimeState) -> dict[str, Any]: - """Build one session-stable snapshot of dashboard-specific runtime context.""" - dashboard_export = DashboardService.export_dashboard_context( - 
state["dashboard_id"], - state["org"], - ) - manifest_json = DashboardChatAllowlistBuilder.load_manifest_json(state["org"].dbt) - allowlist = DashboardChatAllowlistBuilder.build( - dashboard_export, - manifest_json=manifest_json, - ) - return { - "dashboard_export": dashboard_export, - "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index(manifest_json, allowlist), - "allowlist": allowlist, - "schema_cache": {}, - "distinct_cache": set(), - } - - -def persist_session_schema_cache( - state: DashboardChatRuntimeState, - schema_cache: dict[str, DashboardChatSchemaSnippet], -) -> None: - """Persist lazily loaded schema snippets back into the session snapshot cache.""" - session_id = state.get("session_id") - if not session_id: - state["session_schema_cache"] = dict(schema_cache) - return - - cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) - cached_snapshot = cache.get(cache_key) - if cached_snapshot is None: - return - cached_snapshot["schema_cache"] = serialize_schema_snippets(schema_cache) - cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) - state["session_schema_cache"] = dict(schema_cache) - - -def persist_session_distinct_cache( - state: DashboardChatRuntimeState, - distinct_cache: set[tuple[str, str, str]], -) -> None: - """Persist validated distinct values back into the session snapshot cache.""" - session_id = state.get("session_id") - if not session_id: - state["session_distinct_cache"] = set(distinct_cache) - return - - cache_key = build_dashboard_chat_session_snapshot_cache_key(session_id) - cached_snapshot = cache.get(cache_key) - if cached_snapshot is None: - return - cached_snapshot["distinct_cache"] = serialize_distinct_cache(distinct_cache) - cache.set(cache_key, cached_snapshot, DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS) - state["session_distinct_cache"] = set(distinct_cache) diff --git a/ddpui/core/dashboard_chat/orchestration/source_identifiers.py 
b/ddpui/core/dashboard_chat/orchestration/source_identifier_parsing.py similarity index 100% rename from ddpui/core/dashboard_chat/orchestration/source_identifiers.py rename to ddpui/core/dashboard_chat/orchestration/source_identifier_parsing.py diff --git a/ddpui/core/dashboard_chat/orchestration/state.py b/ddpui/core/dashboard_chat/orchestration/state.py deleted file mode 100644 index e81971314..000000000 --- a/ddpui/core/dashboard_chat/orchestration/state.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Shared graph state and constants for dashboard chat orchestration.""" - -from typing import Any, TypedDict -import re - -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatCitation, - DashboardChatConversationContext, - DashboardChatConversationMessage, - DashboardChatIntentDecision, - DashboardChatResponse, - DashboardChatRetrievedDocument, - DashboardChatSchemaSnippet, - DashboardChatSqlValidationResult, -) -from ddpui.models.org import Org - -SMALL_TALK_FAST_PATH_PATTERN = re.compile( - r"^\s*(hi|hello|hey|yo|good\s+morning|good\s+afternoon|good\s+evening|thanks|thank\s+you|what\s+can\s+you\s+do|who\s+are\s+you)\b[\s!.?]*$", - re.IGNORECASE, -) - - -class DashboardChatRuntimeState(TypedDict, total=False): - """LangGraph state for one dashboard chat turn.""" - - org: Org - dashboard_id: int - session_id: str | None - vector_collection_name: str | None - user_query: str - conversation_history: list[DashboardChatConversationMessage] - conversation_context: DashboardChatConversationContext - small_talk_response: str | None - dashboard_export: dict[str, Any] - dbt_index: dict[str, Any] - allowlist: DashboardChatAllowlist - session_schema_cache: dict[str, DashboardChatSchemaSnippet] - session_distinct_cache: set[tuple[str, str, str]] - intent_decision: DashboardChatIntentDecision - retrieved_documents: list[DashboardChatRetrievedDocument] - citations: list[DashboardChatCitation] - 
"""Typed accessors for reconstructing runtime views from checkpoint-safe state payloads."""

from ddpui.models.org import Org

from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist
from ddpui.core.dashboard_chat.contracts import (
    DashboardChatConversationContext,
    DashboardChatIntentDecision,
    DashboardChatResponse,
    DashboardChatRetrievedDocument,
    DashboardChatSqlValidationResult,
)
from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState
from ddpui.core.dashboard_chat.orchestration.state.payload_codec import (
    deserialize_allowlist,
    deserialize_conversation_context,
    deserialize_intent_decision,
    deserialize_response,
    deserialize_retrieved_documents,
    deserialize_sql_validation_result,
)


def get_runtime_org(state: DashboardChatGraphState) -> Org:
    """Fetch the Django ``Org`` row referenced by this graph state.

    Only the integer ``org_id`` lives in the checkpointed state; the ORM object
    (with its ``dbt`` relation eagerly joined) is re-fetched here on demand.
    """
    org_id = int(state["org_id"])
    return Org.objects.select_related("dbt").get(id=org_id)


def get_runtime_allowlist(state: DashboardChatGraphState) -> DashboardChatAllowlist:
    """Rebuild the table allowlist view from its checkpoint-safe payload."""
    payload = state.get("allowlist_payload")
    return deserialize_allowlist(payload)


def get_conversation_context(state: DashboardChatGraphState) -> DashboardChatConversationContext:
    """Rebuild the conversation-context contract from its checkpoint-safe payload."""
    payload = state.get("conversation_context")
    return deserialize_conversation_context(payload)


def get_intent_decision(state: DashboardChatGraphState) -> DashboardChatIntentDecision:
    """Rebuild the intent-decision contract from its checkpoint-safe payload."""
    payload = state.get("intent_decision")
    return deserialize_intent_decision(payload)


def get_runtime_response(state: DashboardChatGraphState) -> DashboardChatResponse:
    """Rebuild the final response contract from its checkpoint-safe payload."""
    payload = state.get("response")
    return deserialize_response(payload)


def get_retrieved_documents(
    state: DashboardChatGraphState,
) -> list[DashboardChatRetrievedDocument]:
    """Rebuild the retrieved-document contracts from their checkpoint-safe payloads."""
    payloads = state.get("retrieved_documents")
    return deserialize_retrieved_documents(payloads)


def get_sql_validation_result(
    state: DashboardChatGraphState,
) -> DashboardChatSqlValidationResult | None:
    """Rebuild the SQL validation result, or ``None`` when no SQL was validated."""
    payload = state.get("sql_validation")
    return deserialize_sql_validation_result(payload)
"""Shared graph state for dashboard chat orchestration."""

from typing import Any, TypedDict


class DashboardChatGraphState(TypedDict, total=False):
    """LangGraph state for one dashboard chat turn.

    All fields are optional (``total=False``) and hold only primitives,
    dicts, and lists so the state can be persisted by a LangGraph
    checkpointer; the rich contract objects are rebuilt from these payloads
    via the ``state.accessors`` helpers.
    """

    # --- turn identity: which org/dashboard/session the query belongs to ---
    org_id: int
    dashboard_id: int
    session_id: str | None
    vector_collection_name: str | None
    user_query: str
    conversation_history: list[dict[str, Any]]
    conversation_context: dict[str, Any]
    small_talk_response: str | None
    # --- session-scoped dashboard context (checkpoint-safe payload forms) ---
    dashboard_export_payload: dict[str, Any]
    dbt_index: dict[str, Any]
    allowlist_payload: dict[str, Any]
    schema_snippet_payloads: dict[str, Any]
    validated_distinct_payloads: dict[str, Any]
    # --- per-turn routing and retrieval results ---
    intent_decision: dict[str, Any]
    retrieved_documents: list[dict[str, Any]]
    citations: list[dict[str, Any]]
    tool_calls: list[dict[str, Any]]
    timing_breakdown: dict[str, Any]
    # --- answer assembly: draft text, SQL pipeline, and final response ---
    draft_answer_text: str | None
    sql: str | None
    sql_validation: dict[str, Any] | None
    sql_results: list[dict[str, Any]] | None
    warnings: list[str]
    usage: dict[str, Any]
    response: dict[str, Any]
"""Serialization helpers for checkpoint-safe dashboard chat state payloads."""

from typing import Any

from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist
from ddpui.core.dashboard_chat.contracts import (
    DashboardChatCitation,
    DashboardChatConversationContext,
    DashboardChatFollowUpContext,
    DashboardChatIntent,
    DashboardChatIntentDecision,
    DashboardChatResponse,
    DashboardChatRetrievedDocument,
    DashboardChatSchemaSnippet,
    DashboardChatSqlValidationResult,
)


def serialize_allowlist(allowlist: DashboardChatAllowlist) -> dict[str, Any]:
    """Convert an allowlist into a checkpoint-safe dictionary payload.

    Sets are emitted as sorted lists so the payload is serializable and
    deterministic across runs.
    """
    return {
        "chart_tables": sorted(allowlist.chart_tables),
        "upstream_tables": sorted(allowlist.upstream_tables),
        "allowed_tables": sorted(allowlist.allowed_tables),
        "allowed_unique_ids": sorted(allowlist.allowed_unique_ids),
        "unique_id_to_table": dict(allowlist.unique_id_to_table),
        "table_to_unique_ids": {
            table_name: sorted(unique_ids)
            for table_name, unique_ids in allowlist.table_to_unique_ids.items()
        },
    }


def deserialize_allowlist(payload: dict[str, Any] | None) -> DashboardChatAllowlist:
    """Rebuild an allowlist view from checkpoint-safe payload data.

    Missing or ``None`` payloads yield an empty allowlist rather than raising.
    """
    payload = payload or {}
    return DashboardChatAllowlist(
        chart_tables=set(payload.get("chart_tables") or []),
        upstream_tables=set(payload.get("upstream_tables") or []),
        allowed_tables=set(payload.get("allowed_tables") or []),
        allowed_unique_ids=set(payload.get("allowed_unique_ids") or []),
        unique_id_to_table=dict(payload.get("unique_id_to_table") or {}),
        table_to_unique_ids={
            table_name: set(unique_ids)
            for table_name, unique_ids in (payload.get("table_to_unique_ids") or {}).items()
        },
    )


def serialize_schema_snippets(
    snippets: dict[str, DashboardChatSchemaSnippet],
) -> dict[str, Any]:
    """Convert schema snippets into checkpoint-safe dictionary payloads."""
    return {
        table_name: {
            "table_name": snippet.table_name,
            "columns": list(snippet.columns),
        }
        for table_name, snippet in snippets.items()
    }


def deserialize_schema_snippets(
    payload: dict[str, Any] | None,
) -> dict[str, DashboardChatSchemaSnippet]:
    """Rebuild schema snippet contracts from checkpoint payloads.

    Keys are lowercased on the way in — presumably to match case-insensitive
    table lookups elsewhere (see ``deserialize_distinct_payloads``); confirm
    before relying on the original-case key surviving a round trip.
    """
    snippets: dict[str, DashboardChatSchemaSnippet] = {}
    for table_name, snippet_payload in (payload or {}).items():
        snippets[table_name.lower()] = DashboardChatSchemaSnippet(
            table_name=str(snippet_payload.get("table_name") or table_name),
            columns=list(snippet_payload.get("columns") or []),
        )
    return snippets


def serialize_distinct_payloads(
    validated_distinct_values: set[tuple[str, str, str]],
) -> dict[str, Any]:
    """Convert validated distinct values into a checkpoint-safe nested payload.

    Values are accumulated directly into per-column sets (instead of building
    lists that may hold duplicates and re-scanning each with ``set()``), then
    emitted sorted for determinism. Output is identical to the list-based
    accumulation: ``{table: {column: sorted unique values}}``.
    """
    grouped: dict[str, dict[str, set[str]]] = {}
    for table_name, column_name, value in validated_distinct_values:
        grouped.setdefault(table_name, {}).setdefault(column_name, set()).add(value)

    return {
        table_name: {
            column_name: sorted(values)
            for column_name, values in column_map.items()
        }
        for table_name, column_map in grouped.items()
    }


def deserialize_distinct_payloads(
    payload: dict[str, Any] | None,
) -> set[tuple[str, str, str]]:
    """Rebuild validated distinct values from checkpoint payloads.

    Table and column names are lowercased; values are kept verbatim (as str).
    """
    validated_distinct_values: set[tuple[str, str, str]] = set()
    for table_name, column_map in (payload or {}).items():
        for column_name, values in (column_map or {}).items():
            for value in values or []:
                validated_distinct_values.add(
                    (str(table_name).lower(), str(column_name).lower(), str(value))
                )
    return validated_distinct_values


def serialize_conversation_context(
    context: DashboardChatConversationContext,
) -> dict[str, Any]:
    """Convert conversation context into a checkpoint-safe payload."""
    return {
        "last_sql_query": context.last_sql_query,
        "last_tables_used": list(context.last_tables_used),
        "last_chart_ids": list(context.last_chart_ids),
        "last_metrics": list(context.last_metrics),
        "last_dimensions": list(context.last_dimensions),
        "last_filters": list(context.last_filters),
        "last_response_type": context.last_response_type,
        "last_answer_text": context.last_answer_text,
        "last_intent": context.last_intent,
    }


def deserialize_conversation_context(
    payload: dict[str, Any] | None,
) -> DashboardChatConversationContext:
    """Rebuild conversation context from checkpoint payload data."""
    payload = payload or {}
    return DashboardChatConversationContext(
        last_sql_query=payload.get("last_sql_query"),
        last_tables_used=list(payload.get("last_tables_used") or []),
        last_chart_ids=list(payload.get("last_chart_ids") or []),
        last_metrics=list(payload.get("last_metrics") or []),
        last_dimensions=list(payload.get("last_dimensions") or []),
        last_filters=list(payload.get("last_filters") or []),
        last_response_type=payload.get("last_response_type"),
        last_answer_text=payload.get("last_answer_text"),
        last_intent=payload.get("last_intent"),
    )


def serialize_intent_decision(decision: DashboardChatIntentDecision) -> dict[str, Any]:
    """Convert one intent decision into a checkpoint-safe payload."""
    return {
        # Store the enum's value (a plain string) so the payload stays serializable.
        "intent": decision.intent.value,
        "confidence": decision.confidence,
        "reason": decision.reason,
        "missing_info": list(decision.missing_info),
        "force_tool_usage": decision.force_tool_usage,
        "clarification_question": decision.clarification_question,
        "follow_up_context": {
            "is_follow_up": decision.follow_up_context.is_follow_up,
            "follow_up_type": decision.follow_up_context.follow_up_type,
            "reusable_elements": dict(decision.follow_up_context.reusable_elements),
            "modification_instruction": decision.follow_up_context.modification_instruction,
        },
    }


def deserialize_intent_decision(payload: dict[str, Any] | None) -> DashboardChatIntentDecision:
    """Rebuild an intent decision from checkpoint payload data.

    A missing intent defaults to IRRELEVANT; an unknown intent string will
    raise ``ValueError`` from the enum constructor (surfacing corrupt state).
    """
    payload = payload or {}
    follow_up_payload = payload.get("follow_up_context") or {}
    return DashboardChatIntentDecision(
        intent=DashboardChatIntent(str(payload.get("intent") or DashboardChatIntent.IRRELEVANT.value)),
        confidence=float(payload.get("confidence") or 0.0),
        reason=str(payload.get("reason") or ""),
        missing_info=list(payload.get("missing_info") or []),
        force_tool_usage=bool(payload.get("force_tool_usage")),
        clarification_question=payload.get("clarification_question"),
        follow_up_context=DashboardChatFollowUpContext(
            is_follow_up=bool(follow_up_payload.get("is_follow_up")),
            follow_up_type=follow_up_payload.get("follow_up_type"),
            reusable_elements=dict(follow_up_payload.get("reusable_elements") or {}),
            modification_instruction=follow_up_payload.get("modification_instruction"),
        ),
    )


def serialize_retrieved_documents(
    documents: list[DashboardChatRetrievedDocument],
) -> list[dict[str, Any]]:
    """Convert retrieved document contracts into checkpoint-safe payloads."""
    return [
        {
            "document_id": document.document_id,
            "source_type": document.source_type,
            "source_identifier": document.source_identifier,
            "content": document.content,
            "dashboard_id": document.dashboard_id,
            "distance": document.distance,
        }
        for document in documents
    ]


def deserialize_retrieved_documents(
    payloads: list[dict[str, Any]] | None,
) -> list[DashboardChatRetrievedDocument]:
    """Rebuild retrieved document contracts from checkpoint payloads."""
    return [
        DashboardChatRetrievedDocument(
            document_id=str(payload.get("document_id") or ""),
            source_type=str(payload.get("source_type") or ""),
            source_identifier=str(payload.get("source_identifier") or ""),
            content=str(payload.get("content") or ""),
            dashboard_id=payload.get("dashboard_id"),
            distance=payload.get("distance"),
        )
        for payload in (payloads or [])
    ]


def serialize_citations(citations: list[DashboardChatCitation]) -> list[dict[str, Any]]:
    """Convert citations into checkpoint-safe payloads via each contract's ``to_dict``."""
    return [citation.to_dict() for citation in citations]


def deserialize_citations(
    payloads: list[dict[str, Any]] | None,
) -> list[DashboardChatCitation]:
    """Rebuild citation contracts from checkpoint payloads."""
    return [
        DashboardChatCitation(
            source_type=str(payload.get("source_type") or ""),
            source_identifier=str(payload.get("source_identifier") or ""),
            title=str(payload.get("title") or ""),
            snippet=str(payload.get("snippet") or ""),
            dashboard_id=payload.get("dashboard_id"),
            table_name=payload.get("table_name"),
        )
        for payload in (payloads or [])
    ]


def serialize_sql_validation_result(
    validation: DashboardChatSqlValidationResult | None,
) -> dict[str, Any] | None:
    """Convert SQL validation state into a checkpoint-safe payload.

    ``None`` (no SQL validated this turn) round-trips as ``None``.
    """
    if validation is None:
        return None
    return {
        "is_valid": validation.is_valid,
        "sanitized_sql": validation.sanitized_sql,
        "tables": list(validation.tables),
        "warnings": list(validation.warnings),
        "errors": list(validation.errors),
    }


def deserialize_sql_validation_result(
    payload: dict[str, Any] | None,
) -> DashboardChatSqlValidationResult | None:
    """Rebuild SQL validation state from checkpoint payload data."""
    if payload is None:
        return None
    return DashboardChatSqlValidationResult(
        is_valid=bool(payload.get("is_valid")),
        sanitized_sql=payload.get("sanitized_sql"),
        tables=list(payload.get("tables") or []),
        warnings=list(payload.get("warnings") or []),
        errors=list(payload.get("errors") or []),
    )


def serialize_response(response: DashboardChatResponse) -> dict[str, Any]:
    """Convert the final response contract into a checkpoint-safe payload."""
    return response.to_dict()


def deserialize_response(payload: dict[str, Any] | None) -> DashboardChatResponse:
    """Rebuild the final response contract from checkpoint payload data.

    A missing payload yields an empty IRRELEVANT response rather than raising.
    """
    payload = payload or {}
    intent_value = str(payload.get("intent") or DashboardChatIntent.IRRELEVANT.value)
    return DashboardChatResponse(
        answer_text=str(payload.get("answer_text") or ""),
        intent=DashboardChatIntent(intent_value),
        citations=deserialize_citations(payload.get("citations") or []),
        warnings=list(payload.get("warnings") or []),
        sql=payload.get("sql"),
        sql_results=payload.get("sql_results"),
        usage=dict(payload.get("usage") or {}),
        tool_calls=list(payload.get("tool_calls") or []),
        metadata=dict(payload.get("metadata") or {}),
    )
ddpui/core/dashboard_chat/orchestration/timing_breakdown.py index d94679fae..de6351224 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/helpers.py +++ b/ddpui/core/dashboard_chat/orchestration/timing_breakdown.py @@ -1,20 +1,15 @@ -"""Shared helpers for dashboard chat graph nodes.""" +"""Timing-breakdown helpers for dashboard chat orchestration.""" from typing import Any -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState - - -def route_after_intent(state: DashboardChatRuntimeState) -> str: - """Route to one explicit handler per prototype intent.""" - return state["intent_decision"].intent.value +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState def merge_tool_loop_timing( - state: DashboardChatRuntimeState, + state: DashboardChatGraphState, execution_result: dict[str, Any], ) -> dict[str, Any]: - """Merge timing from the tool loop into the state's existing timing breakdown.""" + """Merge tool-loop timing into the current graph timing payload.""" existing = dict(state.get("timing_breakdown") or {}) from_loop = dict(execution_result.get("timing_breakdown") or {}) merged = dict(existing) diff --git a/ddpui/core/dashboard_chat/orchestration/message_stack.py b/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py similarity index 80% rename from ddpui/core/dashboard_chat/orchestration/message_stack.py rename to ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py index ac319ffc8..a38e8545c 100644 --- a/ddpui/core/dashboard_chat/orchestration/message_stack.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py @@ -4,16 +4,17 @@ from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey -from ddpui.core.dashboard_chat.orchestration.conversation import ( +from ddpui.core.dashboard_chat.orchestration.conversation_context import ( build_follow_up_context_prompt, detect_sql_modification_type, ) -from 
ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.state.accessors import get_conversation_context def build_new_query_messages( llm_client, - state: DashboardChatRuntimeState, + state: DashboardChatGraphState, ) -> list[dict[str, Any]]: """Build the new-query message stack.""" system_prompt = llm_client.get_prompt(DashboardChatPromptTemplateKey.NEW_QUERY_SYSTEM) @@ -25,7 +26,7 @@ def build_new_query_messages( def build_follow_up_messages( llm_client, - state: DashboardChatRuntimeState, + state: DashboardChatGraphState, ) -> list[dict[str, Any]]: """Build the follow-up message stack.""" modification_type = detect_sql_modification_type(state["user_query"]) @@ -35,7 +36,7 @@ def build_follow_up_messages( { "role": "system", "content": build_follow_up_context_prompt( - state["conversation_context"], + get_conversation_context(state), state["user_query"], ), }, diff --git a/ddpui/core/dashboard_chat/orchestration/tools/__init__.py b/ddpui/core/dashboard_chat/orchestration/tools/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/ddpui/core/dashboard_chat/orchestration/tools/cache.py b/ddpui/core/dashboard_chat/orchestration/tools/cache.py deleted file mode 100644 index a3affbdee..000000000 --- a/ddpui/core/dashboard_chat/orchestration/tools/cache.py +++ /dev/null @@ -1,213 +0,0 @@ -"""Execution-context cache helpers for dashboard chat tool loop.""" - -from collections.abc import Sequence -import re -from typing import Any - -from ddpui.core.dashboard_chat.warehouse.tools import DashboardChatWarehouseTools -from ddpui.models.org import Org -from ddpui.utils.custom_logger import CustomLogger - -from ddpui.core.dashboard_chat.orchestration.retrieval import ( - retrieve_vector_documents, - filter_allowlisted_dbt_results, - dedupe_retrieved_documents, - build_tool_document_payload, - 
get_cached_query_embedding, -) -from ddpui.core.dashboard_chat.orchestration.session_snapshot import ( - persist_session_schema_cache, - persist_session_distinct_cache, -) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState -from ddpui.core.dashboard_chat.orchestration.tools.sql_parsing import ( - table_references as sql_table_references, - resolve_identifier_table, - tables_with_column, - extract_text_filter_values, -) - -logger = CustomLogger("dashboard_chat") - - -# --------------------------------------------------------------------------- -# Warehouse tools (lazily initialized per-turn) -# --------------------------------------------------------------------------- - - -def get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context: dict[str, Any], - org: Org, -) -> DashboardChatWarehouseTools: - """Build the warehouse tool helper lazily for the turn.""" - warehouse_tools = execution_context.get("warehouse_tools") - if warehouse_tools is None: - warehouse_tools = warehouse_tools_factory(org) - execution_context["warehouse_tools"] = warehouse_tools - return warehouse_tools - - -# --------------------------------------------------------------------------- -# Schema snippet cache -# --------------------------------------------------------------------------- - - -def get_cached_schema_snippets( - warehouse_tools_factory, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - tables: Sequence[str] | None = None, -) -> dict[str, Any]: - """Load and cache schema snippets for allowlisted tables.""" - requested_tables = [ - table_name.lower() - for table_name in ( - tables if tables is not None else state["allowlist"].prioritized_tables() - ) - if state["allowlist"].is_allowed(table_name) - ] - cache = execution_context["schema_cache"] - missing_tables = [table_name for table_name in requested_tables if table_name not in cache] - if missing_tables: - snippets = get_turn_warehouse_tools( - 
warehouse_tools_factory, - execution_context, - state["org"], - ).get_schema_snippets(missing_tables) - for table_name, snippet in snippets.items(): - cache[table_name.lower()] = snippet - if snippets: - persist_session_schema_cache(state, cache) - if tables is None: - return cache - return {table_name: cache[table_name] for table_name in requested_tables if table_name in cache} - - -# --------------------------------------------------------------------------- -# Distinct value cache helpers -# --------------------------------------------------------------------------- - - -def normalize_distinct_value(value: Any) -> str: - """Normalize one distinct value for exact cache lookups.""" - return str(value).strip().lower() - - -def has_validated_distinct_value( - distinct_cache: set[tuple[Any, ...]], - *, - table_name: str, - column_name: str, - value: Any, -) -> bool: - """Return whether this exact text filter value was already validated in-session.""" - normalized_value = normalize_distinct_value(value) - normalized_column = column_name.lower() - normalized_table = table_name.lower() - return ( - (normalized_table, normalized_column, normalized_value) in distinct_cache - or ("*", normalized_column, normalized_value) in distinct_cache - or (normalized_table, normalized_column) in distinct_cache - or ("*", normalized_column) in distinct_cache - ) - - -def is_text_type(data_type: str) -> bool: - """Treat common string-like warehouse types as requiring distinct-value lookup.""" - return any(token in data_type for token in ["char", "text", "string", "varchar"]) - - -def record_validated_distinct_values( - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - table_name: str, - column_name: str, - values: Sequence[Any], -) -> None: - """Persist exact validated filter values for the current session.""" - normalized_table = table_name.lower() - normalized_column = column_name.lower() - distinct_cache = execution_context["distinct_cache"] - for value in 
values: - normalized_value = normalize_distinct_value(value) - distinct_cache.add((normalized_table, normalized_column, normalized_value)) - distinct_cache.add(("*", normalized_column, normalized_value)) - persist_session_distinct_cache(state, distinct_cache) - - -def record_validated_filters_from_sql( - *, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], - sql: str, -) -> None: - """Seed exact validated filter values from a successful SQL statement.""" - table_refs = sql_table_references(sql) - if not table_refs: - return - where_match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return - - query_tables = [ - reference["table_name"] for reference in table_refs if reference.get("table_name") - ] - schema_cache = dict(execution_context.get("schema_cache") or {}) - values_by_target: dict[tuple[str, str], list[str]] = {} - for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_refs=table_refs, - schema_cache=schema_cache, - ) - if resolved_table is None and qualifier is None: - if schema_cache: - matching = tables_with_column(normalized_column, query_tables, schema_cache) - if len(matching) == 1: - resolved_table = matching[0] - elif len(query_tables) == 1: - resolved_table = query_tables[0] - values_by_target.setdefault((resolved_table or "*", normalized_column), []).append(value) - - for (tbl, col), vals in values_by_target.items(): - record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name=tbl, - column_name=col, - values=vals, - ) - - -def seed_distinct_cache_from_previous_sql( - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> None: - """Treat text filters from the previous successful SQL as 
already validated for follow-ups.""" - previous_sql = state["conversation_context"].last_sql_query - if not previous_sql: - return - record_validated_filters_from_sql( - state=state, - execution_context=execution_context, - sql=previous_sql, - ) - - -# --------------------------------------------------------------------------- -# dbt index helper -# --------------------------------------------------------------------------- - - -def dbt_resources_by_unique_id(state: DashboardChatRuntimeState) -> dict[str, dict[str, Any]]: - """Return the allowlisted dbt index built at session start.""" - dbt_index = state.get("dbt_index") or {} - return dict(dbt_index.get("resources_by_unique_id") or {}) diff --git a/ddpui/core/dashboard_chat/orchestration/tools/handlers.py b/ddpui/core/dashboard_chat/orchestration/tools/handlers.py deleted file mode 100644 index f2ce713f5..000000000 --- a/ddpui/core/dashboard_chat/orchestration/tools/handlers.py +++ /dev/null @@ -1,806 +0,0 @@ -"""Tool handler implementations for dashboard chat.""" - -import json -import re -from typing import Any - -from django.core.serializers.json import DjangoJSONEncoder - -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist -from ddpui.utils.custom_logger import CustomLogger -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent -from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType -from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard - -from ddpui.core.dashboard_chat.orchestration.conversation import ( - extract_requested_follow_up_dimension, -) -from ddpui.core.dashboard_chat.orchestration.retrieval import ( - retrieve_vector_documents, - filter_allowlisted_dbt_results, - dedupe_retrieved_documents, - build_tool_document_payload, - get_cached_query_embedding, -) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatRuntimeState -from ddpui.core.dashboard_chat.orchestration.tools.cache import ( - 
get_turn_warehouse_tools, - get_cached_schema_snippets, - has_validated_distinct_value, - is_text_type, - record_validated_distinct_values, - record_validated_filters_from_sql, - dbt_resources_by_unique_id, -) -from ddpui.core.dashboard_chat.orchestration.tools.sql_parsing import ( - table_references, - resolve_identifier_table, - tables_with_column, - extract_text_filter_values, - find_tables_with_column, - primary_table_name, - referenced_sql_identifier_refs, - resolve_table_qualifier, - best_table_for_missing_columns, - structural_dimensions_from_sql, - normalize_dimension_name, -) - -logger = CustomLogger("dashboard_chat") - - -# --------------------------------------------------------------------------- -# Tool handlers -# --------------------------------------------------------------------------- - - -def handle_retrieve_docs_tool( - vector_store, - source_config, - runtime_config, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Retrieve current-dashboard, org, and dbt context using the prototype tool contract.""" - query = str(args.get("query") or state["user_query"]).strip() - limit = max(1, min(int(args.get("limit", 8)), 20)) - requested_types = [ - str(doc_type) - for doc_type in (args.get("types") or ["chart", "dataset", "context", "dbt_model"]) - ] - retrieved_documents = [] - cached_embedding = get_cached_query_embedding( - vector_store, query, execution_context["embedding_cache"] - ) - - if "chart" in requested_types: - retrieved_documents.extend( - retrieve_vector_documents( - vector_store, - runtime_config, - org=state["org"], - collection_name=state.get("vector_collection_name"), - query_text=query, - source_types=source_config.filter_enabled( - [DashboardChatSourceType.DASHBOARD_EXPORT] - ), - dashboard_id=state["dashboard_id"], - query_embedding=cached_embedding, - ) - ) - if "context" in requested_types: - retrieved_documents.extend( - retrieve_vector_documents( - 
vector_store, - runtime_config, - org=state["org"], - collection_name=state.get("vector_collection_name"), - query_text=query, - source_types=source_config.filter_enabled( - [DashboardChatSourceType.DASHBOARD_CONTEXT] - ), - dashboard_id=state["dashboard_id"], - query_embedding=cached_embedding, - ) - ) - retrieved_documents.extend( - retrieve_vector_documents( - vector_store, - runtime_config, - org=state["org"], - collection_name=state.get("vector_collection_name"), - query_text=query, - source_types=source_config.filter_enabled([DashboardChatSourceType.ORG_CONTEXT]), - query_embedding=cached_embedding, - ) - ) - if "dataset" in requested_types or "dbt_model" in requested_types: - dbt_results = retrieve_vector_documents( - vector_store, - runtime_config, - org=state["org"], - collection_name=state.get("vector_collection_name"), - query_text=query, - source_types=source_config.filter_enabled( - [ - DashboardChatSourceType.DBT_MANIFEST, - DashboardChatSourceType.DBT_CATALOG, - ] - ), - query_embedding=cached_embedding, - ) - retrieved_documents.extend(filter_allowlisted_dbt_results(dbt_results, state["allowlist"])) - - merged_results = dedupe_retrieved_documents(retrieved_documents)[:limit] - for document in merged_results: - if document.document_id in execution_context["retrieved_document_ids"]: - continue - execution_context["retrieved_document_ids"].add(document.document_id) - execution_context["retrieved_documents"].append(document) - - docs = [ - build_tool_document_payload(document, state["allowlist"], state["dashboard_export"]) - for document in merged_results - ] - return {"docs": docs, "count": len(docs)} - - -def handle_get_schema_snippets_tool( - warehouse_tools_factory, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Return schema snippets for allowlisted tables only.""" - requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []] - allowed_tables = 
[ - table_name for table_name in requested_tables if state["allowlist"].is_allowed(table_name) - ] - filtered_tables = sorted(set(requested_tables) - set(allowed_tables)) - schema_cache = get_cached_schema_snippets( - warehouse_tools_factory, - state, - execution_context, - tables=allowed_tables, - ) - tables_payload = [ - {"table": table_name, "columns": snippet.columns} - for table_name, snippet in schema_cache.items() - if table_name in allowed_tables - ] - response: dict[str, Any] = {"tables": tables_payload} - if filtered_tables: - response["filtered_tables"] = filtered_tables - response[ - "filter_note" - ] = f"{len(filtered_tables)} tables were filtered out because they are not used by the current dashboard." - return response - - -def handle_search_dbt_models_tool( - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Search allowlisted dbt nodes by name, description, and column metadata.""" - query = str(args.get("query") or "").strip().lower() - limit = max(1, min(int(args.get("limit", 8)), 20)) - if not query: - return {"models": [], "count": 0} - - results: list[dict[str, Any]] = [] - for node in dbt_resources_by_unique_id(state).values(): - table_name = node.get("table") - haystacks = [ - str(node.get("name") or ""), - str(node.get("description") or ""), - str(table_name or ""), - ] - for column in node.get("columns") or []: - haystacks.append(str(column.get("name") or "")) - haystacks.append(str(column.get("description") or "")) - if query not in " ".join(haystacks).lower(): - continue - results.append( - { - "name": str(node.get("name") or ""), - "schema": str(node.get("schema") or ""), - "database": str(node.get("database") or ""), - "description": str(node.get("description") or ""), - "columns": [ - str(column.get("name") or "") for column in (node.get("columns") or []) - ][:20], - "table": table_name, - } - ) - if len(results) >= limit: - break - - return {"models": results, 
"count": len(results)} - - -def handle_get_dbt_model_info_tool( - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Return one dbt model's description, columns, and lineage.""" - model_name = str(args.get("model_name") or "").strip().lower() - if not model_name: - return {"error": "model_name is required"} - - matched_unique_id: str | None = None - matched_node: dict[str, Any] | None = None - for unique_id, node in dbt_resources_by_unique_id(state).items(): - table_name = node.get("table") - candidates = { - str(node.get("name") or "").lower(), - str(table_name or "").lower(), - } - if model_name not in candidates: - continue - matched_unique_id = unique_id - matched_node = node - break - - if matched_unique_id is None or matched_node is None: - return {"error": f"Model not found: {model_name}"} - - return { - "model": str(matched_node.get("name") or ""), - "schema": str(matched_node.get("schema") or ""), - "database": str(matched_node.get("database") or ""), - "description": str(matched_node.get("description") or ""), - "columns": list(matched_node.get("columns") or [])[:50], - "upstream": list(matched_node.get("upstream") or []), - "downstream": list(matched_node.get("downstream") or []), - } - - -def handle_get_distinct_values_tool( - warehouse_tools_factory, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Return distinct values and persist validated filter values for the session.""" - table_name = str(args.get("table") or "").lower() - column_name = str(args.get("column") or "") - limit = max(1, min(int(args.get("limit", 50)), 200)) - if not state["allowlist"].is_allowed(table_name): - return { - "error": "table_not_allowed", - "table": table_name, - "message": (f"Table {table_name} is not accessible in the current dashboard context."), - } - - schema_cache = get_cached_schema_snippets(warehouse_tools_factory, 
state, execution_context) - snippet = schema_cache.get(table_name) - normalized_column_name = column_name.lower() - if snippet is not None and normalized_column_name not in { - str(column.get("name") or "").lower() for column in snippet.columns - }: - candidates = find_tables_with_column(normalized_column_name, schema_cache) - return { - "error": "column_not_in_table", - "table": table_name, - "column": column_name, - "candidates": candidates, - "message": ( - f"Column {column_name} is not available on {table_name}. " - "Use a table that contains it, inspect that schema, and retry the lookup." - ), - } - - values = get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context, - state["org"], - ).get_distinct_values( - table_name=table_name, - column_name=column_name, - limit=limit, - ) - record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name=table_name, - column_name=column_name, - values=values, - ) - return { - "table": table_name, - "column": column_name, - "values": values, - "count": len(values), - } - - -def handle_list_tables_by_keyword_tool( - warehouse_tools_factory, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Search allowlisted tables by table name or column name.""" - keyword = str(args.get("keyword") or "").strip().lower() - limit = max(1, min(int(args.get("limit", 15)), 50)) - if not keyword: - return {"tables": []} - - allowlist_tables_source = state["allowlist"].prioritized_tables() or sorted( - state["allowlist"].allowed_tables - ) - allowlisted_tables = list( - dict.fromkeys(table_name.lower() for table_name in allowlist_tables_source) - ) - direct_match_tables = [ - table_name - for table_name in allowlisted_tables - if keyword in table_name or keyword in table_name.rsplit(".", 1)[-1] - ] - - schema_cache: dict[str, Any] = {} - lookup_tables = direct_match_tables or allowlisted_tables - if lookup_tables: - try: 
- schema_cache = get_cached_schema_snippets( - warehouse_tools_factory, - state, - execution_context, - tables=lookup_tables, - ) - except Exception as error: - logger.warning("Dashboard chat keyword table lookup fell back to names only: %s", error) - execution_context["warnings"].append(str(error)) - - matches: list[dict[str, Any]] = [] - seen_tables: set[str] = set() - - for table_name in direct_match_tables: - column_names = [ - str(column.get("name") or "") - for column in getattr(schema_cache.get(table_name), "columns", []) - ] - matches.append({"table": table_name, "columns": column_names[:40]}) - seen_tables.add(table_name) - if len(matches) >= limit: - break - - for table_name, snippet in schema_cache.items(): - if table_name in seen_tables: - continue - column_names = [str(column.get("name") or "") for column in snippet.columns] - if not any(keyword in column_name.lower() for column_name in column_names): - continue - matches.append({"table": table_name, "columns": column_names[:40]}) - if len(matches) >= limit: - break - - if matches: - return { - "tables": matches, - "hint": ( - f"Found {len(matches)} allowlisted tables. Check schema before assuming table structure." - ), - } - return { - "tables": [], - "hint": ( - f"No allowlisted tables matched '{keyword}'. Try a broader keyword or retrieve chart docs first." 
- ), - } - - -def handle_check_table_row_count_tool( - warehouse_tools_factory, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Count rows in one allowlisted table.""" - table_name = str(args.get("table") or "").lower() - if not state["allowlist"].is_allowed(table_name): - return { - "error": "table_not_allowed", - "table": table_name, - "message": (f"Table {table_name} is not accessible in the current dashboard context."), - } - - sql = f"SELECT COUNT(*) AS row_count FROM {table_name} LIMIT 1" - validation = DashboardChatSqlGuard( - allowlist=state["allowlist"], - max_rows=1, - ).validate(sql) - if not validation.is_valid or not validation.sanitized_sql: - return {"error": "sql_validation_failed", "issues": validation.errors} - - rows = get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context, - state["org"], - ).execute_sql(validation.sanitized_sql) - row_count = 0 - if rows: - row_count = int(rows[0].get("row_count") or 0) - return {"table": table_name, "row_count": row_count, "has_data": row_count > 0} - - -def handle_run_sql_query_tool( - warehouse_tools_factory, - runtime_config, - args: dict[str, Any], - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any]: - """Validate SQL like the prototype and let the tool loop self-correct on failures.""" - sql = str(args.get("sql") or "").strip() - if not sql: - return {"error": "sql_missing", "message": "SQL is required"} - - allowlist_validation = _validate_sql_allowlist(sql, state["allowlist"]) - if not allowlist_validation["valid"]: - return { - "error": "table_not_allowed", - "invalid_tables": allowlist_validation["invalid_tables"], - "message": allowlist_validation["message"], - } - - follow_up_dimension_validation = _validate_follow_up_dimension_usage( - warehouse_tools_factory, - sql=sql, - state=state, - execution_context=execution_context, - ) - if follow_up_dimension_validation 
is not None: - return follow_up_dimension_validation - - missing_distinct = _check_missing_distinct( - warehouse_tools_factory, sql, state, execution_context - ) - if missing_distinct: - return { - "error": "must_fetch_distinct_values", - "missing": missing_distinct, - "message": ( - "Call get_distinct_values for these columns, then regenerate the SQL using one of the returned values." - ), - } - - validation = DashboardChatSqlGuard( - allowlist=state["allowlist"], - max_rows=runtime_config.max_query_rows, - ).validate(sql) - execution_context["last_sql_validation"] = validation - if not validation.is_valid or not validation.sanitized_sql: - return { - "error": "sql_validation_failed", - "issues": validation.errors, - "warnings": validation.warnings, - } - - missing_columns = _missing_columns_in_primary_table( - warehouse_tools_factory, - sql=validation.sanitized_sql, - state=state, - execution_context=execution_context, - ) - if missing_columns is not None: - return missing_columns - - execution_context["last_sql"] = validation.sanitized_sql - try: - rows = get_turn_warehouse_tools( - warehouse_tools_factory, - execution_context, - state["org"], - ).execute_sql(validation.sanitized_sql) - except Exception as error: - structured_error = _structured_sql_execution_error( - warehouse_tools_factory, - sql=validation.sanitized_sql, - error=error, - state=state, - execution_context=execution_context, - ) - if structured_error is not None: - return structured_error - return { - "success": False, - "error": str(error), - "sql_used": validation.sanitized_sql, - } - - serialized_rows = json.loads(json.dumps(rows, cls=DjangoJSONEncoder)) - execution_context["last_sql_results"] = serialized_rows - record_validated_filters_from_sql( - state=state, - execution_context=execution_context, - sql=validation.sanitized_sql, - ) - return { - "success": True, - "row_count": len(serialized_rows), - "error": None, - "sql_used": validation.sanitized_sql, - "columns": 
list(serialized_rows[0].keys()) if serialized_rows else [], - "rows": serialized_rows, - } - - -# --------------------------------------------------------------------------- -# SQL execution (run_sql_query tool handler helpers) -# --------------------------------------------------------------------------- - - -def _validate_sql_allowlist( - sql: str, - allowlist: DashboardChatAllowlist, -) -> dict[str, Any]: - """Validate that all referenced tables are in the dashboard allowlist.""" - referenced_tables = DashboardChatSqlGuard._extract_table_names(sql) - invalid_tables = [ - table_name for table_name in referenced_tables if not allowlist.is_allowed(table_name) - ] - if invalid_tables: - return { - "valid": False, - "invalid_tables": invalid_tables, - "message": ( - "SQL references tables not available in the current dashboard: " - + ", ".join(invalid_tables) - + ". Use list_tables_by_keyword to find allowed tables." - ), - } - return {"valid": True, "invalid_tables": [], "message": ""} - - -def _missing_columns_in_primary_table( - warehouse_tools_factory, - *, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any] | None: - """Return a corrective tool error when SQL references columns absent from the referenced query tables.""" - table_refs = table_references(sql) - referenced_tables = [ - reference["table_name"] for reference in table_refs if reference.get("table_name") - ] - if not referenced_tables: - return None - - schema_cache = get_cached_schema_snippets( - warehouse_tools_factory, - state, - execution_context, - tables=referenced_tables, - ) - all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) - missing_columns_by_table: dict[str, set[str]] = {} - candidate_tables_by_column: dict[str, list[str]] = {} - tables_in_query = list(dict.fromkeys(referenced_tables)) - - for qualifier, column_name in referenced_sql_identifier_refs(sql): - resolved_table = 
resolve_identifier_table( - qualifier=qualifier, - column_name=column_name, - table_refs=table_refs, - schema_cache=schema_cache, - ) - if resolved_table is not None: - continue - - if qualifier is not None: - target_table = ( - resolve_table_qualifier(qualifier, table_refs) - or primary_table_name(sql) - or tables_in_query[0] - ) - else: - matching_tables = tables_with_column(column_name, tables_in_query, schema_cache) - if len(matching_tables) > 1: - continue - target_table = primary_table_name(sql) or tables_in_query[0] - - missing_columns_by_table.setdefault(target_table, set()).add(column_name) - candidate_tables_by_column[column_name] = find_tables_with_column( - column_name, - all_schema_cache, - ) - - missing_columns = sorted( - {column_name for columns in missing_columns_by_table.values() for column_name in columns} - ) - if not missing_columns: - return None - - primary = primary_table_name(sql) or tables_in_query[0] - target_table = ( - next(iter(missing_columns_by_table)) if len(missing_columns_by_table) == 1 else primary - ) - best_table = best_table_for_missing_columns(missing_columns, all_schema_cache) - message = ( - f"Column(s) {', '.join(missing_columns)} do not exist on {target_table}. " - "Use a table that contains the requested dimension or measure, and rewrite the SQL using columns from that table." - ) - if best_table: - message += f" Best candidate table: {best_table}." 
- result = { - "error": "column_not_in_table", - "table": target_table, - "missing_columns": missing_columns, - "candidate_tables": candidate_tables_by_column, - "best_table": best_table, - "message": message, - } - if len(missing_columns) == 1: - column_name = missing_columns[0] - result["column"] = column_name - result["candidates"] = candidate_tables_by_column.get(column_name, []) - return result - - -def _structured_sql_execution_error( - warehouse_tools_factory, - *, - sql: str, - error: Exception, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any] | None: - """Convert warehouse execution errors into prototype-style corrective feedback when possible.""" - error_text = str(error) - missing_column_match = re.search( - r'column "(?:[\w]+\.)?([^"]+)" does not exist', - error_text, - flags=re.IGNORECASE, - ) - if missing_column_match: - missing_column = missing_column_match.group(1).lower() - schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) - candidate_tables = find_tables_with_column(missing_column, schema_cache) - return { - "error": "column_not_in_table", - "table": primary_table_name(sql), - "column": missing_column, - "missing_columns": [missing_column], - "candidates": candidate_tables, - "candidate_tables": {missing_column: candidate_tables}, - "best_table": candidate_tables[0] if candidate_tables else None, - "message": ( - f"Column {missing_column} is not available on the current table. " - "Pick a table that contains it, inspect that schema, and rewrite the SQL using that table's real columns." 
- ), - "sql_used": sql, - } - return None - - -def _validate_follow_up_dimension_usage( - warehouse_tools_factory, - *, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> dict[str, Any] | None: - """Keep add-dimension follow-ups from succeeding without actually changing query granularity.""" - intent_decision = state["intent_decision"] - if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL: - return None - if intent_decision.follow_up_context.follow_up_type != "add_dimension": - return None - - requested_dimension = extract_requested_follow_up_dimension( - intent_decision.follow_up_context.modification_instruction or state["user_query"] - ) - if not requested_dimension: - return None - - previous_sql = state["conversation_context"].last_sql_query or "" - current_dimensions = structural_dimensions_from_sql(sql) - previous_dimensions = structural_dimensions_from_sql(previous_sql) - normalized_requested_dimension = normalize_dimension_name(requested_dimension) - if ( - normalized_requested_dimension in current_dimensions - and normalized_requested_dimension not in previous_dimensions - ): - return None - - candidate_tables = find_tables_with_column( - requested_dimension, - get_cached_schema_snippets(warehouse_tools_factory, state, execution_context), - ) - return { - "error": "requested_dimension_missing", - "requested_dimension": requested_dimension, - "previous_dimensions": sorted(previous_dimensions), - "current_dimensions": sorted(current_dimensions), - "candidate_tables": candidate_tables, - "message": ( - f"The follow-up asked to split by '{requested_dimension}', but the SQL does not use that column. " - "Use the requested dimension exactly, or pick a table that contains it." 
- ), - } - - -def _check_missing_distinct( - warehouse_tools_factory, - sql: str, - state: DashboardChatRuntimeState, - execution_context: dict[str, Any], -) -> list[dict[str, Any]]: - """Detect text filters that require a prior distinct-values call.""" - where_match = re.search( - r"\bWHERE\s+(.+?)(?:\bGROUP\b|\bORDER\b|\bLIMIT\b|$)", - sql, - flags=re.IGNORECASE | re.DOTALL, - ) - if not where_match: - return [] - - table_refs = table_references(sql) - query_tables = [ - reference["table_name"] for reference in table_refs if reference.get("table_name") - ] - if not query_tables: - return [] - primary = primary_table_name(sql) or query_tables[0] - - full_schema_cache = get_cached_schema_snippets( - warehouse_tools_factory, - state, - execution_context, - tables=query_tables, - ) - all_schema_cache = get_cached_schema_snippets(warehouse_tools_factory, state, execution_context) - - column_types = { - table_name: { - str(column.get("name") or "") - .lower(): str(column.get("data_type") or column.get("type") or "") - .lower() - for column in getattr(snippet, "columns", []) - } - for table_name, snippet in full_schema_cache.items() - } - missing: list[dict[str, Any]] = [] - for qualifier, column_name, value in extract_text_filter_values(where_match.group(1)): - normalized_column = column_name.lower() - resolved_table = resolve_identifier_table( - qualifier=qualifier, - column_name=normalized_column, - table_refs=table_refs, - schema_cache=full_schema_cache, - ) - if resolved_table is None and qualifier is None: - matching_tables = tables_with_column(normalized_column, query_tables, full_schema_cache) - if len(matching_tables) > 1: - continue - if resolved_table is None: - candidate_tables = find_tables_with_column(normalized_column, all_schema_cache) - if qualifier is None and candidate_tables: - continue - missing.append( - { - "table": primary, - "column": column_name, - "error": "column_not_in_table", - "candidates": candidate_tables, - } - ) - continue - data_type 
= column_types.get(resolved_table, {}).get(normalized_column, "") - if not data_type: - continue - if not is_text_type(data_type): - continue - if not has_validated_distinct_value( - execution_context["distinct_cache"], - table_name=resolved_table, - column_name=normalized_column, - value=value, - ): - missing.append({"table": resolved_table, "column": column_name, "value": value}) - return missing diff --git a/ddpui/core/dashboard_chat/sessions/cache.py b/ddpui/core/dashboard_chat/sessions/cache.py deleted file mode 100644 index ea9300e26..000000000 --- a/ddpui/core/dashboard_chat/sessions/cache.py +++ /dev/null @@ -1,101 +0,0 @@ -"""Session-scoped cache helpers for dashboard chat runtime snapshots.""" - -from typing import Any - -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet - -DASHBOARD_CHAT_SESSION_CACHE_TTL_SECONDS = 24 * 60 * 60 - - -def build_dashboard_chat_session_snapshot_cache_key(session_id: str) -> str: - """Return the cache key used for one chat session's runtime snapshot.""" - return f"dashboard_chat:session_snapshot:{session_id}" - - -def serialize_allowlist(allowlist: DashboardChatAllowlist) -> dict[str, Any]: - """Convert an allowlist to a cache-safe dictionary payload.""" - return { - "chart_tables": sorted(allowlist.chart_tables), - "upstream_tables": sorted(allowlist.upstream_tables), - "allowed_tables": sorted(allowlist.allowed_tables), - "allowed_unique_ids": sorted(allowlist.allowed_unique_ids), - "unique_id_to_table": dict(allowlist.unique_id_to_table), - "table_to_unique_ids": { - table_name: sorted(unique_ids) - for table_name, unique_ids in allowlist.table_to_unique_ids.items() - }, - } - - -def deserialize_allowlist(payload: dict[str, Any] | None) -> DashboardChatAllowlist: - """Rebuild an allowlist from cached data.""" - payload = payload or {} - return DashboardChatAllowlist( - chart_tables=set(payload.get("chart_tables") or 
[]), - upstream_tables=set(payload.get("upstream_tables") or []), - allowed_tables=set(payload.get("allowed_tables") or []), - allowed_unique_ids=set(payload.get("allowed_unique_ids") or []), - unique_id_to_table=dict(payload.get("unique_id_to_table") or {}), - table_to_unique_ids={ - table_name: set(unique_ids) - for table_name, unique_ids in (payload.get("table_to_unique_ids") or {}).items() - }, - ) - - -def serialize_schema_snippets( - snippets: dict[str, DashboardChatSchemaSnippet], -) -> dict[str, Any]: - """Convert schema snippets to a cache-safe dictionary payload.""" - return { - table_name: { - "table_name": snippet.table_name, - "columns": list(snippet.columns), - } - for table_name, snippet in snippets.items() - } - - -def deserialize_schema_snippets( - payload: dict[str, Any] | None, -) -> dict[str, DashboardChatSchemaSnippet]: - """Rebuild schema snippets from cached data.""" - snippets: dict[str, DashboardChatSchemaSnippet] = {} - for table_name, snippet_payload in (payload or {}).items(): - snippets[table_name.lower()] = DashboardChatSchemaSnippet( - table_name=str(snippet_payload.get("table_name") or table_name), - columns=list(snippet_payload.get("columns") or []), - ) - return snippets - - -def serialize_distinct_cache( - distinct_cache: set[tuple[str, str, str]], -) -> dict[str, Any]: - """Convert validated distinct values to a cache-safe nested payload.""" - serialized: dict[str, dict[str, list[str]]] = {} - for table_name, column_name, value in distinct_cache: - serialized.setdefault(table_name, {}).setdefault(column_name, []).append(value) - - return { - table_name: { - column_name: sorted(set(values)) - for column_name, values in column_map.items() - } - for table_name, column_map in serialized.items() - } - - -def deserialize_distinct_cache( - payload: dict[str, Any] | None, -) -> set[tuple[str, str, str]]: - """Rebuild validated distinct values from cached data.""" - distinct_cache: set[tuple[str, str, str]] = set() - for table_name, 
column_map in (payload or {}).items(): - for column_name, values in (column_map or {}).items(): - for value in values or []: - distinct_cache.add( - (str(table_name).lower(), str(column_name).lower(), str(value)) - ) - return distinct_cache diff --git a/ddpui/core/dashboard_chat/sessions/service.py b/ddpui/core/dashboard_chat/sessions/session_service.py similarity index 99% rename from ddpui/core/dashboard_chat/sessions/service.py rename to ddpui/core/dashboard_chat/sessions/session_service.py index a51c03da0..2dcbdc5fb 100644 --- a/ddpui/core/dashboard_chat/sessions/service.py +++ b/ddpui/core/dashboard_chat/sessions/session_service.py @@ -9,7 +9,7 @@ from django.utils import timezone from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig -from ddpui.core.dashboard_chat.vector.documents import build_dashboard_chat_collection_name +from ddpui.core.dashboard_chat.vector.vector_documents import build_dashboard_chat_collection_name from ddpui.core.dashboard_chat.contracts import DashboardChatConversationMessage from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import ( diff --git a/ddpui/core/dashboard_chat/vector/ingest.py b/ddpui/core/dashboard_chat/vector/org_vector_context_build_service.py similarity index 99% rename from ddpui/core/dashboard_chat/vector/ingest.py rename to ddpui/core/dashboard_chat/vector/org_vector_context_build_service.py index 7fd13fa48..10c4fe23d 100644 --- a/ddpui/core/dashboard_chat/vector/ingest.py +++ b/ddpui/core/dashboard_chat/vector/org_vector_context_build_service.py @@ -9,15 +9,15 @@ from django.utils import timezone from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.context.dbt_docs import ( +from ddpui.core.dashboard_chat.context.dbt_docs_artifacts import ( DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) -from ddpui.core.dashboard_chat.vector.documents import ( +from 
ddpui.core.dashboard_chat.vector.vector_documents import ( DashboardChatSourceType, DashboardChatVectorDocument, ) -from ddpui.core.dashboard_chat.vector.store import OrgVectorStore +from ddpui.core.dashboard_chat.vector.org_vector_store import OrgVectorStore from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardAIContext, DashboardChatSession, OrgAIContext from ddpui.models.org import Org diff --git a/ddpui/core/dashboard_chat/vector/store.py b/ddpui/core/dashboard_chat/vector/org_vector_store.py similarity index 99% rename from ddpui/core/dashboard_chat/vector/store.py rename to ddpui/core/dashboard_chat/vector/org_vector_store.py index b6d85ffff..263b2172d 100644 --- a/ddpui/core/dashboard_chat/vector/store.py +++ b/ddpui/core/dashboard_chat/vector/org_vector_store.py @@ -8,7 +8,7 @@ from openai import OpenAI from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig -from ddpui.core.dashboard_chat.vector.documents import ( +from ddpui.core.dashboard_chat.vector.vector_documents import ( DashboardChatSourceType, DashboardChatVectorDocument, build_dashboard_chat_collection_base_name, diff --git a/ddpui/core/dashboard_chat/vector/documents.py b/ddpui/core/dashboard_chat/vector/vector_documents.py similarity index 100% rename from ddpui/core/dashboard_chat/vector/documents.py rename to ddpui/core/dashboard_chat/vector/vector_documents.py diff --git a/ddpui/core/dashboard_chat/warehouse/sql_guard.py b/ddpui/core/dashboard_chat/warehouse/sql_guard.py index 9681b3b96..cf121a024 100644 --- a/ddpui/core/dashboard_chat/warehouse/sql_guard.py +++ b/ddpui/core/dashboard_chat/warehouse/sql_guard.py @@ -4,7 +4,7 @@ import sqlparse -from ddpui.core.dashboard_chat.context.allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist from ddpui.core.dashboard_chat.contracts import DashboardChatSqlValidationResult FORBIDDEN_SQL_KEYWORDS = { diff 
--git a/ddpui/core/dashboard_chat/warehouse/tools.py b/ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py similarity index 71% rename from ddpui/core/dashboard_chat/warehouse/tools.py rename to ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py index 5b8eb5701..10f529ed3 100644 --- a/ddpui/core/dashboard_chat/warehouse/tools.py +++ b/ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py @@ -1,11 +1,13 @@ """Warehouse access helpers used by dashboard chat runtime.""" +import json import re from typing import Any from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse from ddpui.utils.custom_logger import CustomLogger +from ddpui.utils import secretsmanager from ddpui.utils.warehouse.client.warehouse_factory import WarehouseFactory from ddpui.utils.warehouse.client.warehouse_interface import Warehouse @@ -96,6 +98,40 @@ def execute_sql(self, sql: str) -> list[dict[str, Any]]: rows = self.warehouse_client.execute(sql) return list(rows[: self.max_rows]) + def _quote_bigquery_table_ref(self, schema_name: str, table_name: str) -> str: + """Return a quoted BigQuery table ref using the configured project id.""" + project_id = self._resolve_bigquery_project_id() + normalized_project_id = self._normalize_identifier_component(project_id, "project id") + normalized_schema_name = self._normalize_identifier_component(schema_name, "schema name") + normalized_table_name = self._normalize_identifier_component(table_name, "table name") + return ( + f"`{normalized_project_id}.{normalized_schema_name}.{normalized_table_name}`" + ) + + def _resolve_bigquery_project_id(self) -> str: + """Resolve the BigQuery project id from stored warehouse credentials.""" + if self.org_warehouse is None: + raise DashboardChatWarehouseToolsError("Warehouse not configured for dashboard chat") + credentials = secretsmanager.retrieve_warehouse_credentials(self.org_warehouse) or {} + project_id = 
credentials.get("project_id") + if not project_id: + credentials_json = credentials.get("credentials_json") + if credentials_json: + try: + parsed_credentials = ( + json.loads(credentials_json) + if isinstance(credentials_json, str) + else dict(credentials_json) + ) + except Exception as error: + raise DashboardChatWarehouseToolsError( + "Failed to parse BigQuery credentials JSON" + ) from error + project_id = parsed_credentials.get("project_id") + if not project_id: + raise DashboardChatWarehouseToolsError("BigQuery project id not configured") + return str(project_id) + @staticmethod def _parse_table_name(table_name: str | None) -> tuple[str, str] | None: """Parse schema.table into separate pieces.""" diff --git a/ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py b/ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py new file mode 100644 index 000000000..f94b69542 --- /dev/null +++ b/ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py @@ -0,0 +1,157 @@ +# Generated by Django 4.2 on 2026-03-27 19:00 + +from django.db import migrations + + +INTENT_CLASSIFICATION_PROMPT = """# Enhanced Intent Classification System Prompt + +You are an intent classification agent for a "Chat with Dashboards" system. Your job is to classify user queries about the CURRENT dashboard, its charts, its datasets, the dbt models that power it, and the organization/dashboard context attached to it. Questions about other dashboards, similar dashboards, or dashboards beyond the current one are **irrelevant**. + +## Intent Categories + +1. **query_with_sql** - Needs data analysis (numbers, trends, rankings, breakdowns, comparisons) +2. **query_without_sql** - Can be answered from metadata (definitions, calculation logic, chart explanations) +3. **follow_up_sql** - Follow-up query that modifies previous SQL query (add dimension, filter, timeframe) +4. 
**follow_up_context** - Follow-up requesting more explanation about previous results +5. **needs_clarification** - Question is too vague or ambiguous +6. **small_talk** - Greetings, jokes, non-business conversation +7. **irrelevant** - Questions outside the current dashboard's scope, including requests about other dashboards + +## Classification Guidelines + +**query_with_sql** examples: +- "How many students are in the EcoChamps program?" +- "Show me session completion trends over time" +- "Top 10 schools by assessment performance" +- "Compare reading comprehension by city" +- "What's the monthly breakdown of planned vs conducted sessions?" + +**query_without_sql** examples: +- "What does 'planned_session' mean?" +- "How is reading comprehension calculated?" +- "Which dataset powers the student count chart?" +- "What metrics are available in this dashboard?" +- "Explain what this chart shows" +- "What is the mission and vision of Bhumi?" +- "Summarize the Bhumi programs described in the context file" + +**follow_up_sql** examples (requires previous SQL context): +- "Now split by chapter" (add dimension) +- "Filter to CGI donors only" (add filter) +- "Same but for last quarter" (modify timeframe) +- "Show weekly instead" (change aggregation) +- "Which districts are these facilitators from?" (use the facilitators returned in the previous result) +- "Which programs are those students in?" (expand the previously returned entity set with a new dimension) +- "Which states are they from?" (resolve the pronoun from the immediately previous result set) + +**follow_up_context** examples (requires previous context): +- "Explain that metric" +- "How is that calculated?" +- "What does that mean?" +- "Tell me more about that" + +**needs_clarification** examples: +- "Is performance improving?" (missing: which metric, time period) +- "Show me the data" (missing: which data, program) +- "What's the biggest issue?" 
(missing: context, metric) + +## Follow-up Detection + +When conversation history is available, classify as follow-up **only if the new query depends on the previous turn**. Use all three tests: +1. Explicit reference to prior output ("that", "same", "those results", "the previous query"). +2. Modification language applied to prior query ("now split by", "filter that", "same but", "add chapter", "remove donor"). +3. Explanations about prior output ("explain that", "what does that mean"). + +If the question can stand alone and be answered without previous context, treat it as a new `query_with_sql` or `query_without_sql`, **not** follow_up_sql/follow_up_context. + +If so, classify as follow_up_sql or follow_up_context based on whether SQL modification is needed. + +## Current-Dashboard Boundary + +- Treat requests about "other dashboards", "related dashboards", "similar dashboards", or "which dashboard should I look at" as **irrelevant**. +- Treat requests that compare this dashboard to some other dashboard as **irrelevant** unless the question can be answered entirely from the current dashboard's own data and context. +- The assistant is scoped to one dashboard only. 
+ +## Output Format + +Respond with valid JSON only: + +For new queries: +```json +{ + "intent": "query_with_sql", + "confidence": 0.9, + "reason": "User is asking for specific numbers requiring data analysis", + "force_tool_usage": true, + "follow_up_context": { + "is_follow_up": false, + "follow_up_type": null, + "reusable_elements": {}, + "modification_instruction": null + } +} +``` + +For follow-up queries: +```json +{ + "intent": "follow_up_sql", + "confidence": 0.95, + "reason": "User wants to modify previous query by adding dimension", + "force_tool_usage": true, + "follow_up_context": { + "is_follow_up": true, + "follow_up_type": "add_dimension", + "reusable_elements": { + "previous_sql": "from conversation context", + "previous_tables": ["staging.eco_student25_26_stg"], + "add_instruction": "group by chapter" + }, + "modification_instruction": "split by chapter" + } +} +``` + +## Tool Usage Rules + +Set `force_tool_usage: true` for: +- All query_with_sql intents +- All follow_up_sql intents +- query_without_sql when specific chart/dataset lookup needed + +Set `force_tool_usage: false` for: +- small_talk, needs_clarification, irrelevant +- query_without_sql for general explanation questions + +## Context Awareness + +Use conversation history to: +- Detect follow-up patterns +- Understand context references ("that metric", "same query") +- Resolve referential follow-ups that point to the immediately previous result set + ("these facilitators", "those students", "they", "them", "that result") +- Determine if SQL modification or explanation is needed +- Extract reusable elements (tables, metrics, filters) from previous queries + +Classify the following user query:""" + + +def update_intent_classification_prompt(apps, schema_editor): + DashboardChatPromptTemplate = apps.get_model("ddpui", "DashboardChatPromptTemplate") + DashboardChatPromptTemplate.objects.update_or_create( + key="intent_classification", + defaults={"prompt": INTENT_CLASSIFICATION_PROMPT}, + ) 
+ + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0158_dashboardchatmessage_response_latency_ms_and_more"), + ] + + operations = [ + migrations.RunPython( + update_intent_classification_prompt, + migrations.RunPython.noop, + ), + ] diff --git a/ddpui/schemas/notifications_api_schemas.py b/ddpui/schemas/notifications_api_schemas.py index 1c6ffd723..8162cba6b 100644 --- a/ddpui/schemas/notifications_api_schemas.py +++ b/ddpui/schemas/notifications_api_schemas.py @@ -1,6 +1,6 @@ from typing import List, Optional from datetime import datetime -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from enum import Enum from ninja import Schema @@ -19,6 +19,8 @@ class SentToEnum(str, Enum): class CreateNotificationPayloadSchema(BaseModel): """Schema for creating a new notification api.""" + model_config = ConfigDict(use_enum_values=True) + author: str message: str sent_to: SentToEnum @@ -28,9 +30,6 @@ class CreateNotificationPayloadSchema(BaseModel): manager_or_above: Optional[bool] = False org_slug: Optional[str] = None - class Config: - use_enum_values = True - class UpdateReadStatusSchema(Schema): """Schema for updating the read status of a notification.""" diff --git a/ddpui/schemas/org_schema.py b/ddpui/schemas/org_schema.py index 02126ac89..4321687af 100644 --- a/ddpui/schemas/org_schema.py +++ b/ddpui/schemas/org_schema.py @@ -10,9 +10,9 @@ class OrgSchema(Schema): name: str slug: str = None airbyte_workspace_id: str = None - viz_url: HttpUrl = None - viz_login_type: str = None - tnc_accepted: bool = None + viz_url: Optional[HttpUrl] = None + viz_login_type: Optional[str] = None + tnc_accepted: Optional[bool] = None is_demo: bool = False @@ -22,9 +22,9 @@ class CreateOrgSchema(Schema): name: str slug: str = None airbyte_workspace_id: str = None - viz_url: HttpUrl = None - viz_login_type: str = None - tnc_accepted: bool = None + viz_url: Optional[HttpUrl] = None + viz_login_type: Optional[str] = None + tnc_accepted: 
Optional[bool] = None is_demo: bool = False base_plan: str can_upgrade_plan: bool diff --git a/ddpui/settings.py b/ddpui/settings.py index bc6971a20..6d8c6bf68 100644 --- a/ddpui/settings.py +++ b/ddpui/settings.py @@ -341,8 +341,16 @@ # Cookie settings -COOKIE_SECURE = True -COOKIE_SAMESITE = "Lax" if os.getenv("ENVIRONMENT", "") == "production" else "None" -COOKIE_HTTPONLY = True +_cookie_secure_override = os.getenv("COOKIE_SECURE") +COOKIE_SECURE = ( + _cookie_secure_override.lower() in {"1", "true", "yes", "on"} + if _cookie_secure_override is not None + else True +) +COOKIE_SAMESITE = os.getenv( + "COOKIE_SAMESITE", + "Lax" if os.getenv("ENVIRONMENT", "") == "production" else "None", +) +COOKIE_HTTPONLY = os.getenv("COOKIE_HTTPONLY", "true").lower() in {"1", "true", "yes", "on"} DATA_UPLOAD_MAX_MEMORY_SIZE = 5242880 # 5 MB diff --git a/ddpui/tests/core/dashboard_chat/conftest.py b/ddpui/tests/core/dashboard_chat/conftest.py new file mode 100644 index 000000000..30a7cb592 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/conftest.py @@ -0,0 +1,19 @@ +"""Shared pytest fixtures for dashboard-chat backend tests.""" + +import pytest + +from ddpui.core.dashboard_chat.orchestration.checkpoints import reset_dashboard_chat_checkpointer +from ddpui.core.dashboard_chat.orchestration.orchestrator import reset_dashboard_chat_runtime + + +@pytest.fixture(autouse=True) +def reset_dashboard_chat_runtime_state(): + """Release shared LangGraph runtime resources between dashboard-chat tests only. + + The dashboard-chat runtime holds a shared Postgres-backed LangGraph checkpointer open. + Resetting it after each dashboard-chat test avoids leaking DB sessions into teardown + without imposing that cleanup on the rest of the backend test suite. 
+ """ + yield + reset_dashboard_chat_runtime() + reset_dashboard_chat_checkpointer() diff --git a/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py b/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py new file mode 100644 index 000000000..d5440b694 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py @@ -0,0 +1,407 @@ +"""Focused tests for dashboard chat LangGraph checkpoint bootstrap/persistence.""" + +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from django.contrib.auth.models import User +from django.db import connection + +from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.orchestration.checkpoints import get_dashboard_chat_checkpointer +from ddpui.core.dashboard_chat.orchestration.nodes.load_context import load_context_node +from ddpui.core.dashboard_chat.orchestration.orchestrator import DashboardChatRuntime +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_sql_validation_result, +) +from ddpui.core.dashboard_chat.contracts import ( + DashboardChatFollowUpContext, + DashboardChatIntent, + DashboardChatIntentDecision, + DashboardChatSqlValidationResult, +) +from ddpui.models.dashboard import Dashboard +from ddpui.models.org import Org +from ddpui.models.org_user import OrgUser +from ddpui.models.role_based_access import Role +from ddpui.models.visualization import Chart +from ddpui.tests.api_tests.test_user_org_api import seed_db + +pytestmark = pytest.mark.django_db(transaction=True) + + +class MinimalVectorStore: + """Minimal vector store for checkpointing tests that do not use retrieval.""" + + def usage_summary(self): + return {} + + +class MinimalLlmClient: + """Minimal LLM client used for fast-path small-talk runtime turns.""" + + def usage_summary(self): + return {} + + +class QuerySecondTurnLlmClient: + """LLM 
stub that keeps first-turn small talk fast path and classifies later queries.""" + + def classify_intent(self, user_query, conversation_context): + return DashboardChatIntentDecision( + intent=DashboardChatIntent.QUERY_WITH_SQL, + confidence=0.95, + reason="Test SQL route", + force_tool_usage=True, + follow_up_context=DashboardChatFollowUpContext(is_follow_up=False), + ) + + def compose_small_talk(self, user_query): + return "Hi. Ask me anything about this dashboard or the data behind it." + + def get_prompt(self, prompt_key): + return "" + + def reset_usage(self): + return None + + def run_tool_loop_turn(self, *, messages, tools, tool_choice, operation): + raise AssertionError("tool loop should be patched in this regression test") + + def compose_final_answer( + self, + *, + user_query, + intent, + response_format, + draft_answer, + retrieved_documents, + sql, + sql_results, + warnings, + ): + return f"Computed answer for {user_query}" + + def usage_summary(self): + return {} + + +@pytest.fixture +def org(): + organization = Org.objects.create( + name="Dashboard Chat Org", + slug=f"dashchat-{uuid4().hex[:8]}", + airbyte_workspace_id="workspace-1", + ) + yield organization + organization.delete() + + +@pytest.fixture +def orguser(org, seed_db): + user = User.objects.create( + username=f"dashchat-user-{uuid4().hex[:8]}", + email=f"dashchat-user-{uuid4().hex[:8]}@test.com", + password="testpassword", + ) + org_user = OrgUser.objects.create( + user=user, + org=org, + new_role=Role.objects.filter(slug=ACCOUNT_MANAGER_ROLE).first(), + ) + yield org_user + org_user.delete() + user.delete() + + +@pytest.fixture +def primary_chart(org, orguser): + chart = Chart.objects.create( + title="Program Reach", + description="Monthly reach", + chart_type="line", + schema_name="analytics", + table_name="program_reach", + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield chart + chart.delete() + + +@pytest.fixture +def primary_dashboard(org, orguser, 
primary_chart): + dashboard = Dashboard.objects.create( + title="Impact Overview", + description="Program KPIs and reach", + dashboard_type="native", + components={ + "chart-1": { + "id": "chart-1", + "type": "chart", + "config": {"chartId": primary_chart.id}, + } + }, + created_by=orguser, + last_modified_by=orguser, + org=org, + ) + yield dashboard + dashboard.delete() + + +def test_load_context_node_bootstraps_checkpoint_payloads(primary_dashboard): + """First-turn context loading should return checkpoint-safe dashboard payloads.""" + export_payload = {"dashboard": {"title": "Impact Overview"}, "charts": []} + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + manifest_json = {"nodes": {}, "sources": {}, "parent_map": {}, "child_map": {}} + + with patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardService.export_dashboard_context", + return_value=export_payload, + ) as export_mock, patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.load_manifest_json", + return_value=manifest_json, + ) as manifest_mock, patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build", + return_value=allowlist, + ) as build_mock, patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build_dbt_index", + return_value={"resources_by_unique_id": {}}, + ) as index_mock: + updates = load_context_node( + { + "org_id": primary_dashboard.org_id, + "dashboard_id": primary_dashboard.id, + "session_id": str(uuid4()), + } + ) + + assert updates["dashboard_export_payload"] == export_payload + assert updates["allowlist_payload"]["allowed_tables"] == ["analytics.program_reach"] + assert updates["dbt_index"] == {"resources_by_unique_id": {}} + assert updates["schema_snippet_payloads"] == {} + assert updates["validated_distinct_payloads"] == {} + export_mock.assert_called_once() + 
manifest_mock.assert_called_once() + build_mock.assert_called_once_with(export_payload, manifest_json=manifest_json) + index_mock.assert_called_once_with(manifest_json, allowlist) + + +def test_runtime_session_turn_writes_langgraph_checkpoints(primary_dashboard): + """Session-backed runtime turns should persist LangGraph checkpoints in Postgres.""" + export_payload = {"dashboard": {"title": "Impact Overview"}, "charts": []} + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + manifest_json = {"nodes": {}, "sources": {}, "parent_map": {}, "child_map": {}} + session_id = str(uuid4()) + + runtime = DashboardChatRuntime( + vector_store=MinimalVectorStore(), + llm_client=MinimalLlmClient(), + checkpointer=get_dashboard_chat_checkpointer().saver, + ) + + with patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardService.export_dashboard_context", + return_value=export_payload, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.load_manifest_json", + return_value=manifest_json, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build", + return_value=allowlist, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build_dbt_index", + return_value={"resources_by_unique_id": {}}, + ): + response = runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="hi", + session_id=session_id, + vector_collection_name=None, + conversation_history=[], + ) + + assert response.answer_text.startswith("Hi.") + with connection.cursor() as cursor: + cursor.execute("select count(*) from checkpoints where thread_id = %s", [session_id]) + checkpoint_count = cursor.fetchone()[0] + assert checkpoint_count > 0 + + +def test_runtime_reuses_checkpointed_context_across_turns(primary_dashboard): + """Second turns in the same session should reuse durable 
checkpointed context.""" + export_payload = {"dashboard": {"title": "Impact Overview"}, "charts": []} + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + manifest_json = {"nodes": {}, "sources": {}, "parent_map": {}, "child_map": {}} + session_id = str(uuid4()) + + runtime = DashboardChatRuntime( + vector_store=MinimalVectorStore(), + llm_client=MinimalLlmClient(), + checkpointer=get_dashboard_chat_checkpointer().saver, + ) + + with patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardService.export_dashboard_context", + return_value=export_payload, + ) as export_mock, patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.load_manifest_json", + return_value=manifest_json, + ) as manifest_mock, patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build", + return_value=allowlist, + ) as build_mock, patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build_dbt_index", + return_value={"resources_by_unique_id": {}}, + ) as index_mock: + first_response = runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="hi", + session_id=session_id, + vector_collection_name=None, + conversation_history=[], + ) + second_response = runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="who are you", + session_id=session_id, + vector_collection_name=None, + conversation_history=[], + ) + + assert first_response.answer_text.startswith("Hi.") + assert second_response.answer_text.startswith("I'm the dashboard chat assistant") + export_mock.assert_called_once() + manifest_mock.assert_called_once() + build_mock.assert_called_once() + index_mock.assert_called_once() + + +def test_runtime_rebuilds_response_for_non_small_talk_turns_in_same_session(primary_dashboard): + """A later SQL turn must not reuse the prior checkpointed 
small-talk response.""" + export_payload = {"dashboard": {"title": "Impact Overview"}, "charts": []} + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + manifest_json = {"nodes": {}, "sources": {}, "parent_map": {}, "child_map": {}} + session_id = str(uuid4()) + + runtime = DashboardChatRuntime( + vector_store=MinimalVectorStore(), + llm_client=QuerySecondTurnLlmClient(), + checkpointer=get_dashboard_chat_checkpointer().saver, + ) + + query_with_sql_updates = { + "retrieved_documents": [], + "tool_calls": [{"name": "run_sql_query"}], + "draft_answer_text": "", + "sql": "SELECT facilitator_name FROM analytics.facilitator_effectiveness_quarterly LIMIT 5", + "sql_validation": serialize_sql_validation_result( + DashboardChatSqlValidationResult( + is_valid=True, + sanitized_sql="SELECT facilitator_name FROM analytics.facilitator_effectiveness_quarterly LIMIT 5", + tables=["analytics.facilitator_effectiveness_quarterly"], + warnings=[], + errors=[], + ) + ), + "sql_results": [{"facilitator_name": "Asha Menon"}], + "warnings": [], + "timing_breakdown": {"tool_calls_ms": [], "tool_loop_ms": 0}, + "schema_snippet_payloads": {}, + "validated_distinct_payloads": {}, + } + + with patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardService.export_dashboard_context", + return_value=export_payload, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.load_manifest_json", + return_value=manifest_json, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build", + return_value=allowlist, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build_dbt_index", + return_value={"resources_by_unique_id": {}}, + ), patch( + "ddpui.core.dashboard_chat.orchestration.orchestrator.handle_query_with_sql_node", + return_value=query_with_sql_updates, + ) as query_mock: + first_response = 
runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="hi", + session_id=session_id, + vector_collection_name=None, + conversation_history=[], + ) + second_response = runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="top 5 facilitators by outcomes in q2", + session_id=session_id, + vector_collection_name=None, + conversation_history=[], + ) + + assert first_response.answer_text.startswith("Hi.") + assert second_response.answer_text == "Computed answer for top 5 facilitators by outcomes in q2" + assert second_response.intent == DashboardChatIntent.QUERY_WITH_SQL + assert second_response.sql == query_with_sql_updates["sql"] + assert second_response.sql_results == query_with_sql_updates["sql_results"] + query_mock.assert_called_once() + + +def test_runtime_resume_completes_interrupted_session(primary_dashboard): + """Interrupted checkpointed runs should resume cleanly from the persisted session thread.""" + export_payload = {"dashboard": {"title": "Impact Overview"}, "charts": []} + allowlist = DashboardChatAllowlist(allowed_tables={"analytics.program_reach"}) + manifest_json = {"nodes": {}, "sources": {}, "parent_map": {}, "child_map": {}} + session_id = str(uuid4()) + + runtime = DashboardChatRuntime( + vector_store=MinimalVectorStore(), + llm_client=MinimalLlmClient(), + checkpointer=get_dashboard_chat_checkpointer().saver, + ) + + with patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardService.export_dashboard_context", + return_value=export_payload, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.load_manifest_json", + return_value=manifest_json, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build", + return_value=allowlist, + ), patch( + "ddpui.core.dashboard_chat.orchestration.nodes.load_context.DashboardChatAllowlistBuilder.build_dbt_index", 
+ return_value={"resources_by_unique_id": {}}, + ): + with pytest.raises(RuntimeError, match="interrupted before a final response"): + runtime.run( + org=primary_dashboard.org, + dashboard_id=primary_dashboard.id, + user_query="hi", + session_id=session_id, + vector_collection_name=None, + conversation_history=[], + interrupt_before=["handle_small_talk"], + ) + + interrupted_state = runtime.get_state_snapshot(session_id) + assert interrupted_state is not None + assert interrupted_state.next == ("handle_small_talk",) + + resumed_response = runtime.resume(session_id) + assert resumed_response.answer_text.startswith("Hi.") + + completed_state = runtime.get_state_snapshot(session_id) + assert completed_state is not None + assert completed_state.next == () diff --git a/ddpui/tests/core/dashboard_chat/test_llm_client.py b/ddpui/tests/core/dashboard_chat/test_llm_client.py index bd6b36ddf..306096d5d 100644 --- a/ddpui/tests/core/dashboard_chat/test_llm_client.py +++ b/ddpui/tests/core/dashboard_chat/test_llm_client.py @@ -2,8 +2,8 @@ import json -import ddpui.core.dashboard_chat.agents.openai as llm_client_module -from ddpui.core.dashboard_chat.agents.openai import OpenAIDashboardChatLlmClient +import ddpui.core.dashboard_chat.agents.openai_llm_client as llm_client_module +from ddpui.core.dashboard_chat.agents.openai_llm_client import OpenAIDashboardChatLlmClient from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatIntent, @@ -120,6 +120,8 @@ def test_classify_intent_uses_prototype_router_message_shape(): last_tables_used=["analytics.program_reach"], last_chart_ids=["2"], last_response_type="sql_result", + last_answer_text="Asha Menon, Farah Ali, Leela Joseph, Meera Das, and Noor Khan each improved literacy for 3 students.", + last_intent="query_with_sql", ), ) @@ -128,6 +130,9 @@ def test_classify_intent_uses_prototype_router_message_shape(): assert messages[0]["role"] == "system" assert "CONVERSATION CONTEXT" in 
messages[0]["content"] assert "Previous SQL: SELECT COUNT(*) FROM analytics.program_reach" in messages[0]["content"] + assert "Last intent: query_with_sql" in messages[0]["content"] + assert "Last answer text: Asha Menon, Farah Ali, Leela Joseph" in messages[0]["content"] + assert '"these facilitators", "those students", "they", or "them"' in messages[0]["content"] assert messages[1] == { "role": "user", "content": "Classify this query: Now split that by donor type", diff --git a/ddpui/tests/core/dashboard_chat/test_prompt_store.py b/ddpui/tests/core/dashboard_chat/test_prompt_store.py index 10dcabb9e..1fcc12188 100644 --- a/ddpui/tests/core/dashboard_chat/test_prompt_store.py +++ b/ddpui/tests/core/dashboard_chat/test_prompt_store.py @@ -2,7 +2,7 @@ import pytest -from ddpui.core.dashboard_chat.agents.prompt_store import ( +from ddpui.core.dashboard_chat.agents.prompt_template_store import ( DEFAULT_DASHBOARD_CHAT_PROMPTS, DashboardChatPromptStore, ) @@ -16,6 +16,12 @@ def test_prompt_store_returns_default_when_no_db_override_exists(): """Missing prompt rows should fall back to the built-in default prompt text.""" + DashboardChatPromptTemplate.objects.filter( + key__in=[ + DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION, + DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION, + ] + ).delete() store = DashboardChatPromptStore() prompt = store.get(DashboardChatPromptTemplateKey.INTENT_CLASSIFICATION) @@ -29,6 +35,9 @@ def test_prompt_store_returns_default_when_no_db_override_exists(): final_answer_prompt == DEFAULT_DASHBOARD_CHAT_PROMPTS[DashboardChatPromptTemplateKey.FINAL_ANSWER_COMPOSITION] ) + assert "Which districts are these facilitators from?" in prompt + assert "Which programs are those students in?" in prompt + assert "Which states are they from?" 
in prompt def test_prompt_store_uses_db_override_after_save(): diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index 1f86b6d84..fbb4751ca 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -4,17 +4,51 @@ import pytest from django.contrib.auth.models import User -from django.core.cache import cache from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.context.allowlist import ( +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import ( DashboardChatAllowlist, DashboardChatAllowlistBuilder, ) from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig +from ddpui.core.dashboard_chat.orchestration.conversation_context import extract_conversation_context +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import ( + build_follow_up_messages, + build_new_query_messages, +) from ddpui.core.dashboard_chat.orchestration.orchestrator import DashboardChatRuntime -from ddpui.core.dashboard_chat.orchestration.conversation import extract_conversation_context -from ddpui.core.dashboard_chat.orchestration.presentation import determine_response_format +from ddpui.core.dashboard_chat.orchestration.response_composer import ( + compose_final_answer_text, + determine_response_format, +) +from ddpui.core.dashboard_chat.orchestration.retrieval_support import build_tool_document_payload +from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( + serialize_allowlist, + serialize_conversation_context, + serialize_intent_decision, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.dbt_tools import ( + handle_get_dbt_model_info_tool, + handle_search_dbt_models_tool, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.schema_tools import ( + handle_get_distinct_values_tool, + 
handle_list_tables_by_keyword_tool, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_corrections import ( + missing_columns_in_primary_table, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_execution_tools import ( + handle_run_sql_query_tool, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_validation import ( + find_missing_distinct_filters, + validate_follow_up_dimension_usage, +) +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import ( + DashboardChatTurnContext, + seed_validated_distinct_values_from_previous_sql, +) from ddpui.core.dashboard_chat.contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, @@ -25,7 +59,7 @@ DashboardChatResponse, ) from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard -from ddpui.core.dashboard_chat.vector.documents import DashboardChatSourceType +from ddpui.core.dashboard_chat.vector.vector_documents import DashboardChatSourceType from ddpui.utils.vector.interface import VectorQueryResult as DashboardChatVectorQueryResult from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org @@ -37,6 +71,51 @@ pytestmark = pytest.mark.django_db(transaction=True) +def build_runtime_state( + *, + org: Org | None = None, + allowlist: DashboardChatAllowlist | None = None, + conversation_context: DashboardChatConversationContext | None = None, + intent_decision: DashboardChatIntentDecision | None = None, + **extra, +): + """Build a runtime-state payload that matches the post-refactor graph contract.""" + state = dict(extra) + if org is not None: + state["org_id"] = org.id + if allowlist is not None: + state["allowlist_payload"] = serialize_allowlist(allowlist) + if conversation_context is not None: + state["conversation_context"] = serialize_conversation_context(conversation_context) + if intent_decision is not None: + state["intent_decision"] = 
serialize_intent_decision(intent_decision) + return state + + +def build_turn_context( + *, + schema_snippets_by_table=None, + validated_distinct_values=None, + warehouse_tools=None, + warnings=None, + last_sql=None, + last_sql_results=None, + last_sql_validation=None, +): + """Build the explicit per-turn execution context used by tool helpers.""" + return DashboardChatTurnContext( + validated_distinct_values=set(validated_distinct_values or set()), + query_embeddings={}, + schema_snippets_by_table=dict(schema_snippets_by_table or {}), + warnings=list(warnings or []), + warehouse_tools=warehouse_tools, + last_sql=last_sql, + last_sql_results=last_sql_results, + last_sql_validation=last_sql_validation, + timing_breakdown={"tool_calls_ms": []}, + ) + + class FakeVectorStore: """Deterministic vector store used by runtime tests.""" @@ -720,15 +799,11 @@ def test_extract_conversation_context_reads_previous_sql_payload(): assert conversation_context.last_intent == "query_with_sql" -def test_seed_distinct_cache_reuses_previous_text_filters(primary_dashboard): +def test_seed_validated_distinct_values_reuses_previous_text_filters(primary_dashboard): """Follow-up turns should reuse text-filter validations from the previous successful SQL.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "dashboard_id": primary_dashboard.id, - "conversation_context": extract_conversation_context( + state = build_runtime_state( + dashboard_id=primary_dashboard.id, + conversation_context=extract_conversation_context( [ DashboardChatConversationMessage( role="assistant", @@ -745,33 +820,30 @@ def test_seed_distinct_cache_reuses_previous_text_filters(primary_dashboard): ) ] ), - } - execution_context = {"distinct_cache": set()} + ) + turn_context = build_turn_context(validated_distinct_values=set()) - runtime._seed_distinct_cache_from_previous_sql(state, execution_context) + 
seed_validated_distinct_values_from_previous_sql(state, turn_context) assert ( "analytics.donor_funding_quarterly", "quarter_label", "2025 q1", - ) in execution_context["distinct_cache"] - assert ("*", "quarter_label", "2025 q2") in execution_context["distinct_cache"] + ) in turn_context.validated_distinct_values + assert ("*", "quarter_label", "2025 q2") in turn_context.validated_distinct_values def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(primary_dashboard): """Follow-up SQL should reuse validated text filters even after moving to an upstream table.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "allowlist": DashboardChatAllowlist( + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist( allowed_tables={ "analytics.donor_funding_quarterly", "analytics.stg_donor_funding_clean", } ), - "conversation_context": extract_conversation_context( + conversation_context=extract_conversation_context( [ DashboardChatConversationMessage( role="assistant", @@ -788,11 +860,10 @@ def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(p ) ] ), - "org": primary_dashboard.org, - } - execution_context = { - "distinct_cache": set(), - "schema_cache": { + ) + turn_context = build_turn_context( + validated_distinct_values=set(), + schema_snippets_by_table={ "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( "analytics.stg_donor_funding_clean", [ @@ -803,11 +874,11 @@ def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(p ], ) }, - "warehouse_tools": None, - } + ) - runtime._seed_distinct_cache_from_previous_sql(state, execution_context) - missing = runtime._missing_distinct( + seed_validated_distinct_values_from_previous_sql(state, turn_context) + missing = find_missing_distinct_filters( + lambda org: FakeWarehouseTools(), ( "SELECT quarter_label, donor_type, 
SUM(realized_amount_usd) AS total_realized_funding_usd " "FROM analytics.stg_donor_funding_clean " @@ -816,7 +887,7 @@ def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(p "GROUP BY quarter_label, donor_type" ), state, - execution_context, + turn_context, ) assert missing == [] @@ -824,21 +895,17 @@ def test_missing_distinct_accepts_previous_filter_validation_on_upstream_table(p def test_get_distinct_values_returns_column_correction_for_wrong_table(primary_dashboard): """Follow-up correction should surface candidate tables when a distinct lookup targets the wrong table.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "allowlist": DashboardChatAllowlist( + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist( allowed_tables={ "analytics.donor_funding_quarterly", "analytics.stg_donor_funding_clean", } ), - "org": primary_dashboard.org, - } - execution_context = { - "schema_cache": { + ) + turn_context = build_turn_context( + schema_snippets_by_table={ "analytics.donor_funding_quarterly": FakeWarehouseTools._schema_snippet( "analytics.donor_funding_quarterly", [ @@ -859,17 +926,17 @@ def test_get_distinct_values_returns_column_correction_for_wrong_table(primary_d ], ), }, - "warehouse_tools": None, - } + ) - result = runtime._handle_get_distinct_values_tool( + result = handle_get_distinct_values_tool( + lambda org: FakeWarehouseTools(), { "table": "analytics.donor_funding_quarterly", "column": "donor_type", "limit": 50, }, state, - execution_context, + turn_context, ) assert result["error"] == "column_not_in_table" @@ -880,16 +947,12 @@ def test_get_distinct_values_returns_column_correction_for_wrong_table(primary_d def test_missing_columns_check_ignores_boolean_literals(primary_dashboard): """Boolean literals in WHERE clauses should not be misread as missing columns.""" - runtime = DashboardChatRuntime( - 
vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist(allowed_tables={"analytics.stg_donor_funding_clean"}), ) - state = { - "allowlist": DashboardChatAllowlist(allowed_tables={"analytics.stg_donor_funding_clean"}), - "org": primary_dashboard.org, - } - execution_context = { - "schema_cache": { + turn_context = build_turn_context( + schema_snippets_by_table={ "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( "analytics.stg_donor_funding_clean", [ @@ -901,10 +964,10 @@ def test_missing_columns_check_ignores_boolean_literals(primary_dashboard): ], ) }, - "warehouse_tools": None, - } + ) - missing = runtime._missing_columns_in_primary_table( + missing = missing_columns_in_primary_table( + lambda org: FakeWarehouseTools(), sql=( "SELECT quarter_label, donor_type, SUM(realized_amount_usd) AS total_realized_funding_usd, " "COUNT(DISTINCT donation_id) AS donor_count " @@ -913,7 +976,7 @@ def test_missing_columns_check_ignores_boolean_literals(primary_dashboard): "GROUP BY quarter_label, donor_type ORDER BY quarter_label, donor_type LIMIT 200" ), state=state, - execution_context=execution_context, + turn_context=turn_context, ) assert missing is None @@ -921,29 +984,25 @@ def test_missing_columns_check_ignores_boolean_literals(primary_dashboard): def test_run_sql_keeps_join_tables_intact(primary_dashboard): """Join queries should execute the model's SQL as written and let the tool loop correct errors.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - warehouse_tools_factory=lambda org: FakeWarehouseTools(), - ) - state = { - "allowlist": DashboardChatAllowlist( + fake_warehouse = FakeWarehouseTools() + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist( allowed_tables={ "analytics.facilitator_effectiveness_quarterly", 
"analytics.district_funding_efficiency_quarterly", } ), - "org": primary_dashboard.org, - "intent_decision": DashboardChatIntentDecision( + intent_decision=DashboardChatIntentDecision( intent=DashboardChatIntent.QUERY_WITH_SQL, confidence=0.9, reason="Join-heavy data analysis", force_tool_usage=True, ), - "user_query": "Join facilitator outcomes to district funding efficiency.", - } - execution_context = { - "schema_cache": { + user_query="Join facilitator outcomes to district funding efficiency.", + ) + turn_context = build_turn_context( + schema_snippets_by_table={ "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools().schemas[ "analytics.facilitator_effectiveness_quarterly" ], @@ -951,15 +1010,19 @@ def test_run_sql_keeps_join_tables_intact(primary_dashboard): "analytics.district_funding_efficiency_quarterly" ], }, - "warehouse_tools": FakeWarehouseTools(), - "distinct_cache": {("*", "quarter_label")}, - "last_sql": None, - "last_sql_results": None, - "last_sql_validation": None, - "warnings": [], - } + warehouse_tools=fake_warehouse, + validated_distinct_values={("*", "quarter_label")}, + warnings=[], + ) - result = runtime._run_sql_with_distinct_guard( + result = handle_run_sql_query_tool( + lambda org: fake_warehouse, + DashboardChatRuntimeConfig( + retrieval_limit=6, + max_query_rows=200, + max_distinct_values=20, + max_schema_tables=4, + ), { "sql": ( "SELECT " @@ -975,7 +1038,7 @@ def test_run_sql_keeps_join_tables_intact(primary_dashboard): ) }, state, - execution_context, + turn_context, ) assert result["success"] is True @@ -985,21 +1048,17 @@ def test_run_sql_keeps_join_tables_intact(primary_dashboard): def test_missing_distinct_resolves_join_filter_to_qualified_table(primary_dashboard): """Distinct validation should inspect the joined table referenced by a qualified WHERE filter.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "allowlist": DashboardChatAllowlist( + 
state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist( allowed_tables={ "analytics.facilitator_effectiveness_quarterly", "analytics.district_funding_efficiency_quarterly", } ), - "org": primary_dashboard.org, - } - execution_context = { - "schema_cache": { + ) + turn_context = build_turn_context( + schema_snippets_by_table={ "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools().schemas[ "analytics.facilitator_effectiveness_quarterly" ], @@ -1007,11 +1066,11 @@ def test_missing_distinct_resolves_join_filter_to_qualified_table(primary_dashbo "analytics.district_funding_efficiency_quarterly" ], }, - "warehouse_tools": None, - "distinct_cache": set(), - } + validated_distinct_values=set(), + ) - missing = runtime._missing_distinct( + missing = find_missing_distinct_filters( + lambda org: FakeWarehouseTools(), ( "SELECT f.facilitator_name, d.spend_per_student_usd " "FROM analytics.facilitator_effectiveness_quarterly f " @@ -1022,7 +1081,7 @@ def test_missing_distinct_resolves_join_filter_to_qualified_table(primary_dashbo "WHERE d.program_area = 'Literacy'" ), state, - execution_context, + turn_context, ) assert missing == [ @@ -1036,21 +1095,17 @@ def test_missing_distinct_resolves_join_filter_to_qualified_table(primary_dashbo def test_missing_columns_check_is_join_aware_for_qualified_columns(primary_dashboard): """Qualified join columns should be validated against the referenced joined table.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "allowlist": DashboardChatAllowlist( + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist( allowed_tables={ "analytics.facilitator_effectiveness_quarterly", "analytics.district_funding_efficiency_quarterly", } ), - "org": primary_dashboard.org, - } - execution_context = { - "schema_cache": { + ) + turn_context = build_turn_context( + schema_snippets_by_table={ 
"analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools().schemas[ "analytics.facilitator_effectiveness_quarterly" ], @@ -1058,10 +1113,10 @@ def test_missing_columns_check_is_join_aware_for_qualified_columns(primary_dashb "analytics.district_funding_efficiency_quarterly" ], }, - "warehouse_tools": None, - } + ) - missing = runtime._missing_columns_in_primary_table( + missing = missing_columns_in_primary_table( + lambda org: FakeWarehouseTools(), sql=( "SELECT f.facilitator_name, d.fake_dimension " "FROM analytics.facilitator_effectiveness_quarterly f " @@ -1072,7 +1127,7 @@ def test_missing_columns_check_is_join_aware_for_qualified_columns(primary_dashb "WHERE f.quarter_label = '2025 Q2'" ), state=state, - execution_context=execution_context, + turn_context=turn_context, ) assert missing["error"] == "column_not_in_table" @@ -1082,18 +1137,14 @@ def test_missing_columns_check_is_join_aware_for_qualified_columns(primary_dashb def test_missing_columns_check_ignores_order_by_select_alias(primary_dashboard): """ORDER BY aliases from the SELECT clause should not be treated as missing physical columns.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "allowlist": DashboardChatAllowlist( + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist( allowed_tables={"analytics.facilitator_effectiveness_quarterly"} ), - "org": primary_dashboard.org, - } - execution_context = { - "schema_cache": { + ) + turn_context = build_turn_context( + schema_snippets_by_table={ "analytics.facilitator_effectiveness_quarterly": FakeWarehouseTools._schema_snippet( "analytics.facilitator_effectiveness_quarterly", [ @@ -1107,10 +1158,10 @@ def test_missing_columns_check_ignores_order_by_select_alias(primary_dashboard): ], ) }, - "warehouse_tools": None, - } + ) - missing = runtime._missing_columns_in_primary_table( + missing = missing_columns_in_primary_table( + lambda org: 
FakeWarehouseTools(), sql=( "SELECT facilitator_name, AVG(cost_per_improved_outcome_usd) AS avg_cost_per_improved_outcome " "FROM analytics.facilitator_effectiveness_quarterly " @@ -1120,7 +1171,7 @@ def test_missing_columns_check_ignores_order_by_select_alias(primary_dashboard): "LIMIT 1" ), state=state, - execution_context=execution_context, + turn_context=turn_context, ) assert missing is None @@ -1248,22 +1299,24 @@ def test_runtime_prompt_messages_do_not_inline_raw_human_context(primary_dashboa llm_client=SmallTalkLlm(), ) - new_query_messages = runtime._build_new_query_messages( - { - "user_query": "Explain the reach metric", - "human_context": "Organization context: duplicated markdown", - } + new_query_messages = build_new_query_messages( + runtime.llm_client, + build_runtime_state( + user_query="Explain the reach metric", + human_context="Organization context: duplicated markdown", + ), ) - follow_up_messages = runtime._build_follow_up_messages( - { - "user_query": "Explain that metric", - "human_context": "Organization context: duplicated markdown", - "conversation_context": extract_conversation_context([]), - } + follow_up_messages = build_follow_up_messages( + runtime.llm_client, + build_runtime_state( + user_query="Explain that metric", + human_context="Organization context: duplicated markdown", + conversation_context=extract_conversation_context([]), + ), ) assert new_query_messages[0]["content"] == "prompt:new_query_system" - assert "Human context" not in follow_up_messages[0]["content"] + assert all("Human context" not in message["content"] for message in follow_up_messages) def test_runtime_query_with_sql_uses_distinct_values_before_sql_execution( @@ -1308,129 +1361,6 @@ def test_runtime_query_with_sql_uses_distinct_values_before_sql_execution( assert fake_warehouse.distinct_requests == [("analytics.program_reach", "program_name", 20)] -def test_runtime_reuses_session_snapshot_across_turns(org, primary_dashboard): - """Session snapshots should 
freeze dashboard context and reuse schema within one chat.""" - cache.clear() - vector_store = FakeVectorStore( - [ - DashboardChatVectorQueryResult( - document_id="doc-dashboard-export", - content="Chart id: 1. Data source: analytics.program_reach.", - metadata={ - "source_type": "dashboard_export", - "source_identifier": f"dashboard:{primary_dashboard.id}:chart:1", - "dashboard_id": primary_dashboard.id, - }, - distance=0.01, - ) - ] - ) - fake_warehouse = FakeWarehouseTools() - - def build_runtime(): - return DashboardChatRuntime( - vector_store=vector_store, - llm_client=SqlToolLoopLlm(), - warehouse_tools_factory=lambda org: fake_warehouse, - runtime_config=DashboardChatRuntimeConfig( - retrieval_limit=6, - max_query_rows=200, - max_distinct_values=20, - max_schema_tables=4, - ), - ) - - first_response = build_runtime().run( - org=org, - dashboard_id=primary_dashboard.id, - session_id="session-cache-test", - user_query="How many beneficiaries are in Education?", - ) - second_response = build_runtime().run( - org=org, - dashboard_id=primary_dashboard.id, - session_id="session-cache-test", - user_query="How many beneficiaries are in Education?", - ) - - assert first_response.intent == DashboardChatIntent.QUERY_WITH_SQL - assert second_response.intent == DashboardChatIntent.QUERY_WITH_SQL - assert fake_warehouse.schema_requests == [["analytics.program_reach"]] - assert "WHERE program_name = 'Education'" in fake_warehouse.executed_sql[0] - assert first_response.sql is not None - assert second_response.sql is not None - assert "Beneficiary Count: 120" in first_response.answer_text - assert any(citation.source_type == "warehouse_table" for citation in first_response.citations) - assert [call["name"] for call in first_response.tool_calls] == [ - "retrieve_docs", - "get_schema_snippets", - "get_distinct_values", - "run_sql_query", - ] - - -def test_runtime_persists_distinct_validations_in_session_snapshot(org, primary_dashboard): - """Validated text filter values 
should survive across turns in the same chat session.""" - cache.clear() - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - session_id = "session-distinct-cache-test" - snapshot_state = { - "org": org, - "dashboard_id": primary_dashboard.id, - "session_id": session_id, - } - - snapshot = runtime._load_session_snapshot(snapshot_state) - state = { - "org": org, - "dashboard_id": primary_dashboard.id, - "session_id": session_id, - "allowlist": snapshot["allowlist"], - "session_distinct_cache": snapshot["distinct_cache"], - } - execution_context = {"distinct_cache": set(snapshot["distinct_cache"])} - - runtime._record_validated_distinct_values( - state=state, - execution_context=execution_context, - table_name="analytics.program_reach", - column_name="program_name", - values=["Education"], - ) - - reloaded_snapshot = runtime._load_session_snapshot(snapshot_state) - missing = runtime._missing_distinct( - "SELECT COUNT(*) FROM analytics.program_reach WHERE program_name = 'Education'", - { - "allowlist": snapshot["allowlist"], - "org": org, - }, - { - "distinct_cache": set(reloaded_snapshot["distinct_cache"]), - "schema_cache": { - "analytics.program_reach": FakeWarehouseTools._schema_snippet( - "analytics.program_reach", - [ - {"name": "program_name", "data_type": "text", "nullable": False}, - {"name": "beneficiaries", "data_type": "integer", "nullable": False}, - ], - ) - }, - "warehouse_tools": None, - }, - ) - - assert ( - "analytics.program_reach", - "program_name", - "education", - ) in reloaded_snapshot["distinct_cache"] - assert missing == [] - - def test_runtime_follow_up_sql_corrects_after_failed_sql_attempt( monkeypatch, org, @@ -1553,20 +1483,20 @@ def test_runtime_dbt_tools_use_compact_allowlisted_index(): vector_store=FakeVectorStore([]), llm_client=SmallTalkLlm(), ) - state = { - "allowlist": allowlist, - "dbt_index": dbt_index, - } + state = build_runtime_state( + allowlist=allowlist, + 
dbt_index=dbt_index, + ) - search_result = runtime._handle_search_dbt_models_tool( + search_result = handle_search_dbt_models_tool( {"query": "program reach", "limit": 5}, state, - {}, + build_turn_context(), ) - info_result = runtime._handle_get_dbt_model_info_tool( + info_result = handle_get_dbt_model_info_tool( {"model_name": "analytics.program_reach"}, state, - {}, + build_turn_context(), ) assert search_result["count"] >= 1 @@ -1664,12 +1594,10 @@ def test_runtime_follow_up_sql_rejects_query_that_ignores_requested_dimension( def test_follow_up_dimension_validation_accepts_structural_granularity_change(primary_dashboard): """Follow-up add-dimension validation should accept structural SQL rewrites, not only exact token reuse.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - state = { - "intent_decision": DashboardChatIntentDecision( + state = build_runtime_state( + org=primary_dashboard.org, + allowlist=DashboardChatAllowlist(allowed_tables={"analytics.stg_donor_funding_clean"}), + intent_decision=DashboardChatIntentDecision( intent=DashboardChatIntent.FOLLOW_UP_SQL, confidence=0.9, reason="Follow-up SQL", @@ -1680,7 +1608,7 @@ def test_follow_up_dimension_validation_accepts_structural_granularity_change(pr modification_instruction="Now split that by donor type.", ), ), - "conversation_context": DashboardChatConversationContext( + conversation_context=DashboardChatConversationContext( last_sql_query=( "SELECT quarter_label, SUM(realized_amount_usd) AS total_realized_funding_usd " "FROM analytics.stg_donor_funding_clean " @@ -1688,12 +1616,10 @@ def test_follow_up_dimension_validation_accepts_structural_granularity_change(pr "GROUP BY quarter_label" ), ), - "user_query": "Now split that by donor type.", - "allowlist": DashboardChatAllowlist(allowed_tables={"analytics.stg_donor_funding_clean"}), - "org": primary_dashboard.org, - } - execution_context = { - "schema_cache": { + user_query="Now split that by 
donor type.", + ) + turn_context = build_turn_context( + schema_snippets_by_table={ "analytics.stg_donor_funding_clean": FakeWarehouseTools._schema_snippet( "analytics.stg_donor_funding_clean", [ @@ -1703,10 +1629,10 @@ def test_follow_up_dimension_validation_accepts_structural_granularity_change(pr ], ) }, - "warehouse_tools": None, - } + ) - validation = runtime._validate_follow_up_dimension_usage( + validation = validate_follow_up_dimension_usage( + lambda org: FakeWarehouseTools(), sql=( "SELECT quarter_label, COALESCE(donor_type, 'Unknown') AS donor_type, " "SUM(realized_amount_usd) AS total_realized_funding_usd " @@ -1715,7 +1641,7 @@ def test_follow_up_dimension_validation_accepts_structural_granularity_change(pr "GROUP BY quarter_label, COALESCE(donor_type, 'Unknown')" ), state=state, - execution_context=execution_context, + turn_context=turn_context, ) assert validation is None @@ -1771,26 +1697,22 @@ def test_runtime_skips_disabled_source_types_during_retrieval(org, primary_dashb def test_list_tables_by_keyword_matches_allowlisted_table_names_without_schema_lookup(org): """Keyword table lookup should work even when schema snippets are not yet cached.""" fake_warehouse = FakeWarehouseTools() - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=ContextToolLoopLlm(), - warehouse_tools_factory=lambda org: fake_warehouse, - ) - state = { - "org": org, - "allowlist": DashboardChatAllowlist( + state = build_runtime_state( + org=org, + allowlist=DashboardChatAllowlist( allowed_tables={ "analytics.district_funding_efficiency_quarterly", "analytics.facilitator_effectiveness_quarterly", } ), - } - execution_context = {"schema_cache": {}, "warnings": []} + ) + turn_context = build_turn_context(schema_snippets_by_table={}, warnings=[]) - result = runtime._handle_list_tables_by_keyword_tool( + result = handle_list_tables_by_keyword_tool( + lambda org: fake_warehouse, {"keyword": "district_funding_efficiency_quarterly", "limit": 10}, state, 
- execution_context, + turn_context, ) assert result["tables"][0]["table"] == "analytics.district_funding_efficiency_quarterly" @@ -1850,12 +1772,7 @@ def test_allowlist_adds_upstream_dbt_tables(): def test_tool_document_payload_exposes_structured_chart_metadata(): """Chart retrieval payloads should surface exact table, metric, and dimension hints.""" - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=SmallTalkLlm(), - ) - - payload = runtime._build_tool_document_payload( + payload = build_tool_document_payload( DashboardChatRetrievedDocument( document_id="doc-chart", source_type=DashboardChatSourceType.DASHBOARD_EXPORT.value, @@ -1939,19 +1856,15 @@ def test_sql_guard_rejects_select_into_queries(): def test_compose_final_answer_text_uses_llm_and_normalizes_rate_values(): """Final answer composition should send normalized values and table hints to the composer.""" llm = FinalAnswerComposerLlm() - runtime = DashboardChatRuntime( - vector_store=FakeVectorStore([]), - llm_client=llm, - ) - state = { - "user_query": "Give me a district wise pass rate breakdown", - "intent_decision": DashboardChatIntentDecision( + state = build_runtime_state( + user_query="Give me a district wise pass rate breakdown", + intent_decision=DashboardChatIntentDecision( intent=DashboardChatIntent.QUERY_WITH_SQL, confidence=0.9, reason="Needs grouped results", force_tool_usage=True, ), - } + ) execution_result = { "answer_text": "", "retrieved_documents": [ @@ -1981,7 +1894,8 @@ def test_compose_final_answer_text_uses_llm_and_normalizes_rate_values(): "warnings": [], } - answer = runtime._compose_final_answer_text( + answer = compose_final_answer_text( + llm, state, execution_result, response_format="text_with_table", diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index e1eaca7f4..b3268dea0 100644 --- a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ 
b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -1,20 +1,22 @@ -"""Tests for dashboard chat session creation and reuse rules.""" +"""Tests for dashboard chat session creation, reuse, and turn execution.""" from datetime import datetime, timezone -from unittest.mock import patch +from unittest.mock import Mock, patch import pytest from django.contrib.auth.models import User from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.sessions.service import ( +from ddpui.core.dashboard_chat.sessions.session_service import ( DashboardChatSessionError, create_dashboard_chat_user_message, create_dashboard_chat_user_message_with_status, + execute_dashboard_chat_turn, get_or_create_dashboard_chat_session, ) -from ddpui.core.dashboard_chat.vector.documents import build_dashboard_chat_collection_name +from ddpui.core.dashboard_chat.vector.vector_documents import build_dashboard_chat_collection_name +from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org import Org, OrgDbt @@ -265,3 +267,106 @@ def test_create_dashboard_chat_user_message_with_status_marks_reused_message( assert first_result.created is True assert second_result.created is False assert first_result.message.id == second_result.message.id + + +@patch("ddpui.core.dashboard_chat.orchestration.orchestrator.get_dashboard_chat_runtime") +def test_execute_dashboard_chat_turn_persists_assistant_message(get_runtime, session_owner, dashboard): + """Successful turns should persist the assistant reply through the session service.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + runtime 
= Mock() + runtime.run.return_value = DashboardChatResponse( + answer_text="Funding dropped because donor inflows slowed this quarter.", + intent=DashboardChatIntent.QUERY_WITH_SQL, + warnings=["Example warning"], + sql="SELECT 1", + sql_results=[{"value": 1}], + metadata={ + "timing_breakdown": { + "runtime_total_ms": 123.4, + "graph_nodes_ms": {"load_context": 10.0}, + } + }, + ) + get_runtime.return_value = runtime + + result = execute_dashboard_chat_turn(str(session.session_id), user_message.id) + + assistant_message = DashboardChatMessage.objects.get(session=session, role="assistant") + assert assistant_message.sequence_number == 2 + assert assistant_message.content == "Funding dropped because donor inflows slowed this quarter." + assert assistant_message.payload["sql"] == "SELECT 1" + assert assistant_message.response_latency_ms is not None + assert assistant_message.response_latency_ms >= 0 + assert assistant_message.timing_breakdown == { + "runtime_total_ms": 123.4, + "graph_nodes_ms": {"load_context": 10.0}, + } + assert result["status"] == "completed" + assert result["assistant_message"].id == assistant_message.id + + +@patch("ddpui.core.dashboard_chat.orchestration.orchestrator.get_dashboard_chat_runtime") +def test_execute_dashboard_chat_turn_bubbles_runtime_errors(get_runtime, session_owner, dashboard): + """Runtime failures should propagate without persisting an assistant reply.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + runtime = Mock() + runtime.run.side_effect = RuntimeError("boom") + get_runtime.return_value = runtime + + with pytest.raises(RuntimeError, match="boom"): + execute_dashboard_chat_turn(str(session.session_id), user_message.id) + + assert DashboardChatMessage.objects.filter(session=session, 
role="assistant").count() == 0 + + +@patch("ddpui.core.dashboard_chat.orchestration.orchestrator.get_dashboard_chat_runtime") +def test_execute_dashboard_chat_turn_reuses_existing_assistant_reply( + get_runtime, + session_owner, + dashboard, +): + """Duplicate execution attempts should reuse the persisted assistant reply.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + assistant_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=2, + role="assistant", + content="Existing answer", + payload={"intent": "query_without_sql"}, + ) + + result = execute_dashboard_chat_turn(str(session.session_id), user_message.id) + + assert result["status"] == "skipped_existing_reply" + assert result["assistant_message"].id == assistant_message.id + get_runtime.assert_not_called() diff --git a/ddpui/tests/core/dashboard_chat/test_tasks.py b/ddpui/tests/core/dashboard_chat/test_tasks.py index 24268e2e8..e66e9621d 100644 --- a/ddpui/tests/core/dashboard_chat/test_tasks.py +++ b/ddpui/tests/core/dashboard_chat/test_tasks.py @@ -8,14 +8,10 @@ from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.celeryworkers.tasks import ( build_dashboard_chat_context_for_org, - run_dashboard_chat_turn, schedule_dashboard_chat_context_builds, ) -from ddpui.core.dashboard_chat.vector.ingest import OrgVectorBuildResult -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse +from ddpui.core.dashboard_chat.vector.org_vector_context_build_service import OrgVectorBuildResult from ddpui.models.org import Org, OrgDbt -from ddpui.models.dashboard import Dashboard -from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org_preferences import OrgPreferences from 
ddpui.models.org_user import OrgUser from ddpui.models.role_based_access import Role @@ -58,18 +54,6 @@ def _create_org_dbt(org: Org) -> OrgDbt: org.dbt = dbt org.save(update_fields=["dbt"]) return dbt - - -def _create_dashboard(orguser: OrgUser) -> Dashboard: - return Dashboard.objects.create( - title="Chat Dashboard", - dashboard_type="native", - created_by=orguser, - last_modified_by=orguser, - org=orguser.org, - ) - - def test_schedule_dashboard_chat_context_builds_enqueues_only_eligible_orgs(orguser): eligible_org = orguser.org _create_org_dbt(eligible_org) @@ -102,23 +86,21 @@ def test_schedule_dashboard_chat_context_builds_enqueues_only_eligible_orgs(orgu assert result == {"enqueued_org_ids": [eligible_org.id]} -def test_build_dashboard_chat_context_for_org_skips_when_locked(orguser): +def test_build_dashboard_chat_context_for_org_skips_when_org_is_missing(): + result = build_dashboard_chat_context_for_org.run(999999) + + assert result == {"status": "skipped_missing_org", "org_id": 999999} + + +def test_build_dashboard_chat_context_for_org_skips_when_org_is_ineligible(orguser): org = orguser.org _create_org_dbt(org) - OrgPreferences.objects.create(org=org, ai_data_sharing_enabled=True) - enable_feature_flag("AI_DASHBOARD_CHAT", org=org) - - redis_lock = Mock() - redis_lock.acquire.return_value = False - redis_client = Mock() - redis_client.lock.return_value = redis_lock + OrgPreferences.objects.create(org=org, ai_data_sharing_enabled=False) - with patch( - "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client - ), patch("ddpui.celeryworkers.tasks.OrgVectorBuildService") as vector_build_service: + with patch("ddpui.celeryworkers.tasks.OrgVectorBuildService") as vector_build_service: result = build_dashboard_chat_context_for_org.run(org.id) - assert result == {"status": "skipped_locked", "org_id": org.id} + assert result == {"status": "skipped_ineligible", "org_id": org.id} vector_build_service.assert_not_called() @@ -128,12 +110,6 @@ 
def test_build_dashboard_chat_context_for_org_runs_vector_build(orguser): OrgPreferences.objects.create(org=org, ai_data_sharing_enabled=True) enable_feature_flag("AI_DASHBOARD_CHAT", org=org) - redis_lock = Mock() - redis_lock.acquire.return_value = True - redis_lock.owned.return_value = True - redis_client = Mock() - redis_client.lock.return_value = redis_lock - result_payload = OrgVectorBuildResult( org_id=org.id, docs_generated_at=timezone.now(), @@ -145,138 +121,10 @@ def test_build_dashboard_chat_context_for_org_runs_vector_build(orguser): vector_build_service = Mock() vector_build_service.build_org_vector_context.return_value = result_payload - with patch( - "ddpui.celeryworkers.tasks.RedisClient.get_instance", return_value=redis_client - ), patch( - "ddpui.celeryworkers.tasks.OrgVectorBuildService", - return_value=vector_build_service, - ): + with patch("ddpui.celeryworkers.tasks.OrgVectorBuildService", return_value=vector_build_service): result = build_dashboard_chat_context_for_org.run(org.id) assert result["status"] == "completed" assert result["org_id"] == org.id assert result["source_document_counts"] == {"dashboard_export": 2} vector_build_service.build_org_vector_context.assert_called_once() - redis_lock.release.assert_called_once() - - -@patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") -@patch("ddpui.celeryworkers.tasks.get_dashboard_chat_runtime") -def test_run_dashboard_chat_turn_persists_assistant_message_and_publishes_event( - get_runtime, - publish_event, - orguser, -): - _create_org_dbt(orguser.org) - dashboard = _create_dashboard(orguser) - session = DashboardChatSession.objects.create( - org=orguser.org, - orguser=orguser, - dashboard=dashboard, - ) - user_message = DashboardChatMessage.objects.create( - session=session, - sequence_number=1, - role="user", - content="Why did funding drop?", - ) - runtime = Mock() - runtime.run.return_value = DashboardChatResponse( - answer_text="Funding dropped because donor inflows slowed 
this quarter.", - intent=DashboardChatIntent.QUERY_WITH_SQL, - warnings=["Example warning"], - sql="SELECT 1", - sql_results=[{"value": 1}], - metadata={ - "timing_breakdown": { - "runtime_total_ms": 123.4, - "graph_nodes_ms": {"load_context": 10.0}, - } - }, - ) - get_runtime.return_value = runtime - - result = run_dashboard_chat_turn(str(session.session_id), user_message.id) - - assistant_message = DashboardChatMessage.objects.get(session=session, role="assistant") - assert assistant_message.sequence_number == 2 - assert assistant_message.content == "Funding dropped because donor inflows slowed this quarter." - assert assistant_message.payload["sql"] == "SELECT 1" - assert assistant_message.response_latency_ms is not None - assert assistant_message.response_latency_ms >= 0 - assert assistant_message.timing_breakdown == { - "runtime_total_ms": 123.4, - "graph_nodes_ms": {"load_context": 10.0}, - } - assert result["status"] == "completed" - publish_event.assert_called_once() - - -@patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") -@patch("ddpui.celeryworkers.tasks.get_dashboard_chat_runtime") -def test_run_dashboard_chat_turn_publishes_error_when_runtime_fails( - get_runtime, - publish_event, - orguser, -): - _create_org_dbt(orguser.org) - dashboard = _create_dashboard(orguser) - session = DashboardChatSession.objects.create( - org=orguser.org, - orguser=orguser, - dashboard=dashboard, - ) - user_message = DashboardChatMessage.objects.create( - session=session, - sequence_number=1, - role="user", - content="Why did funding drop?", - ) - runtime = Mock() - runtime.run.side_effect = RuntimeError("boom") - get_runtime.return_value = runtime - - with pytest.raises(RuntimeError, match="boom"): - run_dashboard_chat_turn(str(session.session_id), user_message.id) - - assert DashboardChatMessage.objects.filter(session=session, role="assistant").count() == 0 - publish_event.assert_called_once() - - 
-@patch("ddpui.celeryworkers.tasks.publish_dashboard_chat_event") -@patch("ddpui.celeryworkers.tasks.get_dashboard_chat_runtime") -def test_run_dashboard_chat_turn_reuses_existing_assistant_reply( - get_runtime, - publish_event, - orguser, -): - _create_org_dbt(orguser.org) - dashboard = _create_dashboard(orguser) - session = DashboardChatSession.objects.create( - org=orguser.org, - orguser=orguser, - dashboard=dashboard, - ) - user_message = DashboardChatMessage.objects.create( - session=session, - sequence_number=1, - role="user", - content="Why did funding drop?", - ) - assistant_message = DashboardChatMessage.objects.create( - session=session, - sequence_number=2, - role="assistant", - content="Existing answer", - payload={"intent": "query_without_sql"}, - ) - - result = run_dashboard_chat_turn(str(session.session_id), user_message.id) - - assert result == { - "status": "skipped_existing_reply", - "session_id": str(session.session_id), - "assistant_message_id": assistant_message.id, - } - get_runtime.assert_not_called() - publish_event.assert_not_called() diff --git a/ddpui/tests/core/dashboard_chat/test_vector_building.py b/ddpui/tests/core/dashboard_chat/test_vector_building.py index 24f58688c..f20523631 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_building.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_building.py @@ -10,13 +10,13 @@ from django.utils import timezone from ddpui.auth import ACCOUNT_MANAGER_ROLE -from ddpui.core.dashboard_chat.context.dbt_docs import ( +from ddpui.core.dashboard_chat.context.dbt_docs_artifacts import ( DashboardChatDbtDocsArtifacts, generate_dashboard_chat_dbt_docs_artifacts, ) from ddpui.core.dashboard_chat.config import DashboardChatSourceConfig -from ddpui.core.dashboard_chat.vector.ingest import OrgVectorBuildService -from ddpui.core.dashboard_chat.vector.documents import ( +from ddpui.core.dashboard_chat.vector.org_vector_context_build_service import OrgVectorBuildService +from 
ddpui.core.dashboard_chat.vector.vector_documents import ( DashboardChatSourceType, build_dashboard_chat_collection_name, ) @@ -259,7 +259,7 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdb (target_dir / "catalog.json").write_text(json.dumps(catalog_json), encoding="utf-8") with patch( - "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.gather_dbt_project_params", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.DbtProjectManager.gather_dbt_project_params", return_value=DbtProjectParams( dbt_binary="/mock/dbt", dbt_env_dir="/mock/env", @@ -269,13 +269,13 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_updates_timestamp(org, orgdb org_project_dir=str(project_dir.parent), ), ), patch( - "ddpui.core.dashboard_chat.context.dbt_docs.prefect_service.get_dbt_cli_profile_block", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.prefect_service.get_dbt_cli_profile_block", return_value={"profile": {"dashchat": {"outputs": {"dev": {"type": "postgres"}}}}}, ), patch( - "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.run_dbt_command", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.DbtProjectManager.run_dbt_command", return_value=Mock(stdout="ok", returncode=0), ) as mock_run_dbt, patch( - "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.get_dbt_project_dir", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.DbtProjectManager.get_dbt_project_dir", return_value=str(project_dir), ): artifacts = generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) @@ -320,7 +320,7 @@ def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_genera mock_git_manager = Mock() with patch( - "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.gather_dbt_project_params", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.DbtProjectManager.gather_dbt_project_params", return_value=DbtProjectParams( dbt_binary="/mock/dbt", dbt_env_dir="/mock/env", @@ -330,19 +330,19 
@@ def test_generate_dashboard_chat_dbt_docs_artifacts_pulls_git_repo_before_genera org_project_dir=str(project_dir.parent), ), ), patch( - "ddpui.core.dashboard_chat.context.dbt_docs.prefect_service.get_dbt_cli_profile_block", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.prefect_service.get_dbt_cli_profile_block", return_value={"profile": {"dashchat": {"outputs": {"dev": {"type": "postgres"}}}}}, ), patch( - "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.run_dbt_command", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.DbtProjectManager.run_dbt_command", return_value=Mock(stdout="ok", returncode=0), ), patch( - "ddpui.core.dashboard_chat.context.dbt_docs.DbtProjectManager.get_dbt_project_dir", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.DbtProjectManager.get_dbt_project_dir", return_value=str(project_dir), ), patch( - "ddpui.core.dashboard_chat.context.dbt_docs.secretsmanager.retrieve_github_pat", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.secretsmanager.retrieve_github_pat", return_value="actual-pat", ) as mock_retrieve_pat, patch( - "ddpui.core.dashboard_chat.context.dbt_docs.GitManager", + "ddpui.core.dashboard_chat.context.dbt_docs_artifacts.GitManager", return_value=mock_git_manager, ) as mock_git_manager_class: generate_dashboard_chat_dbt_docs_artifacts(org, orgdbt) diff --git a/ddpui/tests/core/dashboard_chat/test_vector_store.py b/ddpui/tests/core/dashboard_chat/test_vector_store.py index c86df0024..50b821dc4 100644 --- a/ddpui/tests/core/dashboard_chat/test_vector_store.py +++ b/ddpui/tests/core/dashboard_chat/test_vector_store.py @@ -6,13 +6,13 @@ from chromadb.errors import NotFoundError from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig -from ddpui.core.dashboard_chat.vector.documents import ( +from ddpui.core.dashboard_chat.vector.vector_documents import ( DashboardChatSourceType, DashboardChatVectorDocument, build_dashboard_chat_collection_name, ) from 
ddpui.utils.vector.backends.chroma import ChromaVectorStore -from ddpui.core.dashboard_chat.vector.store import OrgVectorStore +from ddpui.core.dashboard_chat.vector.org_vector_store import OrgVectorStore class FakeEmbeddingProvider: diff --git a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py index bb30e2c0d..275391c29 100644 --- a/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py +++ b/ddpui/tests/core/dashboard_chat/test_warehouse_tools.py @@ -6,7 +6,7 @@ import pytest -from ddpui.core.dashboard_chat.warehouse.tools import ( +from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import ( DashboardChatWarehouseTools, DashboardChatWarehouseToolsError, ) @@ -25,7 +25,7 @@ def _build_bigquery_tools(): def test_quote_bigquery_table_ref_uses_project_id_from_credentials(): """BigQuery table refs should use project_id from stored credentials, not dataset location.""" with patch( - "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.warehouse_access_tools.secretsmanager.retrieve_warehouse_credentials", return_value={"project_id": "analytics-project"}, ): tools = _build_bigquery_tools() @@ -37,7 +37,7 @@ def test_quote_bigquery_table_ref_uses_project_id_from_credentials(): def test_quote_bigquery_table_ref_reads_nested_project_id_from_credentials_json(): """credentials_json payloads should still provide the BigQuery project id.""" with patch( - "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.warehouse_access_tools.secretsmanager.retrieve_warehouse_credentials", return_value={"credentials_json": json.dumps({"project_id": "analytics-project"})}, ): tools = _build_bigquery_tools() @@ -49,7 +49,7 @@ def test_quote_bigquery_table_ref_reads_nested_project_id_from_credentials_json( def test_quote_bigquery_table_ref_requires_project_id(): 
"""A missing project id should fail explicitly.""" with patch( - "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.warehouse_access_tools.secretsmanager.retrieve_warehouse_credentials", return_value={"dataset_location": "asia-south1"}, ): tools = _build_bigquery_tools() @@ -62,7 +62,7 @@ def test_quote_bigquery_table_ref_requires_project_id(): def test_quote_bigquery_table_ref_rejects_unsafe_identifier_components(): """BigQuery table refs should reject unsafe project/schema/table identifier text.""" with patch( - "ddpui.core.dashboard_chat.warehouse.tools.secretsmanager.retrieve_warehouse_credentials", + "ddpui.core.dashboard_chat.warehouse.warehouse_access_tools.secretsmanager.retrieve_warehouse_credentials", return_value={"project_id": "analytics-project"}, ): tools = _build_bigquery_tools() diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 777569380..b32e7c484 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -3,12 +3,12 @@ from asgiref.sync import async_to_sync -from ddpui.core.dashboard_chat.sessions.service import execute_dashboard_chat_turn +from ddpui.core.dashboard_chat.sessions.session_service import execute_dashboard_chat_turn from ddpui.core.dashboard_chat.events import ( build_dashboard_chat_event, dashboard_chat_group_name, ) -from ddpui.core.dashboard_chat.sessions.service import ( +from ddpui.core.dashboard_chat.sessions.session_service import ( DashboardChatSessionError, create_dashboard_chat_user_message_with_status, find_dashboard_chat_assistant_reply, diff --git a/pyproject.toml b/pyproject.toml index eeae97d7e..c56a705d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dependencies = [ "django-cors-headers==3.14.0", "django-extensions==3.2.3", "django-flags==5.0.13", - "django-ninja==0.21.0", + "django-ninja==1.6.2", 
"django-prometheus==2.3.1", "djangorestframework==3.14.0", "djangorestframework-simplejwt>=5.5.0", @@ -119,7 +119,8 @@ dependencies = [ "kombu==5.2.4", "kubernetes>=28.1.0", "lazy-object-proxy==1.9.0", - "langgraph==0.0.69", + "langgraph==1.1.3", + "langgraph-checkpoint-postgres==3.0.5", "leather==0.3.4", "logbook==1.5.3", "mako==1.2.4", @@ -144,7 +145,7 @@ dependencies = [ "onnxruntime==1.20.1", "openai==1.55.3", "ordered-set==4.1.0", - "orjson==3.9.12", + "orjson==3.11.7", "packaging==23.2", "pandas==2.2.2", "paramiko==3.4.0", @@ -164,14 +165,16 @@ dependencies = [ "proto-plus==1.23.0", "protobuf==4.25.3", "psutil==5.9.5", - "psycopg2-binary==2.9.6", + "psycopg[binary]==3.3.3", + "psycopg2-binary==2.9.10", + "psycopg-pool==3.3.0", "ptyprocess==0.7.0", "pure-eval==0.2.2", "pyarrow==16.1.0", "pyasn1==0.6.2", "pyasn1-modules==0.4.2", "pycparser==2.21", - "pydantic==1.10.6", + "pydantic==2.12.5", "pydeck==0.9.1", "pyfiglet==0.8.post1", "pygments==2.14.0", @@ -235,7 +238,7 @@ dependencies = [ "tornado==6.3.2", "traitlets==5.9.0", "typer==0.9.0", - "typing-extensions==4.12.2", + "typing-extensions==4.14.1", "tzdata==2022.7", "tzlocal==4.3", "urllib3==2.6.3", @@ -262,6 +265,9 @@ DJANGO_SETTINGS_MODULE="ddpui.settings" testpaths = [ "ddpui/tests" ] +filterwarnings = [ + "ignore:SelectableGroups dict interface is deprecated\\. 
Use select\\.:DeprecationWarning:kombu\\.utils\\.compat", +] [tool.pylint.messages_control] max-line-length = 200 diff --git a/uv.lock b/uv.lock index 8134a061d..b7c906b0e 100644 --- a/uv.lock +++ b/uv.lock @@ -53,6 +53,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "anyio" version = "3.6.2" @@ -930,6 +939,7 @@ dependencies = [ { name = "kombu" }, { name = "kubernetes" }, { name = "langgraph" }, + { name = "langgraph-checkpoint-postgres" }, { name = "lazy-object-proxy" }, { name = "leather" }, { name = "logbook" }, @@ -975,6 +985,8 @@ dependencies = [ { name = "proto-plus" }, { name = "protobuf" }, { name = "psutil" }, + { name = "psycopg", extra = ["binary"] }, + { name = "psycopg-pool" }, { name = "psycopg2-binary" }, { name = "ptyprocess" }, { name = "pure-eval" }, @@ -1121,7 +1133,7 @@ requires-dist = [ { name = "django-cors-headers", specifier = "==3.14.0" }, { name = "django-extensions", specifier = "==3.2.3" }, { name = "django-flags", specifier = "==5.0.13" }, - { name = 
"django-ninja", specifier = "==0.21.0" }, + { name = "django-ninja", specifier = "==1.6.2" }, { name = "django-prometheus", specifier = "==2.3.1" }, { name = "djangorestframework", specifier = "==3.14.0" }, { name = "djangorestframework-simplejwt", specifier = ">=5.5.0" }, @@ -1176,7 +1188,8 @@ requires-dist = [ { name = "kiwisolver", specifier = "==1.4.5" }, { name = "kombu", specifier = "==5.2.4" }, { name = "kubernetes", specifier = ">=28.1.0" }, - { name = "langgraph", specifier = "==0.0.69" }, + { name = "langgraph", specifier = "==1.1.3" }, + { name = "langgraph-checkpoint-postgres", specifier = "==3.0.5" }, { name = "lazy-object-proxy", specifier = "==1.9.0" }, { name = "leather", specifier = "==0.3.4" }, { name = "logbook", specifier = "==1.5.3" }, @@ -1202,7 +1215,7 @@ requires-dist = [ { name = "onnxruntime", specifier = "==1.20.1" }, { name = "openai", specifier = "==1.55.3" }, { name = "ordered-set", specifier = "==4.1.0" }, - { name = "orjson", specifier = "==3.9.12" }, + { name = "orjson", specifier = "==3.11.7" }, { name = "packaging", specifier = "==23.2" }, { name = "pandas", specifier = "==2.2.2" }, { name = "paramiko", specifier = "==3.4.0" }, @@ -1222,14 +1235,16 @@ requires-dist = [ { name = "proto-plus", specifier = "==1.23.0" }, { name = "protobuf", specifier = "==4.25.3" }, { name = "psutil", specifier = "==5.9.5" }, - { name = "psycopg2-binary", specifier = "==2.9.6" }, + { name = "psycopg", extras = ["binary"], specifier = "==3.3.3" }, + { name = "psycopg-pool", specifier = "==3.3.0" }, + { name = "psycopg2-binary", specifier = "==2.9.10" }, { name = "ptyprocess", specifier = "==0.7.0" }, { name = "pure-eval", specifier = "==0.2.2" }, { name = "pyarrow", specifier = "==16.1.0" }, { name = "pyasn1", specifier = "==0.6.2" }, { name = "pyasn1-modules", specifier = "==0.4.2" }, { name = "pycparser", specifier = "==2.21" }, - { name = "pydantic", specifier = "==1.10.6" }, + { name = "pydantic", specifier = "==2.12.5" }, { name = "pydeck", 
specifier = "==0.9.1" }, { name = "pyfiglet", specifier = "==0.8.post1" }, { name = "pygments", specifier = "==2.14.0" }, @@ -1293,7 +1308,7 @@ requires-dist = [ { name = "tornado", specifier = "==6.3.2" }, { name = "traitlets", specifier = "==5.9.0" }, { name = "typer", specifier = "==0.9.0" }, - { name = "typing-extensions", specifier = "==4.12.2" }, + { name = "typing-extensions", specifier = "==4.14.1" }, { name = "tzdata", specifier = "==2022.7" }, { name = "tzlocal", specifier = "==4.3" }, { name = "urllib3", specifier = "==2.6.3" }, @@ -1408,15 +1423,15 @@ wheels = [ [[package]] name = "django-ninja" -version = "0.21.0" +version = "1.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "django" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/35/d8/96bc19b447e680fc716f304ab8d90d87e5f14bc5c9019d1927b985a438d2/django-ninja-0.21.0.tar.gz", hash = "sha256:3ed90fc55877408d5c42ec3d3cec8384c9a0cc7adf2cd66d6669561bed10a485", size = 3543843, upload-time = "2023-02-24T14:17:29.247Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/7c/3307e17b872f545c88314b2737a22f965785dfb5a120d739b0131d0492c3/django_ninja-1.6.2.tar.gz", hash = "sha256:d56ae5aa4791068ef4ac9a66cfdf2fc11f507413ded35abb79c51d0d52ad6412", size = 3685599, upload-time = "2026-03-18T20:06:47.284Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/59/23d3878e9362459a49ec87029d0d1aac79bbc5c0d6554399cd5ef083066a/django_ninja-0.21.0-py3-none-any.whl", hash = "sha256:b6ed7647212a4647682b134a6f82277ecf02e9d99960777aa75d124685682d25", size = 2280428, upload-time = "2023-02-24T14:17:26.987Z" }, + { url = "https://files.pythonhosted.org/packages/21/0c/25f72060a39632fbd2d90e9c8b6052a09cd45b0598fc06c0758d313f0052/django_ninja-1.6.2-py3-none-any.whl", hash = "sha256:20095f5900bada22ea00cf1a58af50bdb285b2354c61a9d9b47d0dc89ac462d6", size = 2374994, upload-time = "2026-03-18T20:06:45.676Z" }, ] [[package]] @@ -2376,7 +2391,7 @@ 
wheels = [ [[package]] name = "langchain-core" -version = "0.2.2" +version = "1.2.22" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -2385,35 +2400,103 @@ dependencies = [ { name = "pydantic" }, { name = "pyyaml" }, { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "uuid-utils" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e3/59/f72fd34b459e6f49ee522fcdb0b908be44b887f0e6c8679c5b85b719a60e/langchain_core-0.2.2.tar.gz", hash = "sha256:6884a87f7ac8e0d43e4d83c5f9efa95236c7bd535e22a0a51db19156875b4cd6", size = 242534, upload-time = "2024-05-29T19:56:32.908Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/a3/c4cd6827a1df46c821e7214b7f7b7a28b189e6c9b84ef15c6d629c5e3179/langchain_core-1.2.22.tar.gz", hash = "sha256:8d8f726d03d3652d403da915126626bb6250747e8ba406537d849e68b9f5d058", size = 842487, upload-time = "2026-03-24T18:48:44.9Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/e2/9d7ccae2e2b2983912d71eacf8626d7b5f186389cd82eaa2930896132feb/langchain_core-0.2.2-py3-none-any.whl", hash = "sha256:4b3b55a5f214acbcf8d6d8e322da3a9d6248d6b6f45ac1b86ab0494fd3716128", size = 309535, upload-time = "2024-05-29T19:56:30.44Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a6/2ffacf0f1a3788f250e75d0b52a24896c413be11be3a6d42bcdf46fbea48/langchain_core-1.2.22-py3-none-any.whl", hash = "sha256:7e30d586b75918e828833b9ec1efc25465723566845dd652c277baf751e9c04b", size = 506829, upload-time = "2026-03-24T18:48:43.286Z" }, ] [[package]] name = "langgraph" -version = "0.0.69" +version = "1.1.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, + { name = "langgraph-prebuilt" }, + { name = "langgraph-sdk" }, + { name = "pydantic" }, + { name = "xxhash" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d2/b2/e7db624e8b0ee063ecfbf7acc09467c0836a05914a78e819dfb3744a0fac/langgraph-1.1.3.tar.gz", hash = "sha256:ee496c297a9c93b38d8560be15cbb918110f49077d83abd14976cb13ac3b3370", size = 545120, upload-time = "2026-03-18T23:42:58.24Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/f7/221cc479e95e03e260496616e5ce6fb50c1ea01472e3a5bc481a9b8a2f83/langgraph-1.1.3-py3-none-any.whl", hash = "sha256:57cd6964ebab41cbd211f222293a2352404e55f8b2312cecde05e8753739b546", size = 168149, upload-time = "2026-03-18T23:42:56.967Z" }, +] + +[[package]] +name = "langgraph-checkpoint" +version = "4.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "ormsgpack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/44/a8df45d1e8b4637e29789fa8bae1db022c953cc7ac80093cfc52e923547e/langgraph_checkpoint-4.0.1.tar.gz", hash = "sha256:b433123735df11ade28829e40ce25b9be614930cd50245ff2af60629234befd9", size = 158135, upload-time = "2026-02-27T21:06:16.092Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/4c/09a4a0c42f5d2fc38d6c4d67884788eff7fd2cfdf367fdf7033de908b4c0/langgraph_checkpoint-4.0.1-py3-none-any.whl", hash = "sha256:e3adcd7a0e0166f3b48b8cf508ce0ea366e7420b5a73aa81289888727769b034", size = 50453, upload-time = "2026-02-27T21:06:14.293Z" }, +] + +[[package]] +name = "langgraph-checkpoint-postgres" +version = "3.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langgraph-checkpoint" }, + { name = "orjson" }, + { name = "psycopg" }, + { name = "psycopg-pool" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/e1/0124a5be9fb9a8cc9fcf5156de1aa0266207bb7d7331513335800cce2480/langgraph-0.0.69.tar.gz", hash = "sha256:25391195461e66783811f29dff34f071af8f3302382b80ac2fb524c12f0bc1f2", size = 68689, upload-time = "2024-06-14T18:59:04.041Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/95/7a/8f439966643d32111248a225e6cb33a182d07c90de780c4dbfc1e0377832/langgraph_checkpoint_postgres-3.0.5.tar.gz", hash = "sha256:a8fd7278a63f4f849b5cbc7884a15ca8f41e7d5f7467d0a66b31e8c24492f7eb", size = 127856, upload-time = "2026-03-18T21:25:29.785Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/38/1d68efc0a1cfd022de0dce98857c3a0d3c60f67e4010e238bae4ac7b45c6/langgraph-0.0.69-py3-none-any.whl", hash = "sha256:7eb628b25ed75d717c9521d98f147424df7dbd04cf0f12bfcf2b5c3122b04137", size = 86836, upload-time = "2024-06-14T18:59:01.886Z" }, + { url = "https://files.pythonhosted.org/packages/e8/87/b0f98b33a67204bca9d5619bcd9574222f6b025cf3c125eedcec9a50ecbc/langgraph_checkpoint_postgres-3.0.5-py3-none-any.whl", hash = "sha256:86d7040a88fd70087eaafb72251d796696a0a2d856168f5c11ef620771411552", size = 42907, upload-time = "2026-03-18T21:25:28.75Z" }, +] + +[[package]] +name = "langgraph-prebuilt" +version = "1.0.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "langgraph-checkpoint" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0d/06/dd61a5c2dce009d1b03b1d56f2a85b3127659fdddf5b3be5d8f1d60820fb/langgraph_prebuilt-1.0.8.tar.gz", hash = "sha256:0cd3cf5473ced8a6cd687cc5294e08d3de57529d8dd14fdc6ae4899549efcf69", size = 164442, upload-time = "2026-02-19T18:14:39.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/41/ec966424ad3f2ed3996d24079d3342c8cd6c0bd0653c12b2a917a685ec6c/langgraph_prebuilt-1.0.8-py3-none-any.whl", hash = "sha256:d16a731e591ba4470f3e313a319c7eee7dbc40895bcf15c821f985a3522a7ce0", size = 35648, upload-time = "2026-02-19T18:14:37.611Z" }, +] + +[[package]] +name = "langgraph-sdk" +version = "0.3.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "orjson" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/fd/a1/012f0e0f5c9fd26f92bdc9d244756ad673c428230156ef668e6ec7c18cee/langgraph_sdk-0.3.12.tar.gz", hash = "sha256:c9c9ec22b3c0fcd352e2b8f32a815164f69446b8648ca22606329f4ff4c59a71", size = 194932, upload-time = "2026-03-18T22:15:54.592Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/4d/4f796e86b03878ab20d9b30aaed1ad459eda71a5c5b67f7cfe712f3548f2/langgraph_sdk-0.3.12-py3-none-any.whl", hash = "sha256:44323804965d6ec2a07127b3cf08a0428ea6deaeb172c2d478d5cd25540e3327", size = 95834, upload-time = "2026-03-18T22:15:53.545Z" }, ] [[package]] name = "langsmith" -version = "0.1.5" +version = "0.7.22" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "httpx" }, + { name = "orjson", marker = "platform_python_implementation != 'PyPy'" }, + { name = "packaging" }, { name = "pydantic" }, { name = "requests" }, + { name = "requests-toolbelt" }, + { name = "uuid-utils" }, + { name = "xxhash" }, + { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c7/78/05312ccd0925c914bfc58bdd0ca38da7a908c5c580964ea8000748e55255/langsmith-0.1.5.tar.gz", hash = "sha256:aa7a2861aa3d9ae563a077c622953533800466c4e2e539b0d567b84d5fd5b157", size = 58485, upload-time = "2024-02-21T01:57:59.523Z" } +sdist = { url = "https://files.pythonhosted.org/packages/be/2a/2d5e6c67396fd228670af278c4da7bd6db2b8d11deaf6f108490b6d3f561/langsmith-0.7.22.tar.gz", hash = "sha256:35bfe795d648b069958280760564632fd28ebc9921c04f3e209c0db6a6c7dc04", size = 1134923, upload-time = "2026-03-19T22:45:23.492Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/ff/a330448e7f335298bed0129a71742ec8a96c41196a3d96304ed890ff25a7/langsmith-0.1.5-py3-none-any.whl", hash = "sha256:a1811821a923d90e53bcbacdd0988c3c366aff8f4c120d8777e7af8ecda06268", size = 61031, upload-time = "2024-02-21T01:57:56.778Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/94/1f5d72655ab6534129540843776c40eff757387b88e798d8b3bf7e313fd4/langsmith-0.7.22-py3-none-any.whl", hash = "sha256:6e9d5148314d74e86748cb9d3898632cad0320c9323d95f70f969e5bc078eee4", size = 359927, upload-time = "2026-03-19T22:45:21.603Z" }, ] [[package]] @@ -3073,39 +3156,139 @@ wheels = [ [[package]] name = "orjson" -version = "3.9.12" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3d/27/6a821fc97a2b68705cba3158e5ddb300938500a8c2b19dc084f6d43587d4/orjson-3.9.12.tar.gz", hash = "sha256:da908d23a3b3243632b523344403b128722a5f45e278a8343c2bb67538dff0e4", size = 4821075, upload-time = "2024-01-18T17:24:19.045Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/c5/6c9f6084c8b8e55b3af2e05cf0c656442dc0088135f2a9d034a1ef41895c/orjson-3.9.12-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6b4e2bed7d00753c438e83b613923afdd067564ff7ed696bfe3a7b073a236e07", size = 250591, upload-time = "2024-01-18T17:19:41.656Z" }, - { url = "https://files.pythonhosted.org/packages/65/a3/f07f1ed78002a03ce4a33f285b06b81bfca188a6652cd8f563a4f60cec47/orjson-3.9.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd1b8ec63f0bf54a50b498eedeccdca23bd7b658f81c524d18e410c203189365", size = 142528, upload-time = "2024-01-18T17:22:47.683Z" }, - { url = "https://files.pythonhosted.org/packages/64/f6/cc6564986f2d9c15c5372af0b1cbf4ee67a1eac1553c151bec6d0a8f5ce6/orjson-3.9.12-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab8add018a53665042a5ae68200f1ad14c7953fa12110d12d41166f111724656", size = 130798, upload-time = "2024-01-18T17:22:50.208Z" }, - { url = "https://files.pythonhosted.org/packages/09/37/f6896df8adb9ca831423765f732203248431380afc0d6c9786647fd275a0/orjson-3.9.12-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:12756a108875526b76e505afe6d6ba34960ac6b8c5ec2f35faf73ef161e97e07", size = 159981, upload-time = "2024-01-18T17:22:52.785Z" }, - { url = "https://files.pythonhosted.org/packages/62/a9/fc2cc6722a4abe40aec7641002755ebef89f43ac849286c6b8117ba548a9/orjson-3.9.12-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:890e7519c0c70296253660455f77e3a194554a3c45e42aa193cdebc76a02d82b", size = 157028, upload-time = "2024-01-18T17:22:55.313Z" }, - { url = "https://files.pythonhosted.org/packages/f9/3a/5ba53e3c6dd62860331d7d14c236bf7e85fd52983d22419a5a431fc3ddce/orjson-3.9.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d664880d7f016efbae97c725b243b33c2cbb4851ddc77f683fd1eec4a7894146", size = 139793, upload-time = "2024-01-18T17:22:57.927Z" }, - { url = "https://files.pythonhosted.org/packages/27/a5/aa5668088a0fb02d64e5d58bddcd3adbe8559d64820343ef7bdb04959839/orjson-3.9.12-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cfdaede0fa5b500314ec7b1249c7e30e871504a57004acd116be6acdda3b8ab3", size = 315448, upload-time = "2024-01-18T17:23:00.665Z" }, - { url = "https://files.pythonhosted.org/packages/73/0a/7448623e16abeb690e8da6f4184c75deb3cba756c806279a9adc20926c13/orjson-3.9.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6492ff5953011e1ba9ed1bf086835fd574bd0a3cbe252db8e15ed72a30479081", size = 309061, upload-time = "2024-01-18T17:23:03.036Z" }, - { url = "https://files.pythonhosted.org/packages/22/84/2010e461edf593e7af9876ad1cee002834b8fa9ac3576aca74c6f76a1ca5/orjson-3.9.12-cp310-none-win32.whl", hash = "sha256:29bf08e2eadb2c480fdc2e2daae58f2f013dff5d3b506edd1e02963b9ce9f8a9", size = 140905, upload-time = "2024-01-18T17:24:02.18Z" }, - { url = "https://files.pythonhosted.org/packages/67/da/28ff5de12191a2d626b52494498da33087406d33f1c504b83cb796ce71cb/orjson-3.9.12-cp310-none-win_amd64.whl", hash = "sha256:0fc156fba60d6b50743337ba09f052d8afc8b64595112996d22f5fce01ab57da", size = 134850, upload-time = 
"2024-01-18T17:19:12.986Z" }, - { url = "https://files.pythonhosted.org/packages/c5/a3/b8f435fd22245bbe301abb0c1c3a29c8f0e1065333491f98974f59bab3bd/orjson-3.9.12-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:2849f88a0a12b8d94579b67486cbd8f3a49e36a4cb3d3f0ab352c596078c730c", size = 250589, upload-time = "2024-01-18T17:21:41.686Z" }, - { url = "https://files.pythonhosted.org/packages/53/0a/711394924624222a0fe86553bd3a38654bf17608c89a70f452c9a32c56da/orjson-3.9.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3186b18754befa660b31c649a108a915493ea69b4fc33f624ed854ad3563ac65", size = 142531, upload-time = "2024-01-18T17:23:05.758Z" }, - { url = "https://files.pythonhosted.org/packages/9f/74/e1975f84c0899a237d29a04c33ee6fc7ee547465b8541bbbabc158d060d8/orjson-3.9.12-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbbf313c9fb9d4f6cf9c22ced4b6682230457741daeb3d7060c5d06c2e73884a", size = 130818, upload-time = "2024-01-18T17:23:07.847Z" }, - { url = "https://files.pythonhosted.org/packages/4d/5a/413aa3107ee4cd4a29c61fbef00534e9b135795f76cd2a1183b16edd16cd/orjson-3.9.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99e8cd005b3926c3db9b63d264bd05e1bf4451787cc79a048f27f5190a9a0311", size = 159983, upload-time = "2024-01-18T17:23:10.464Z" }, - { url = "https://files.pythonhosted.org/packages/c6/e0/d54d01dca09d219e1e8eb8cc97cc07e5e17cb6ab81eae82597ed6c4fb0e8/orjson-3.9.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59feb148392d9155f3bfed0a2a3209268e000c2c3c834fb8fe1a6af9392efcbf", size = 157029, upload-time = "2024-01-18T17:23:12.449Z" }, - { url = "https://files.pythonhosted.org/packages/cb/f0/506623ccfc4b2ec326c0be70a1bd2162e0e8202087062f6b05075a15045e/orjson-3.9.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4ae815a172a1f073b05b9e04273e3b23e608a0858c4e76f606d2d75fcabde0c", 
size = 139796, upload-time = "2024-01-18T17:23:15.029Z" }, - { url = "https://files.pythonhosted.org/packages/0d/70/032ffa99b1fb806b018dab0b59a45489f07d6e69c06ddaa13dc000b8862f/orjson-3.9.12-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed398f9a9d5a1bf55b6e362ffc80ac846af2122d14a8243a1e6510a4eabcb71e", size = 315447, upload-time = "2024-01-18T17:23:17.907Z" }, - { url = "https://files.pythonhosted.org/packages/b1/9f/f418a0d3a641fb4e4fc156b1a1e32a23adb8eb77fbcdf9f61a503d7b74ae/orjson-3.9.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d3cfb76600c5a1e6be91326b8f3b83035a370e727854a96d801c1ea08b708073", size = 309068, upload-time = "2024-01-18T17:23:20.795Z" }, - { url = "https://files.pythonhosted.org/packages/0e/00/638e36d13c69a7b4d5fafbdadccf130c72080f621d67fc1020490cdcea66/orjson-3.9.12-cp311-none-win32.whl", hash = "sha256:a2b6f5252c92bcab3b742ddb3ac195c0fa74bed4319acd74f5d54d79ef4715dc", size = 140907, upload-time = "2024-01-18T17:25:05.867Z" }, - { url = "https://files.pythonhosted.org/packages/41/c5/ac4a0f8a1ae80373f4dce810f861cb2bab4aded9ceccc510d73b966fb607/orjson-3.9.12-cp311-none-win_amd64.whl", hash = "sha256:c95488e4aa1d078ff5776b58f66bd29d628fa59adcb2047f4efd3ecb2bd41a71", size = 134853, upload-time = "2024-01-18T17:19:18.966Z" }, - { url = "https://files.pythonhosted.org/packages/0c/74/be2349ccba34fdce4f38607ce7df9a3faf64d31f49a6d8289537e8442f2d/orjson-3.9.12-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d6ce2062c4af43b92b0221ed4f445632c6bf4213f8a7da5396a122931377acd9", size = 250640, upload-time = "2024-01-18T17:21:47.642Z" }, - { url = "https://files.pythonhosted.org/packages/54/7c/e49520e76a976b98b7f0e5d34a2072418eb85b2ebcbeeba855498955f919/orjson-3.9.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:950951799967558c214cd6cceb7ceceed6f81d2c3c4135ee4a2c9c69f58aa225", size = 142469, upload-time = "2024-01-18T17:23:23.172Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/06/016366526c0a9409195d05401d6f42cf7614c5f2e7046dcd6cc064a772b7/orjson-3.9.12-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2dfaf71499d6fd4153f5c86eebb68e3ec1bf95851b030a4b55c7637a37bbdee4", size = 130826, upload-time = "2024-01-18T17:23:25.941Z" }, - { url = "https://files.pythonhosted.org/packages/d7/cb/4bb410d8825ce7171579452c01844e853c80e1437b7db40d9d503a8b4160/orjson-3.9.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:659a8d7279e46c97661839035a1a218b61957316bf0202674e944ac5cfe7ed83", size = 160021, upload-time = "2024-01-18T17:23:28.036Z" }, - { url = "https://files.pythonhosted.org/packages/e5/cb/96af73bb6b06d3cd967394a0834496e2600f44db6b7622bb540e93e4e98a/orjson-3.9.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af17fa87bccad0b7f6fd8ac8f9cbc9ee656b4552783b10b97a071337616db3e4", size = 157045, upload-time = "2024-01-18T17:23:31.265Z" }, - { url = "https://files.pythonhosted.org/packages/dc/ac/7cc0c187536b5e6fc50b15d4b601d40b4b9825a1bcaf0ed19c83b12ff90e/orjson-3.9.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd52dec9eddf4c8c74392f3fd52fa137b5f2e2bed1d9ae958d879de5f7d7cded", size = 139861, upload-time = "2024-01-18T17:23:33.578Z" }, - { url = "https://files.pythonhosted.org/packages/04/91/c817ad546b8640013627161e561ae519c95d5ccf49b7eee2f994bbb7c6c3/orjson-3.9.12-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:640e2b5d8e36b970202cfd0799d11a9a4ab46cf9212332cd642101ec952df7c8", size = 315361, upload-time = "2024-01-18T17:23:35.802Z" }, - { url = "https://files.pythonhosted.org/packages/b8/d6/1f9db09d7fcd7cf118775a34038daedb82cff14e57c7a223ae8b1725af7d/orjson-3.9.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:daa438bd8024e03bcea2c5a92cd719a663a58e223fba967296b6ab9992259dbf", size = 309184, upload-time = "2024-01-18T17:23:39.171Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/f5/87f3728d3aff8d76e7343f9cce0ac2958bb43e1dd7222a32d9c50981d508/orjson-3.9.12-cp312-none-win_amd64.whl", hash = "sha256:1bb8f657c39ecdb924d02e809f992c9aafeb1ad70127d53fb573a6a6ab59d549", size = 134919, upload-time = "2024-01-18T17:19:45.415Z" }, +version = "3.11.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/1a/a373746fa6d0e116dd9e54371a7b54622c44d12296d5d0f3ad5e3ff33490/orjson-3.11.7-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a02c833f38f36546ba65a452127633afce4cf0dd7296b753d3bb54e55e5c0174", size = 229140, upload-time = "2026-02-02T15:37:06.082Z" }, + { url = "https://files.pythonhosted.org/packages/52/a2/fa129e749d500f9b183e8a3446a193818a25f60261e9ce143ad61e975208/orjson-3.11.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b63c6e6738d7c3470ad01601e23376aa511e50e1f3931395b9f9c722406d1a67", size = 128670, upload-time = "2026-02-02T15:37:08.002Z" }, + { url = "https://files.pythonhosted.org/packages/08/93/1e82011cd1e0bd051ef9d35bed1aa7fb4ea1f0a055dc2c841b46b43a9ebd/orjson-3.11.7-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:043d3006b7d32c7e233b8cfb1f01c651013ea079e08dcef7189a29abd8befe11", size = 123832, upload-time = "2026-02-02T15:37:09.191Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d8/a26b431ef962c7d55736674dddade876822f3e33223c1f47a36879350d04/orjson-3.11.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57036b27ac8a25d81112eb0cc9835cd4833c5b16e1467816adc0015f59e870dc", size = 129171, upload-time = 
"2026-02-02T15:37:11.112Z" }, + { url = "https://files.pythonhosted.org/packages/a7/19/f47819b84a580f490da260c3ee9ade214cf4cf78ac9ce8c1c758f80fdfc9/orjson-3.11.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:733ae23ada68b804b222c44affed76b39e30806d38660bf1eb200520d259cc16", size = 141967, upload-time = "2026-02-02T15:37:12.282Z" }, + { url = "https://files.pythonhosted.org/packages/5b/cd/37ece39a0777ba077fdcdbe4cccae3be8ed00290c14bf8afdc548befc260/orjson-3.11.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fdfad2093bdd08245f2e204d977facd5f871c88c4a71230d5bcbd0e43bf6222", size = 130991, upload-time = "2026-02-02T15:37:13.465Z" }, + { url = "https://files.pythonhosted.org/packages/8f/ed/f2b5d66aa9b6b5c02ff5f120efc7b38c7c4962b21e6be0f00fd99a5c348e/orjson-3.11.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cededd6738e1c153530793998e31c05086582b08315db48ab66649768f326baa", size = 133674, upload-time = "2026-02-02T15:37:14.694Z" }, + { url = "https://files.pythonhosted.org/packages/c4/6e/baa83e68d1aa09fa8c3e5b2c087d01d0a0bd45256de719ed7bc22c07052d/orjson-3.11.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:14f440c7268c8f8633d1b3d443a434bd70cb15686117ea6beff8fdc8f5917a1e", size = 138722, upload-time = "2026-02-02T15:37:16.501Z" }, + { url = "https://files.pythonhosted.org/packages/0c/47/7f8ef4963b772cd56999b535e553f7eb5cd27e9dd6c049baee6f18bfa05d/orjson-3.11.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3a2479753bbb95b0ebcf7969f562cdb9668e6d12416a35b0dda79febf89cdea2", size = 409056, upload-time = "2026-02-02T15:37:17.895Z" }, + { url = "https://files.pythonhosted.org/packages/38/eb/2df104dd2244b3618f25325a656f85cc3277f74bbd91224752410a78f3c7/orjson-3.11.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:71924496986275a737f38e3f22b4e0878882b3f7a310d2ff4dc96e812789120c", size = 144196, upload-time = "2026-02-02T15:37:19.349Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/2a/ee41de0aa3a6686598661eae2b4ebdff1340c65bfb17fcff8b87138aab21/orjson-3.11.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4a9eefdc70bf8bf9857f0290f973dec534ac84c35cd6a7f4083be43e7170a8f", size = 134979, upload-time = "2026-02-02T15:37:20.906Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fa/92fc5d3d402b87a8b28277a9ed35386218a6a5287c7fe5ee9b9f02c53fb2/orjson-3.11.7-cp310-cp310-win32.whl", hash = "sha256:ae9e0b37a834cef7ce8f99de6498f8fad4a2c0bf6bfc3d02abd8ed56aa15b2de", size = 127968, upload-time = "2026-02-02T15:37:23.178Z" }, + { url = "https://files.pythonhosted.org/packages/07/29/a576bf36d73d60df06904d3844a9df08e25d59eba64363aaf8ec2f9bff41/orjson-3.11.7-cp310-cp310-win_amd64.whl", hash = "sha256:d772afdb22555f0c58cfc741bdae44180122b3616faa1ecadb595cd526e4c993", size = 125128, upload-time = "2026-02-02T15:37:24.329Z" }, + { url = "https://files.pythonhosted.org/packages/37/02/da6cb01fc6087048d7f61522c327edf4250f1683a58a839fdcc435746dd5/orjson-3.11.7-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9487abc2c2086e7c8eb9a211d2ce8855bae0e92586279d0d27b341d5ad76c85c", size = 228664, upload-time = "2026-02-02T15:37:25.542Z" }, + { url = "https://files.pythonhosted.org/packages/c1/c2/5885e7a5881dba9a9af51bc564e8967225a642b3e03d089289a35054e749/orjson-3.11.7-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:79cacb0b52f6004caf92405a7e1f11e6e2de8bdf9019e4f76b44ba045125cd6b", size = 125344, upload-time = "2026-02-02T15:37:26.92Z" }, + { url = "https://files.pythonhosted.org/packages/a4/1d/4e7688de0a92d1caf600dfd5fb70b4c5bfff51dfa61ac555072ef2d0d32a/orjson-3.11.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e85fe4698b6a56d5e2ebf7ae87544d668eb6bde1ad1226c13f44663f20ec9e", size = 128404, upload-time = "2026-02-02T15:37:28.108Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/b2/ec04b74ae03a125db7bd69cffd014b227b7f341e3261bf75b5eb88a1aa92/orjson-3.11.7-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b8d14b71c0b12963fe8a62aac87119f1afdf4cb88a400f61ca5ae581449efcb5", size = 123677, upload-time = "2026-02-02T15:37:30.287Z" }, + { url = "https://files.pythonhosted.org/packages/4c/69/f95bdf960605f08f827f6e3291fe243d8aa9c5c9ff017a8d7232209184c3/orjson-3.11.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91c81ef070c8f3220054115e1ef468b1c9ce8497b4e526cb9f68ab4dc0a7ac62", size = 128950, upload-time = "2026-02-02T15:37:31.595Z" }, + { url = "https://files.pythonhosted.org/packages/a4/1b/de59c57bae1d148ef298852abd31909ac3089cff370dfd4cd84cc99cbc42/orjson-3.11.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:411ebaf34d735e25e358a6d9e7978954a9c9d58cfb47bc6683cdc3964cd2f910", size = 141756, upload-time = "2026-02-02T15:37:32.985Z" }, + { url = "https://files.pythonhosted.org/packages/ee/9e/9decc59f4499f695f65c650f6cfa6cd4c37a3fbe8fa235a0a3614cb54386/orjson-3.11.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a16bcd08ab0bcdfc7e8801d9c4a9cc17e58418e4d48ddc6ded4e9e4b1a94062b", size = 130812, upload-time = "2026-02-02T15:37:34.204Z" }, + { url = "https://files.pythonhosted.org/packages/28/e6/59f932bcabd1eac44e334fe8e3281a92eacfcb450586e1f4bde0423728d8/orjson-3.11.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c0b51672e466fd7e56230ffbae7f1639e18d0ce023351fb75da21b71bc2c960", size = 133444, upload-time = "2026-02-02T15:37:35.446Z" }, + { url = "https://files.pythonhosted.org/packages/f1/36/b0f05c0eaa7ca30bc965e37e6a2956b0d67adb87a9872942d3568da846ae/orjson-3.11.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:136dcd6a2e796dfd9ffca9fc027d778567b0b7c9968d092842d3c323cef88aa8", size = 138609, upload-time = "2026-02-02T15:37:36.657Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/03/58ec7d302b8d86944c60c7b4b82975d5161fcce4c9bc8c6cb1d6741b6115/orjson-3.11.7-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:7ba61079379b0ae29e117db13bda5f28d939766e410d321ec1624afc6a0b0504", size = 408918, upload-time = "2026-02-02T15:37:38.076Z" }, + { url = "https://files.pythonhosted.org/packages/06/3a/868d65ef9a8b99be723bd510de491349618abd9f62c826cf206d962db295/orjson-3.11.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0527a4510c300e3b406591b0ba69b5dc50031895b0a93743526a3fc45f59d26e", size = 143998, upload-time = "2026-02-02T15:37:39.706Z" }, + { url = "https://files.pythonhosted.org/packages/5b/c7/1e18e1c83afe3349f4f6dc9e14910f0ae5f82eac756d1412ea4018938535/orjson-3.11.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a709e881723c9b18acddcfb8ba357322491ad553e277cf467e1e7e20e2d90561", size = 134802, upload-time = "2026-02-02T15:37:41.002Z" }, + { url = "https://files.pythonhosted.org/packages/d4/0b/ccb7ee1a65b37e8eeb8b267dc953561d72370e85185e459616d4345bab34/orjson-3.11.7-cp311-cp311-win32.whl", hash = "sha256:c43b8b5bab288b6b90dac410cca7e986a4fa747a2e8f94615aea407da706980d", size = 127828, upload-time = "2026-02-02T15:37:42.241Z" }, + { url = "https://files.pythonhosted.org/packages/af/9e/55c776dffda3f381e0f07d010a4f5f3902bf48eaba1bb7684d301acd4924/orjson-3.11.7-cp311-cp311-win_amd64.whl", hash = "sha256:6543001328aa857187f905308a028935864aefe9968af3848401b6fe80dbb471", size = 124941, upload-time = "2026-02-02T15:37:43.444Z" }, + { url = "https://files.pythonhosted.org/packages/aa/8e/424a620fa7d263b880162505fb107ef5e0afaa765b5b06a88312ac291560/orjson-3.11.7-cp311-cp311-win_arm64.whl", hash = "sha256:1ee5cc7160a821dfe14f130bc8e63e7611051f964b463d9e2a3a573204446a4d", size = 126245, upload-time = "2026-02-02T15:37:45.18Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/bf/76f4f1665f6983385938f0e2a5d7efa12a58171b8456c252f3bae8a4cf75/orjson-3.11.7-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bd03ea7606833655048dab1a00734a2875e3e86c276e1d772b2a02556f0d895f", size = 228545, upload-time = "2026-02-02T15:37:46.376Z" }, + { url = "https://files.pythonhosted.org/packages/79/53/6c72c002cb13b5a978a068add59b25a8bdf2800ac1c9c8ecdb26d6d97064/orjson-3.11.7-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:89e440ebc74ce8ab5c7bc4ce6757b4a6b1041becb127df818f6997b5c71aa60b", size = 125224, upload-time = "2026-02-02T15:37:47.697Z" }, + { url = "https://files.pythonhosted.org/packages/2c/83/10e48852865e5dd151bdfe652c06f7da484578ed02c5fca938e3632cb0b8/orjson-3.11.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ede977b5fe5ac91b1dffc0a517ca4542d2ec8a6a4ff7b2652d94f640796342a", size = 128154, upload-time = "2026-02-02T15:37:48.954Z" }, + { url = "https://files.pythonhosted.org/packages/6e/52/a66e22a2b9abaa374b4a081d410edab6d1e30024707b87eab7c734afe28d/orjson-3.11.7-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b7b1dae39230a393df353827c855a5f176271c23434cfd2db74e0e424e693e10", size = 123548, upload-time = "2026-02-02T15:37:50.187Z" }, + { url = "https://files.pythonhosted.org/packages/de/38/605d371417021359f4910c496f764c48ceb8997605f8c25bf1dfe58c0ebe/orjson-3.11.7-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed46f17096e28fb28d2975834836a639af7278aa87c84f68ab08fbe5b8bd75fa", size = 129000, upload-time = "2026-02-02T15:37:51.426Z" }, + { url = "https://files.pythonhosted.org/packages/44/98/af32e842b0ffd2335c89714d48ca4e3917b42f5d6ee5537832e069a4b3ac/orjson-3.11.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3726be79e36e526e3d9c1aceaadbfb4a04ee80a72ab47b3f3c17fefb9812e7b8", size = 141686, upload-time = "2026-02-02T15:37:52.607Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/0b/fc793858dfa54be6feee940c1463370ece34b3c39c1ca0aa3845f5ba9892/orjson-3.11.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0724e265bc548af1dedebd9cb3d24b4e1c1e685a343be43e87ba922a5c5fff2f", size = 130812, upload-time = "2026-02-02T15:37:53.944Z" }, + { url = "https://files.pythonhosted.org/packages/dc/91/98a52415059db3f374757d0b7f0f16e3b5cd5976c90d1c2b56acaea039e6/orjson-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7745312efa9e11c17fbd3cb3097262d079da26930ae9ae7ba28fb738367cbad", size = 133440, upload-time = "2026-02-02T15:37:55.615Z" }, + { url = "https://files.pythonhosted.org/packages/dc/b6/cb540117bda61791f46381f8c26c8f93e802892830a6055748d3bb1925ab/orjson-3.11.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f904c24bdeabd4298f7a977ef14ca2a022ca921ed670b92ecd16ab6f3d01f867", size = 138386, upload-time = "2026-02-02T15:37:56.814Z" }, + { url = "https://files.pythonhosted.org/packages/63/1a/50a3201c334a7f17c231eee5f841342190723794e3b06293f26e7cf87d31/orjson-3.11.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b9fc4d0f81f394689e0814617aadc4f2ea0e8025f38c226cbf22d3b5ddbf025d", size = 408853, upload-time = "2026-02-02T15:37:58.291Z" }, + { url = "https://files.pythonhosted.org/packages/87/cd/8de1c67d0be44fdc22701e5989c0d015a2adf391498ad42c4dc589cd3013/orjson-3.11.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:849e38203e5be40b776ed2718e587faf204d184fc9a008ae441f9442320c0cab", size = 144130, upload-time = "2026-02-02T15:38:00.163Z" }, + { url = "https://files.pythonhosted.org/packages/0f/fe/d605d700c35dd55f51710d159fc54516a280923cd1b7e47508982fbb387d/orjson-3.11.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4682d1db3bcebd2b64757e0ddf9e87ae5f00d29d16c5cdf3a62f561d08cc3dd2", size = 134818, upload-time = "2026-02-02T15:38:01.507Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/e4/15ecc67edb3ddb3e2f46ae04475f2d294e8b60c1825fbe28a428b93b3fbd/orjson-3.11.7-cp312-cp312-win32.whl", hash = "sha256:f4f7c956b5215d949a1f65334cf9d7612dde38f20a95f2315deef167def91a6f", size = 127923, upload-time = "2026-02-02T15:38:02.75Z" }, + { url = "https://files.pythonhosted.org/packages/34/70/2e0855361f76198a3965273048c8e50a9695d88cd75811a5b46444895845/orjson-3.11.7-cp312-cp312-win_amd64.whl", hash = "sha256:bf742e149121dc5648ba0a08ea0871e87b660467ef168a3a5e53bc1fbd64bb74", size = 125007, upload-time = "2026-02-02T15:38:04.032Z" }, + { url = "https://files.pythonhosted.org/packages/68/40/c2051bd19fc467610fed469dc29e43ac65891571138f476834ca192bc290/orjson-3.11.7-cp312-cp312-win_arm64.whl", hash = "sha256:26c3b9132f783b7d7903bf1efb095fed8d4a3a85ec0d334ee8beff3d7a4749d5", size = 126089, upload-time = "2026-02-02T15:38:05.297Z" }, + { url = "https://files.pythonhosted.org/packages/89/25/6e0e52cac5aab51d7b6dcd257e855e1dec1c2060f6b28566c509b4665f62/orjson-3.11.7-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1d98b30cc1313d52d4af17d9c3d307b08389752ec5f2e5febdfada70b0f8c733", size = 228390, upload-time = "2026-02-02T15:38:06.8Z" }, + { url = "https://files.pythonhosted.org/packages/a5/29/a77f48d2fc8a05bbc529e5ff481fb43d914f9e383ea2469d4f3d51df3d00/orjson-3.11.7-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:d897e81f8d0cbd2abb82226d1860ad2e1ab3ff16d7b08c96ca00df9d45409ef4", size = 125189, upload-time = "2026-02-02T15:38:08.181Z" }, + { url = "https://files.pythonhosted.org/packages/89/25/0a16e0729a0e6a1504f9d1a13cdd365f030068aab64cec6958396b9969d7/orjson-3.11.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:814be4b49b228cfc0b3c565acf642dd7d13538f966e3ccde61f4f55be3e20785", size = 128106, upload-time = "2026-02-02T15:38:09.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/da/a2e505469d60666a05ab373f1a6322eb671cb2ba3a0ccfc7d4bc97196787/orjson-3.11.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d06e5c5fed5caedd2e540d62e5b1c25e8c82431b9e577c33537e5fa4aa909539", size = 123363, upload-time = "2026-02-02T15:38:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/23/bf/ed73f88396ea35c71b38961734ea4a4746f7ca0768bf28fd551d37e48dd0/orjson-3.11.7-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31c80ce534ac4ea3739c5ee751270646cbc46e45aea7576a38ffec040b4029a1", size = 129007, upload-time = "2026-02-02T15:38:12.138Z" }, + { url = "https://files.pythonhosted.org/packages/73/3c/b05d80716f0225fc9008fbf8ab22841dcc268a626aa550561743714ce3bf/orjson-3.11.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f50979824bde13d32b4320eedd513431c921102796d86be3eee0b58e58a3ecd1", size = 141667, upload-time = "2026-02-02T15:38:13.398Z" }, + { url = "https://files.pythonhosted.org/packages/61/e8/0be9b0addd9bf86abfc938e97441dcd0375d494594b1c8ad10fe57479617/orjson-3.11.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e54f3808e2b6b945078c41aa8d9b5834b28c50843846e97807e5adb75fa9705", size = 130832, upload-time = "2026-02-02T15:38:14.698Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ec/c68e3b9021a31d9ec15a94931db1410136af862955854ed5dd7e7e4f5bff/orjson-3.11.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12b80df61aab7b98b490fe9e4879925ba666fccdfcd175252ce4d9035865ace", size = 133373, upload-time = "2026-02-02T15:38:16.109Z" }, + { url = "https://files.pythonhosted.org/packages/d2/45/f3466739aaafa570cc8e77c6dbb853c48bf56e3b43738020e2661e08b0ac/orjson-3.11.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:996b65230271f1a97026fd0e6a753f51fbc0c335d2ad0c6201f711b0da32693b", size = 138307, upload-time = "2026-02-02T15:38:17.453Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/84/9f7f02288da1ffb31405c1be07657afd1eecbcb4b64ee2817b6fe0f785fa/orjson-3.11.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ab49d4b2a6a1d415ddb9f37a21e02e0d5dbfe10b7870b21bf779fc21e9156157", size = 408695, upload-time = "2026-02-02T15:38:18.831Z" }, + { url = "https://files.pythonhosted.org/packages/18/07/9dd2f0c0104f1a0295ffbe912bc8d63307a539b900dd9e2c48ef7810d971/orjson-3.11.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:390a1dce0c055ddf8adb6aa94a73b45a4a7d7177b5c584b8d1c1947f2ba60fb3", size = 144099, upload-time = "2026-02-02T15:38:20.28Z" }, + { url = "https://files.pythonhosted.org/packages/a5/66/857a8e4a3292e1f7b1b202883bcdeb43a91566cf59a93f97c53b44bd6801/orjson-3.11.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1eb80451a9c351a71dfaf5b7ccc13ad065405217726b59fdbeadbcc544f9d223", size = 134806, upload-time = "2026-02-02T15:38:22.186Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5b/6ebcf3defc1aab3a338ca777214966851e92efb1f30dc7fc8285216e6d1b/orjson-3.11.7-cp313-cp313-win32.whl", hash = "sha256:7477aa6a6ec6139c5cb1cc7b214643592169a5494d200397c7fc95d740d5fcf3", size = 127914, upload-time = "2026-02-02T15:38:23.511Z" }, + { url = "https://files.pythonhosted.org/packages/00/04/c6f72daca5092e3117840a1b1e88dfc809cc1470cf0734890d0366b684a1/orjson-3.11.7-cp313-cp313-win_amd64.whl", hash = "sha256:b9f95dcdea9d4f805daa9ddf02617a89e484c6985fa03055459f90e87d7a0757", size = 124986, upload-time = "2026-02-02T15:38:24.836Z" }, + { url = "https://files.pythonhosted.org/packages/03/ba/077a0f6f1085d6b806937246860fafbd5b17f3919c70ee3f3d8d9c713f38/orjson-3.11.7-cp313-cp313-win_arm64.whl", hash = "sha256:800988273a014a0541483dc81021247d7eacb0c845a9d1a34a422bc718f41539", size = 126045, upload-time = "2026-02-02T15:38:26.216Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/1e/745565dca749813db9a093c5ebc4bac1a9475c64d54b95654336ac3ed961/orjson-3.11.7-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:de0a37f21d0d364954ad5de1970491d7fbd0fb1ef7417d4d56a36dc01ba0c0a0", size = 228391, upload-time = "2026-02-02T15:38:27.757Z" }, + { url = "https://files.pythonhosted.org/packages/46/19/e40f6225da4d3aa0c8dc6e5219c5e87c2063a560fe0d72a88deb59776794/orjson-3.11.7-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:c2428d358d85e8da9d37cba18b8c4047c55222007a84f97156a5b22028dfbfc0", size = 125188, upload-time = "2026-02-02T15:38:29.241Z" }, + { url = "https://files.pythonhosted.org/packages/9d/7e/c4de2babef2c0817fd1f048fd176aa48c37bec8aef53d2fa932983032cce/orjson-3.11.7-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c4bc6c6ac52cdaa267552544c73e486fecbd710b7ac09bc024d5a78555a22f6", size = 128097, upload-time = "2026-02-02T15:38:30.618Z" }, + { url = "https://files.pythonhosted.org/packages/eb/74/233d360632bafd2197f217eee7fb9c9d0229eac0c18128aee5b35b0014fe/orjson-3.11.7-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd0d68edd7dfca1b2eca9361a44ac9f24b078de3481003159929a0573f21a6bf", size = 123364, upload-time = "2026-02-02T15:38:32.363Z" }, + { url = "https://files.pythonhosted.org/packages/79/51/af79504981dd31efe20a9e360eb49c15f06df2b40e7f25a0a52d9ae888e8/orjson-3.11.7-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:623ad1b9548ef63886319c16fa317848e465a21513b31a6ad7b57443c3e0dcf5", size = 129076, upload-time = "2026-02-02T15:38:33.68Z" }, + { url = "https://files.pythonhosted.org/packages/67/e2/da898eb68b72304f8de05ca6715870d09d603ee98d30a27e8a9629abc64b/orjson-3.11.7-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6e776b998ac37c0396093d10290e60283f59cfe0fc3fccbd0ccc4bd04dd19892", size = 141705, upload-time = "2026-02-02T15:38:34.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/89/15364d92acb3d903b029e28d834edb8780c2b97404cbf7929aa6b9abdb24/orjson-3.11.7-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:652c6c3af76716f4a9c290371ba2e390ede06f6603edb277b481daf37f6f464e", size = 130855, upload-time = "2026-02-02T15:38:36.379Z" }, + { url = "https://files.pythonhosted.org/packages/c2/8b/ecdad52d0b38d4b8f514be603e69ccd5eacf4e7241f972e37e79792212ec/orjson-3.11.7-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a56df3239294ea5964adf074c54bcc4f0ccd21636049a2cf3ca9cf03b5d03cf1", size = 133386, upload-time = "2026-02-02T15:38:37.704Z" }, + { url = "https://files.pythonhosted.org/packages/b9/0e/45e1dcf10e17d0924b7c9162f87ec7b4ca79e28a0548acf6a71788d3e108/orjson-3.11.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bda117c4148e81f746655d5a3239ae9bd00cb7bc3ca178b5fc5a5997e9744183", size = 138295, upload-time = "2026-02-02T15:38:39.096Z" }, + { url = "https://files.pythonhosted.org/packages/63/d7/4d2e8b03561257af0450f2845b91fbd111d7e526ccdf737267108075e0ba/orjson-3.11.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:23d6c20517a97a9daf1d48b580fcdc6f0516c6f4b5038823426033690b4d2650", size = 408720, upload-time = "2026-02-02T15:38:40.634Z" }, + { url = "https://files.pythonhosted.org/packages/78/cf/d45343518282108b29c12a65892445fc51f9319dc3c552ceb51bb5905ed2/orjson-3.11.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8ff206156006da5b847c9304b6308a01e8cdbc8cce824e2779a5ba71c3def141", size = 144152, upload-time = "2026-02-02T15:38:42.262Z" }, + { url = "https://files.pythonhosted.org/packages/a9/3a/d6001f51a7275aacd342e77b735c71fa04125a3f93c36fee4526bc8c654e/orjson-3.11.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:962d046ee1765f74a1da723f4b33e3b228fe3a48bd307acce5021dfefe0e29b2", size = 134814, upload-time = "2026-02-02T15:38:43.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/d3/f19b47ce16820cc2c480f7f1723e17f6d411b3a295c60c8ad3aa9ff1c96a/orjson-3.11.7-cp314-cp314-win32.whl", hash = "sha256:89e13dd3f89f1c38a9c9eba5fbf7cdc2d1feca82f5f290864b4b7a6aac704576", size = 127997, upload-time = "2026-02-02T15:38:45.06Z" }, + { url = "https://files.pythonhosted.org/packages/12/df/172771902943af54bf661a8d102bdf2e7f932127968080632bda6054b62c/orjson-3.11.7-cp314-cp314-win_amd64.whl", hash = "sha256:845c3e0d8ded9c9271cd79596b9b552448b885b97110f628fb687aee2eed11c1", size = 124985, upload-time = "2026-02-02T15:38:46.388Z" }, + { url = "https://files.pythonhosted.org/packages/6f/1c/f2a8d8a1b17514660a614ce5f7aac74b934e69f5abc2700cc7ced882a009/orjson-3.11.7-cp314-cp314-win_arm64.whl", hash = "sha256:4a2e9c5be347b937a2e0203866f12bba36082e89b402ddb9e927d5822e43088d", size = 126038, upload-time = "2026-02-02T15:38:47.703Z" }, +] + +[[package]] +name = "ormsgpack" +version = "1.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/12/0c/f1761e21486942ab9bb6feaebc610fa074f7c5e496e6962dea5873348077/ormsgpack-1.12.2.tar.gz", hash = "sha256:944a2233640273bee67521795a73cf1e959538e0dfb7ac635505010455e53b33", size = 39031, upload-time = "2026-01-18T20:55:28.023Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/fa/a91f70829ebccf6387c4946e0a1a109f6ba0d6a28d65f628bedfad94b890/ormsgpack-1.12.2-cp310-cp310-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:c1429217f8f4d7fcb053523bbbac6bed5e981af0b85ba616e6df7cce53c19657", size = 378262, upload-time = "2026-01-18T20:55:22.284Z" }, + { url = "https://files.pythonhosted.org/packages/5f/62/3698a9a0c487252b5c6a91926e5654e79e665708ea61f67a8bdeceb022bf/ormsgpack-1.12.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f13034dc6c84a6280c6c33db7ac420253852ea233fc3ee27c8875f8dd651163", size = 203034, upload-time = "2026-01-18T20:55:53.324Z" }, + { 
url = "https://files.pythonhosted.org/packages/66/3a/f716f64edc4aec2744e817660b317e2f9bb8de372338a95a96198efa1ac1/ormsgpack-1.12.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59f5da97000c12bc2d50e988bdc8576b21f6ab4e608489879d35b2c07a8ab51a", size = 210538, upload-time = "2026-01-18T20:55:20.097Z" }, + { url = "https://files.pythonhosted.org/packages/72/30/a436be9ce27d693d4e19fa94900028067133779f09fc45776db3f689c822/ormsgpack-1.12.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e4459c3f27066beadb2b81ea48a076a417aafffff7df1d3c11c519190ed44f2", size = 212401, upload-time = "2026-01-18T20:55:46.447Z" }, + { url = "https://files.pythonhosted.org/packages/10/c5/cde98300fd33fee84ca71de4751b19aeeca675f0cf3c0ec4b043f40f3b76/ormsgpack-1.12.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a1c460655d7288407ffa09065e322a7231997c0d62ce914bf3a96ad2dc6dedd", size = 387080, upload-time = "2026-01-18T20:56:00.884Z" }, + { url = "https://files.pythonhosted.org/packages/6a/31/30bf445ef827546747c10889dd254b3d84f92b591300efe4979d792f4c41/ormsgpack-1.12.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:458e4568be13d311ef7d8877275e7ccbe06c0e01b39baaac874caaa0f46d826c", size = 482346, upload-time = "2026-01-18T20:55:39.831Z" }, + { url = "https://files.pythonhosted.org/packages/2e/f5/e1745ddf4fa246c921b5ca253636c4c700ff768d78032f79171289159f6e/ormsgpack-1.12.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8cde5eaa6c6cbc8622db71e4a23de56828e3d876aeb6460ffbcb5b8aff91093b", size = 425178, upload-time = "2026-01-18T20:55:27.106Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a2/e6532ed7716aed03dede8df2d0d0d4150710c2122647d94b474147ccd891/ormsgpack-1.12.2-cp310-cp310-win_amd64.whl", hash = "sha256:dc7a33be14c347893edbb1ceda89afbf14c467d593a5ee92c11de4f1666b4d4f", size = 117183, upload-time = "2026-01-18T20:55:55.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/08/8b68f24b18e69d92238aa8f258218e6dfeacf4381d9d07ab8df303f524a9/ormsgpack-1.12.2-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:bd5f4bf04c37888e864f08e740c5a573c4017f6fd6e99fa944c5c935fabf2dd9", size = 378266, upload-time = "2026-01-18T20:55:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/29fc13044ecb7c153523ae0a1972269fcd613650d1fa1a9cec1044c6b666/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34d5b28b3570e9fed9a5a76528fc7230c3c76333bc214798958e58e9b79cc18a", size = 203035, upload-time = "2026-01-18T20:55:30.59Z" }, + { url = "https://files.pythonhosted.org/packages/ad/c2/00169fb25dd8f9213f5e8a549dfb73e4d592009ebc85fbbcd3e1dcac575b/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3708693412c28f3538fb5a65da93787b6bbab3484f6bc6e935bfb77a62400ae5", size = 210539, upload-time = "2026-01-18T20:55:48.569Z" }, + { url = "https://files.pythonhosted.org/packages/1b/33/543627f323ff3c73091f51d6a20db28a1a33531af30873ea90c5ac95a9b5/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43013a3f3e2e902e1d05e72c0f1aeb5bedbb8e09240b51e26792a3c89267e181", size = 212401, upload-time = "2026-01-18T20:56:10.101Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5d/f70e2c3da414f46186659d24745483757bcc9adccb481a6eb93e2b729301/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7c8b1667a72cbba74f0ae7ecf3105a5e01304620ed14528b2cb4320679d2869b", size = 387082, upload-time = "2026-01-18T20:56:12.047Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d6/06e8dc920c7903e051f30934d874d4afccc9bb1c09dcaf0bc03a7de4b343/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:df6961442140193e517303d0b5d7bc2e20e69a879c2d774316125350c4a76b92", size = 482346, upload-time = "2026-01-18T20:56:05.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/c4/f337ac0905eed9c393ef990c54565cd33644918e0a8031fe48c098c71dbf/ormsgpack-1.12.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c6a4c34ddef109647c769d69be65fa1de7a6022b02ad45546a69b3216573eb4a", size = 425181, upload-time = "2026-01-18T20:55:37.83Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/6d5758fabef3babdf4bbbc453738cc7de9cd3334e4c38dd5737e27b85653/ormsgpack-1.12.2-cp311-cp311-win_amd64.whl", hash = "sha256:73670ed0375ecc303858e3613f407628dd1fca18fe6ac57b7b7ce66cc7bb006c", size = 117182, upload-time = "2026-01-18T20:55:31.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/57/17a15549233c37e7fd054c48fe9207492e06b026dbd872b826a0b5f833b6/ormsgpack-1.12.2-cp311-cp311-win_arm64.whl", hash = "sha256:c2be829954434e33601ae5da328cccce3266b098927ca7a30246a0baec2ce7bd", size = 111464, upload-time = "2026-01-18T20:55:38.811Z" }, + { url = "https://files.pythonhosted.org/packages/4c/36/16c4b1921c308a92cef3bf6663226ae283395aa0ff6e154f925c32e91ff5/ormsgpack-1.12.2-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7a29d09b64b9694b588ff2f80e9826bdceb3a2b91523c5beae1fab27d5c940e7", size = 378618, upload-time = "2026-01-18T20:55:50.835Z" }, + { url = "https://files.pythonhosted.org/packages/c0/68/468de634079615abf66ed13bb5c34ff71da237213f29294363beeeca5306/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b39e629fd2e1c5b2f46f99778450b59454d1f901bc507963168985e79f09c5d", size = 203186, upload-time = "2026-01-18T20:56:11.163Z" }, + { url = "https://files.pythonhosted.org/packages/73/a9/d756e01961442688b7939bacd87ce13bfad7d26ce24f910f6028178b2cc8/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:958dcb270d30a7cb633a45ee62b9444433fa571a752d2ca484efdac07480876e", size = 210738, upload-time = "2026-01-18T20:56:09.181Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/ba/795b1036888542c9113269a3f5690ab53dd2258c6fb17676ac4bd44fcf94/ormsgpack-1.12.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d379d72b6c5e964851c77cfedfb386e474adee4fd39791c2c5d9efb53505cc", size = 212569, upload-time = "2026-01-18T20:56:06.135Z" }, + { url = "https://files.pythonhosted.org/packages/6c/aa/bff73c57497b9e0cba8837c7e4bcab584b1a6dbc91a5dd5526784a5030c8/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8463a3fc5f09832e67bdb0e2fda6d518dc4281b133166146a67f54c08496442e", size = 387166, upload-time = "2026-01-18T20:55:36.738Z" }, + { url = "https://files.pythonhosted.org/packages/d3/cf/f8283cba44bcb7b14f97b6274d449db276b3a86589bdb363169b51bc12de/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:eddffb77eff0bad4e67547d67a130604e7e2dfbb7b0cde0796045be4090f35c6", size = 482498, upload-time = "2026-01-18T20:55:29.626Z" }, + { url = "https://files.pythonhosted.org/packages/05/be/71e37b852d723dfcbe952ad04178c030df60d6b78eba26bfd14c9a40575e/ormsgpack-1.12.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fcd55e5f6ba0dbce624942adf9f152062135f991a0126064889f68eb850de0dd", size = 425518, upload-time = "2026-01-18T20:55:49.556Z" }, + { url = "https://files.pythonhosted.org/packages/7a/0c/9803aa883d18c7ef197213cd2cbf73ba76472a11fe100fb7dab2884edf48/ormsgpack-1.12.2-cp312-cp312-win_amd64.whl", hash = "sha256:d024b40828f1dde5654faebd0d824f9cc29ad46891f626272dd5bfd7af2333a4", size = 117462, upload-time = "2026-01-18T20:55:47.726Z" }, + { url = "https://files.pythonhosted.org/packages/c8/9e/029e898298b2cc662f10d7a15652a53e3b525b1e7f07e21fef8536a09bb8/ormsgpack-1.12.2-cp312-cp312-win_arm64.whl", hash = "sha256:da538c542bac7d1c8f3f2a937863dba36f013108ce63e55745941dda4b75dbb6", size = 111559, upload-time = "2026-01-18T20:55:54.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/29/bb0eba3288c0449efbb013e9c6f58aea79cf5cb9ee1921f8865f04c1a9d7/ormsgpack-1.12.2-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5ea60cb5f210b1cfbad8c002948d73447508e629ec375acb82910e3efa8ff355", size = 378661, upload-time = "2026-01-18T20:55:57.765Z" }, + { url = "https://files.pythonhosted.org/packages/6e/31/5efa31346affdac489acade2926989e019e8ca98129658a183e3add7af5e/ormsgpack-1.12.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3601f19afdbea273ed70b06495e5794606a8b690a568d6c996a90d7255e51c1", size = 203194, upload-time = "2026-01-18T20:56:08.252Z" }, + { url = "https://files.pythonhosted.org/packages/eb/56/d0087278beef833187e0167f8527235ebe6f6ffc2a143e9de12a98b1ce87/ormsgpack-1.12.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29a9f17a3dac6054c0dce7925e0f4995c727f7c41859adf9b5572180f640d172", size = 210778, upload-time = "2026-01-18T20:55:17.694Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a2/072343e1413d9443e5a252a8eb591c2d5b1bffbe5e7bfc78c069361b92eb/ormsgpack-1.12.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39c1bd2092880e413902910388be8715f70b9f15f20779d44e673033a6146f2d", size = 212592, upload-time = "2026-01-18T20:55:32.747Z" }, + { url = "https://files.pythonhosted.org/packages/a2/8b/a0da3b98a91d41187a63b02dda14267eefc2a74fcb43cc2701066cf1510e/ormsgpack-1.12.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:50b7249244382209877deedeee838aef1542f3d0fc28b8fe71ca9d7e1896a0d7", size = 387164, upload-time = "2026-01-18T20:55:40.853Z" }, + { url = "https://files.pythonhosted.org/packages/19/bb/6d226bc4cf9fc20d8eb1d976d027a3f7c3491e8f08289a2e76abe96a65f3/ormsgpack-1.12.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:5af04800d844451cf102a59c74a841324868d3f1625c296a06cc655c542a6685", size = 482516, upload-time = "2026-01-18T20:55:42.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/f1/bb2c7223398543dedb3dbf8bb93aaa737b387de61c5feaad6f908841b782/ormsgpack-1.12.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cec70477d4371cd524534cd16472d8b9cc187e0e3043a8790545a9a9b296c258", size = 425539, upload-time = "2026-01-18T20:55:24.727Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e8/0fb45f57a2ada1fed374f7494c8cd55e2f88ccd0ab0a669aa3468716bf5f/ormsgpack-1.12.2-cp313-cp313-win_amd64.whl", hash = "sha256:21f4276caca5c03a818041d637e4019bc84f9d6ca8baa5ea03e5cc8bf56140e9", size = 117459, upload-time = "2026-01-18T20:55:56.876Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d4/0cfeea1e960d550a131001a7f38a5132c7ae3ebde4c82af1f364ccc5d904/ormsgpack-1.12.2-cp313-cp313-win_arm64.whl", hash = "sha256:baca4b6773d20a82e36d6fd25f341064244f9f86a13dead95dd7d7f996f51709", size = 111577, upload-time = "2026-01-18T20:55:43.605Z" }, + { url = "https://files.pythonhosted.org/packages/94/16/24d18851334be09c25e87f74307c84950f18c324a4d3c0b41dabdbf19c29/ormsgpack-1.12.2-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:bc68dd5915f4acf66ff2010ee47c8906dc1cf07399b16f4089f8c71733f6e36c", size = 378717, upload-time = "2026-01-18T20:55:26.164Z" }, + { url = "https://files.pythonhosted.org/packages/b5/a2/88b9b56f83adae8032ac6a6fa7f080c65b3baf9b6b64fd3d37bd202991d4/ormsgpack-1.12.2-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46d084427b4132553940070ad95107266656cb646ea9da4975f85cb1a6676553", size = 203183, upload-time = "2026-01-18T20:55:18.815Z" }, + { url = "https://files.pythonhosted.org/packages/a9/80/43e4555963bf602e5bdc79cbc8debd8b6d5456c00d2504df9775e74b450b/ormsgpack-1.12.2-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c010da16235806cf1d7bc4c96bf286bfa91c686853395a299b3ddb49499a3e13", size = 210814, upload-time = "2026-01-18T20:55:33.973Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/e1/7cfbf28de8bca6efe7e525b329c31277d1b64ce08dcba723971c241a9d60/ormsgpack-1.12.2-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18867233df592c997154ff942a6503df274b5ac1765215bceba7a231bea2745d", size = 212634, upload-time = "2026-01-18T20:55:28.634Z" }, + { url = "https://files.pythonhosted.org/packages/95/f8/30ae5716e88d792a4e879debee195653c26ddd3964c968594ddef0a3cc7e/ormsgpack-1.12.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b009049086ddc6b8f80c76b3955df1aa22a5fbd7673c525cd63bf91f23122ede", size = 387139, upload-time = "2026-01-18T20:56:02.013Z" }, + { url = "https://files.pythonhosted.org/packages/dc/81/aee5b18a3e3a0e52f718b37ab4b8af6fae0d9d6a65103036a90c2a8ffb5d/ormsgpack-1.12.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:1dcc17d92b6390d4f18f937cf0b99054824a7815818012ddca925d6e01c2e49e", size = 482578, upload-time = "2026-01-18T20:55:35.117Z" }, + { url = "https://files.pythonhosted.org/packages/bd/17/71c9ba472d5d45f7546317f467a5fc941929cd68fb32796ca3d13dcbaec2/ormsgpack-1.12.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f04b5e896d510b07c0ad733d7fce2d44b260c5e6c402d272128f8941984e4285", size = 425539, upload-time = "2026-01-18T20:56:04.009Z" }, + { url = "https://files.pythonhosted.org/packages/2e/a6/ac99cd7fe77e822fed5250ff4b86fa66dd4238937dd178d2299f10b69816/ormsgpack-1.12.2-cp314-cp314-win_amd64.whl", hash = "sha256:ae3aba7eed4ca7cb79fd3436eddd29140f17ea254b91604aa1eb19bfcedb990f", size = 117493, upload-time = "2026-01-18T20:56:07.343Z" }, + { url = "https://files.pythonhosted.org/packages/3a/67/339872846a1ae4592535385a1c1f93614138566d7af094200c9c3b45d1e5/ormsgpack-1.12.2-cp314-cp314-win_arm64.whl", hash = "sha256:118576ea6006893aea811b17429bfc561b4778fad393f5f538c84af70b01260c", size = 111579, upload-time = "2026-01-18T20:55:21.161Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/c2/6feb972dc87285ad381749d3882d8aecbde9f6ecf908dd717d33d66df095/ormsgpack-1.12.2-cp314-cp314t-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:7121b3d355d3858781dc40dafe25a32ff8a8242b9d80c692fd548a4b1f7fd3c8", size = 378721, upload-time = "2026-01-18T20:55:52.12Z" }, + { url = "https://files.pythonhosted.org/packages/a3/9a/900a6b9b413e0f8a471cf07830f9cf65939af039a362204b36bd5b581d8b/ormsgpack-1.12.2-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ee766d2e78251b7a63daf1cddfac36a73562d3ddef68cacfb41b2af64698033", size = 203170, upload-time = "2026-01-18T20:55:44.469Z" }, + { url = "https://files.pythonhosted.org/packages/87/4c/27a95466354606b256f24fad464d7c97ab62bce6cc529dd4673e1179b8fb/ormsgpack-1.12.2-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:292410a7d23de9b40444636b9b8f1e4e4b814af7f1ef476e44887e52a123f09d", size = 212816, upload-time = "2026-01-18T20:55:23.501Z" }, + { url = "https://files.pythonhosted.org/packages/73/cd/29cee6007bddf7a834e6cd6f536754c0535fcb939d384f0f37a38b1cddb8/ormsgpack-1.12.2-cp314-cp314t-win_amd64.whl", hash = "sha256:837dd316584485b72ef451d08dd3e96c4a11d12e4963aedb40e08f89685d8ec2", size = 117232, upload-time = "2026-01-18T20:55:45.448Z" }, ] [[package]] @@ -3353,36 +3536,151 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/98/2624954f83489ab13fde2b544baa337d5578c07eee304d320d9ba56e1b1f/psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30", size = 246094, upload-time = "2023-04-17T18:25:14.584Z" }, ] +[[package]] +name = "psycopg" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/d8/a763308a41e2ecfb6256ba0877d340c2f2b124c8b2746401863d96fa2c7a/psycopg_binary-3.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b3385b58b2fe408a13d084c14b8dcf468cd36cbbe774408250facc128f9fa75c", size = 4609758, upload-time = "2026-02-18T16:46:33.132Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a9/f8a683e85400c1208685e7c895abc049dc13aa0b6ea989e6adf0a3681fe0/psycopg_binary-3.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1bef235a50a80f6aba05147002bc354559657cb6386dbd04d8e1c97d1d7cbe84", size = 4676740, upload-time = "2026-02-18T16:46:42.904Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7d/03512c4aaac8a58fc3b1221f38293aa517a1950d10ef8646c72c49addc7d/psycopg_binary-3.3.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:97c839717bf8c8df3f6d983a20949c4fb22e2a34ee172e3e427ede363feda27b", size = 5496335, upload-time = "2026-02-18T16:46:51.517Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/bc/23319b4b1c2c0b810d225e1b6f16efbb16150074fc0ea96bfcabdf59ee09/psycopg_binary-3.3.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:48e500cf1c0984dacf1f28ea482c3cdbb4c2288d51c336c04bc64198ab21fc51", size = 5172032, upload-time = "2026-02-18T16:47:00.878Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c8/6d61dc0a56654c558a37b2d9b2094e470aa12621305cc7935fd769122e32/psycopg_binary-3.3.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb36a08859b9432d94ea6b26ec41a2f98f83f14868c91321d0c1e11f672eeae7", size = 6763107, upload-time = "2026-02-18T16:47:11.784Z" }, + { url = "https://files.pythonhosted.org/packages/9e/b5/e2a3c90aa1059f5b5f593379caad7be3cc3c2ce1ddfc7730e39854e174fe/psycopg_binary-3.3.3-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0dde92cfde09293fb63b3f547919ba7d73bd2654573c03502b3263dd0218e44e", size = 5006494, upload-time = "2026-02-18T16:47:17.062Z" }, + { url = "https://files.pythonhosted.org/packages/5d/3e/bf126e0a1f864e191b7f3eeea667ee2ce13d582b036255fb8b12946d1f7a/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:78c9ce98caaf82ac8484d269791c1b403d7598633e0e4e2fa1097baae244e2f1", size = 4533850, upload-time = "2026-02-18T16:47:21.673Z" }, + { url = "https://files.pythonhosted.org/packages/f4/d8/bb5e8d395deb945629aa0c65d12ab90ec3bfcbdf56be89e2a84d001864c9/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d593612758d0041cb13cb0003f7f8d3fabb7ad9319e651e78afae49b1cf5860e", size = 4223316, upload-time = "2026-02-18T16:47:25.82Z" }, + { url = "https://files.pythonhosted.org/packages/c2/70/33eef61b0f0fd41ebf93b9699f44067313a45016827f67b3c8cc41f0a7ab/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:f24e8e17035200a465c178e9ea945527ad0738118694184c450f1192a452ff25", size = 3954515, upload-time = "2026-02-18T16:47:30.434Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/db/27c2b3b9698e713e83e11e8540daa27516f9e90390ec21a41091cb15fcaf/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e7b607f0e14f2a4cf7e78a05ebd13df6144acfba87cb90842e70d3f125d9f53f", size = 4260274, upload-time = "2026-02-18T16:47:36.128Z" }, + { url = "https://files.pythonhosted.org/packages/a1/3b/71e5d603059bf5474215f573a3e2d357a4e95672b26e04d41674400d4862/psycopg_binary-3.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b27d3a23c79fa59557d2cc63a7e8bb4c7e022c018558eda36f9d7c4e6b99a6e0", size = 3557375, upload-time = "2026-02-18T16:47:42.799Z" }, + { url = "https://files.pythonhosted.org/packages/be/c0/b389119dd754483d316805260f3e73cdcad97925839107cc7a296f6132b1/psycopg_binary-3.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89bb9ee11177b2995d87186b1d9fa892d8ea725e85eab28c6525e4cc14ee048", size = 4609740, upload-time = "2026-02-18T16:47:51.093Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e3/9976eef20f61840285174d360da4c820a311ab39d6b82fa09fbb545be825/psycopg_binary-3.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f7d0cf072c6fbac3795b08c98ef9ea013f11db609659dcfc6b1f6cc31f9e181", size = 4676837, upload-time = "2026-02-18T16:47:55.523Z" }, + { url = "https://files.pythonhosted.org/packages/9f/f2/d28ba2f7404fd7f68d41e8a11df86313bd646258244cb12a8dd83b868a97/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:90eecd93073922f085967f3ed3a98ba8c325cbbc8c1a204e300282abd2369e13", size = 5497070, upload-time = "2026-02-18T16:47:59.929Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/6c5c54b815edeb30a281cfcea96dc93b3bb6be939aea022f00cab7aa1420/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dac7ee2f88b4d7bb12837989ca354c38d400eeb21bce3b73dac02622f0a3c8d6", size = 5172410, upload-time = "2026-02-18T16:48:05.665Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/75/8206c7008b57de03c1ada46bd3110cc3743f3fd9ed52031c4601401d766d/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62cf8784eb6d35beaee1056d54caf94ec6ecf2b7552395e305518ab61eb8fd2", size = 6763408, upload-time = "2026-02-18T16:48:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/d4/5a/ea1641a1e6c8c8b3454b0fcb43c3045133a8b703e6e824fae134088e63bd/psycopg_binary-3.3.3-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a39f34c9b18e8f6794cca17bfbcd64572ca2482318db644268049f8c738f35a6", size = 5006255, upload-time = "2026-02-18T16:48:22.176Z" }, + { url = "https://files.pythonhosted.org/packages/aa/fb/538df099bf55ae1637d52d7ccb6b9620b535a40f4c733897ac2b7bb9e14c/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:883d68d48ca9ff3cb3d10c5fdebea02c79b48eecacdddbf7cce6e7cdbdc216b8", size = 4532694, upload-time = "2026-02-18T16:48:27.338Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d1/00780c0e187ea3c13dfc53bd7060654b2232cd30df562aac91a5f1c545ac/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:cab7bc3d288d37a80aa8c0820033250c95e40b1c2b5c57cf59827b19c2a8b69d", size = 4222833, upload-time = "2026-02-18T16:48:31.221Z" }, + { url = "https://files.pythonhosted.org/packages/7a/34/a07f1ff713c51d64dc9f19f2c32be80299a2055d5d109d5853662b922cb4/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:56c767007ca959ca32f796b42379fc7e1ae2ed085d29f20b05b3fc394f3715cc", size = 3952818, upload-time = "2026-02-18T16:48:35.869Z" }, + { url = "https://files.pythonhosted.org/packages/d3/67/d33f268a7759b4445f3c9b5a181039b01af8c8263c865c1be7a6444d4749/psycopg_binary-3.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:da2f331a01af232259a21573a01338530c6016dcfad74626c01330535bcd8628", size = 4258061, upload-time = "2026-02-18T16:48:41.365Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/3b/0d8d2c5e8e29ccc07d28c8af38445d9d9abcd238d590186cac82ee71fc84/psycopg_binary-3.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:19f93235ece6dbfc4036b5e4f6d8b13f0b8f2b3eeb8b0bd2936d406991bcdd40", size = 3558915, upload-time = "2026-02-18T16:48:46.679Z" }, + { url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" }, + { url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" }, + { url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" }, + { url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" }, + { url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" }, + { url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414, upload-time = "2026-02-18T16:50:01.441Z" }, + { url = "https://files.pythonhosted.org/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021, upload-time = "2026-02-18T16:50:08.803Z" }, + { url = "https://files.pythonhosted.org/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453, upload-time = "2026-02-18T16:50:14.985Z" }, + { url = "https://files.pythonhosted.org/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135, upload-time = "2026-02-18T16:50:24.82Z" }, + { url = "https://files.pythonhosted.org/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315, upload-time = "2026-02-18T16:50:35.106Z" }, + { url = "https://files.pythonhosted.org/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783, upload-time = "2026-02-18T16:50:42.067Z" 
}, + { url = "https://files.pythonhosted.org/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023, upload-time = "2026-02-18T16:50:50.116Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874, upload-time = "2026-02-18T16:50:55.97Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668, upload-time = "2026-02-18T16:51:03.299Z" }, + { url = "https://files.pythonhosted.org/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973, upload-time = "2026-02-18T16:51:09.097Z" }, + { url = "https://files.pythonhosted.org/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342, upload-time = "2026-02-18T16:51:13.892Z" }, + { url = "https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368, upload-time = "2026-02-18T16:51:19.295Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047, upload-time = "2026-02-18T16:51:23.84Z" }, + { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096, upload-time = "2026-02-18T16:51:29.645Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720, upload-time = "2026-02-18T16:51:36.499Z" }, + { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076, upload-time = "2026-02-18T16:51:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623, upload-time = "2026-02-18T16:51:47.707Z" }, + { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096, upload-time = "2026-02-18T16:51:51.421Z" 
}, + { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884, upload-time = "2026-02-18T16:51:57.336Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542, upload-time = "2026-02-18T16:52:04.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 4254339, upload-time = "2026-02-18T16:52:10.444Z" }, + { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796, upload-time = "2026-02-18T16:52:14.02Z" }, +] + +[[package]] +name = "psycopg-pool" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/9a/9470d013d0d50af0da9c4251614aeb3c1823635cab3edc211e3839db0bcf/psycopg_pool-3.3.0.tar.gz", hash = "sha256:fa115eb2860bd88fce1717d75611f41490dec6135efb619611142b24da3f6db5", size = 31606, upload-time = "2025-12-01T11:34:33.11Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995, 
upload-time = "2025-12-01T11:34:29.761Z" }, +] + [[package]] name = "psycopg2-binary" -version = "2.9.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/3e/05ab0922422c91ca0ecb5939a100f8dc2b5d15f5978433beadc87c5329bf/psycopg2-binary-2.9.6.tar.gz", hash = "sha256:1f64dcfb8f6e0c014c7f55e51c9759f024f70ea572fbdef123f85318c297947c", size = 384044, upload-time = "2023-04-03T11:36:39.387Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/d9/4bf3be330a0bf0ea3dc0d0742188d095df35abfc5e31565f86f2ed2aa37c/psycopg2_binary-2.9.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d26e0342183c762de3276cca7a530d574d4e25121ca7d6e4a98e4f05cb8e4df7", size = 2184043, upload-time = "2023-04-03T11:32:34.984Z" }, - { url = "https://files.pythonhosted.org/packages/87/65/d7c77f8d8bca9d5e601366f34028e2d5702f53cdb48fcda05a5f6f14cdee/psycopg2_binary-2.9.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c48d8f2db17f27d41fb0e2ecd703ea41984ee19362cbce52c097963b3a1b4365", size = 2042608, upload-time = "2023-04-03T11:32:38.437Z" }, - { url = "https://files.pythonhosted.org/packages/5b/6f/b25708056f623e107e50c255d770dba42729f9ad1affbeba32b804b9f20d/psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffe9dc0a884a8848075e576c1de0290d85a533a9f6e9c4e564f19adf8f6e54a7", size = 2846144, upload-time = "2023-04-03T11:32:42.943Z" }, - { url = "https://files.pythonhosted.org/packages/e7/a5/8c99d01debda922e5402c88c93bfbd0c860fb94adf2a5397cac1e4082b98/psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a76e027f87753f9bd1ab5f7c9cb8c7628d1077ef927f5e2446477153a602f2c", size = 3077895, upload-time = "2023-04-03T11:32:46.777Z" }, - { url = "https://files.pythonhosted.org/packages/ae/ab/31bacfb21076c8527889c7716ed5593826dc3e4ab2dbf39da283baaa7fd1/psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:6460c7a99fc939b849431f1e73e013d54aa54293f30f1109019c56a0b2b2ec2f", size = 3258816, upload-time = "2023-04-03T11:32:50.591Z" }, - { url = "https://files.pythonhosted.org/packages/44/06/6de819cb8604ad884aefe8d12980f53788fc08c4de8ea3ff1b3039746449/psycopg2_binary-2.9.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae102a98c547ee2288637af07393dd33f440c25e5cd79556b04e3fca13325e5f", size = 3017867, upload-time = "2023-04-03T11:32:54.779Z" }, - { url = "https://files.pythonhosted.org/packages/e1/79/787079c90f0aa236d1e944f4486d82bda1a576bd2d9134bb4fd05c62058e/psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9972aad21f965599ed0106f65334230ce826e5ae69fda7cbd688d24fa922415e", size = 2351403, upload-time = "2023-04-03T11:32:58.499Z" }, - { url = "https://files.pythonhosted.org/packages/34/d9/835fce2b1e3986eac8dcaf291509e1e199df1be4fea48748992159a9bfbc/psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7a40c00dbe17c0af5bdd55aafd6ff6679f94a9be9513a4c7e071baf3d7d22a70", size = 2530173, upload-time = "2023-04-03T11:33:02.107Z" }, - { url = "https://files.pythonhosted.org/packages/fa/7c/d0c364f994dbc37245a67f33999704c286ed45a737b88dff24c252a942c5/psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:cacbdc5839bdff804dfebc058fe25684cae322987f7a38b0168bc1b2df703fb1", size = 2480287, upload-time = "2023-04-03T11:33:06.29Z" }, - { url = "https://files.pythonhosted.org/packages/26/8c/95a22fe085e47e6302e7d51c1b3b20fe634d3bb8ff8ebc5db15eeaf24d0f/psycopg2_binary-2.9.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7f0438fa20fb6c7e202863e0d5ab02c246d35efb1d164e052f2f3bfe2b152bd0", size = 2451233, upload-time = "2023-04-03T11:33:10.184Z" }, - { url = "https://files.pythonhosted.org/packages/81/79/92393c02ff640059ca6e883216690abc4933abef065b7689e7254fbb97ce/psycopg2_binary-2.9.6-cp310-cp310-win32.whl", hash = 
"sha256:b6c8288bb8a84b47e07013bb4850f50538aa913d487579e1921724631d02ea1b", size = 1022551, upload-time = "2023-04-03T11:33:12.895Z" }, - { url = "https://files.pythonhosted.org/packages/e9/e9/cc2820de0d2748937b3dcf9bd66d3277ebb9d6d8502621d946ddd0f6cf14/psycopg2_binary-2.9.6-cp310-cp310-win_amd64.whl", hash = "sha256:61b047a0537bbc3afae10f134dc6393823882eb263088c271331602b672e52e9", size = 1161670, upload-time = "2023-04-03T11:33:16.137Z" }, - { url = "https://files.pythonhosted.org/packages/0a/4a/6134e27e1deba089e45a9a328802ec04f47f74621f5e82eeab8828c83ded/psycopg2_binary-2.9.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:964b4dfb7c1c1965ac4c1978b0f755cc4bd698e8aa2b7667c575fb5f04ebe06b", size = 2184092, upload-time = "2023-04-03T11:33:21.716Z" }, - { url = "https://files.pythonhosted.org/packages/db/a1/3de02c36b5fdc7031b32b26779cba70d8f267db9f524824f577266c9d76b/psycopg2_binary-2.9.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afe64e9b8ea66866a771996f6ff14447e8082ea26e675a295ad3bdbffdd72afb", size = 2042584, upload-time = "2023-04-03T11:33:26.59Z" }, - { url = "https://files.pythonhosted.org/packages/ac/2b/e772581482eab43fd47ef1d67a657816e1c5bf97aa66b80ed59366c3fec7/psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e2ee79e7cf29582ef770de7dab3d286431b01c3bb598f8e05e09601b890081", size = 2846244, upload-time = "2023-04-03T11:33:32.17Z" }, - { url = "https://files.pythonhosted.org/packages/68/88/0ea86f9d4b845b6da22890586efeeab9ba56674474ad58d0f246e46de0a6/psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfa74c903a3c1f0d9b1c7e7b53ed2d929a4910e272add6700c38f365a6002820", size = 3077851, upload-time = "2023-04-03T11:33:36.49Z" }, - { url = "https://files.pythonhosted.org/packages/6e/46/c0c556bdc793c0bb0b3af31e3d0a46e68e58ae1cbb7ede2daf2d7139137e/psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b83456c2d4979e08ff56180a76429263ea254c3f6552cd14ada95cff1dec9bb8", size = 3258810, upload-time = "2023-04-03T11:33:41.735Z" }, - { url = "https://files.pythonhosted.org/packages/67/3d/4ab532a0b91a228d42fe6f4bd62384ae852fad92e195c6f78013045eb9ba/psycopg2_binary-2.9.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0645376d399bfd64da57148694d78e1f431b1e1ee1054872a5713125681cf1be", size = 3017892, upload-time = "2023-04-03T11:33:46.333Z" }, - { url = "https://files.pythonhosted.org/packages/bc/19/64dbb3f803dae8ec6f23e833635de99db51d5d573add03c8b9b3a2dbd6d5/psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e99e34c82309dd78959ba3c1590975b5d3c862d6f279f843d47d26ff89d7d7e1", size = 2351435, upload-time = "2023-04-03T11:33:49.81Z" }, - { url = "https://files.pythonhosted.org/packages/eb/3a/6bbc247a380250b898540acc9ddfce083667f4390ce4b68a26f4a0b60ef7/psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4ea29fc3ad9d91162c52b578f211ff1c931d8a38e1f58e684c45aa470adf19e2", size = 2530195, upload-time = "2023-04-03T11:33:53.332Z" }, - { url = "https://files.pythonhosted.org/packages/5d/67/73f4829773c1c7f90cd3d635732a436f31db64cad5849a5ddd88c187568b/psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4ac30da8b4f57187dbf449294d23b808f8f53cad6b1fc3623fa8a6c11d176dd0", size = 2480278, upload-time = "2023-04-03T11:33:57.308Z" }, - { url = "https://files.pythonhosted.org/packages/ce/36/bc8eccfb702596ec1f8b696c8aa9f1533b82e044cb87b460ad4691ca666b/psycopg2_binary-2.9.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e78e6e2a00c223e164c417628572a90093c031ed724492c763721c2e0bc2a8df", size = 2451210, upload-time = "2023-04-03T11:34:00.739Z" }, - { url = "https://files.pythonhosted.org/packages/01/76/512f0a878dd900902ed818156baccaf94c05d0450534f7b4f714932e3d7e/psycopg2_binary-2.9.6-cp311-cp311-win32.whl", hash = 
"sha256:1876843d8e31c89c399e31b97d4b9725a3575bb9c2af92038464231ec40f9edb", size = 1021999, upload-time = "2023-04-03T11:34:04.055Z" }, - { url = "https://files.pythonhosted.org/packages/26/40/c86e30a4c7c72b76b8ab6663568667d07f654770e45f09f022bfec2c2bd5/psycopg2_binary-2.9.6-cp311-cp311-win_amd64.whl", hash = "sha256:b4b24f75d16a89cc6b4cdff0eb6a910a966ecd476d1e73f7ce5985ff1328e9a6", size = 1161680, upload-time = "2023-04-03T11:34:07.338Z" }, +version = "2.9.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/bdc8274dc0585090b4e3432267d7be4dfbfd8971c0fa59167c711105a6bf/psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2", size = 385764, upload-time = "2024-10-16T11:24:58.126Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/81/331257dbf2801cdb82105306042f7a1637cc752f65f2bb688188e0de5f0b/psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f", size = 3043397, upload-time = "2024-10-16T11:18:58.647Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9a/7f4f2f031010bbfe6a02b4a15c01e12eb6b9b7b358ab33229f28baadbfc1/psycopg2_binary-2.9.10-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3e9c76f0ac6f92ecfc79516a8034a544926430f7b080ec5a0537bca389ee0906", size = 3274806, upload-time = "2024-10-16T11:19:03.935Z" }, + { url = "https://files.pythonhosted.org/packages/e5/57/8ddd4b374fa811a0b0a0f49b6abad1cde9cb34df73ea3348cc283fcd70b4/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ad26b467a405c798aaa1458ba09d7e2b6e5f96b1ce0ac15d82fd9f95dc38a92", size = 2851361, upload-time = "2024-10-16T11:19:07.277Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/66/d1e52c20d283f1f3a8e7e5c1e06851d432f123ef57b13043b4f9b21ffa1f/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:270934a475a0e4b6925b5f804e3809dd5f90f8613621d062848dd82f9cd62007", size = 3080836, upload-time = "2024-10-16T11:19:11.033Z" }, + { url = "https://files.pythonhosted.org/packages/a0/cb/592d44a9546aba78f8a1249021fe7c59d3afb8a0ba51434d6610cc3462b6/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48b338f08d93e7be4ab2b5f1dbe69dc5e9ef07170fe1f86514422076d9c010d0", size = 3264552, upload-time = "2024-10-16T11:19:14.606Z" }, + { url = "https://files.pythonhosted.org/packages/64/33/c8548560b94b7617f203d7236d6cdf36fe1a5a3645600ada6efd79da946f/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4152f8f76d2023aac16285576a9ecd2b11a9895373a1f10fd9db54b3ff06b4", size = 3019789, upload-time = "2024-10-16T11:19:18.889Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0e/c2da0db5bea88a3be52307f88b75eec72c4de62814cbe9ee600c29c06334/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32581b3020c72d7a421009ee1c6bf4a131ef5f0a968fab2e2de0c9d2bb4577f1", size = 2871776, upload-time = "2024-10-16T11:19:23.023Z" }, + { url = "https://files.pythonhosted.org/packages/15/d7/774afa1eadb787ddf41aab52d4c62785563e29949613c958955031408ae6/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2ce3e21dc3437b1d960521eca599d57408a695a0d3c26797ea0f72e834c7ffe5", size = 2820959, upload-time = "2024-10-16T11:19:26.906Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ed/440dc3f5991a8c6172a1cde44850ead0e483a375277a1aef7cfcec00af07/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e984839e75e0b60cfe75e351db53d6db750b00de45644c5d1f7ee5d1f34a1ce5", size = 2919329, upload-time = "2024-10-16T11:19:30.027Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/be/2cc8f4282898306732d2ae7b7378ae14e8df3c1231b53579efa056aae887/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c4745a90b78e51d9ba06e2088a2fe0c693ae19cc8cb051ccda44e8df8a6eb53", size = 2957659, upload-time = "2024-10-16T11:19:32.864Z" }, + { url = "https://files.pythonhosted.org/packages/d0/12/fb8e4f485d98c570e00dad5800e9a2349cfe0f71a767c856857160d343a5/psycopg2_binary-2.9.10-cp310-cp310-win32.whl", hash = "sha256:e5720a5d25e3b99cd0dc5c8a440570469ff82659bb09431c1439b92caf184d3b", size = 1024605, upload-time = "2024-10-16T11:19:35.462Z" }, + { url = "https://files.pythonhosted.org/packages/22/4f/217cd2471ecf45d82905dd09085e049af8de6cfdc008b6663c3226dc1c98/psycopg2_binary-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:3c18f74eb4386bf35e92ab2354a12c17e5eb4d9798e4c0ad3a00783eae7cd9f1", size = 1163817, upload-time = "2024-10-16T11:19:37.384Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8f/9feb01291d0d7a0a4c6a6bab24094135c2b59c6a81943752f632c75896d6/psycopg2_binary-2.9.10-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:04392983d0bb89a8717772a193cfaac58871321e3ec69514e1c4e0d4957b5aff", size = 3043397, upload-time = "2024-10-16T11:19:40.033Z" }, + { url = "https://files.pythonhosted.org/packages/15/30/346e4683532011561cd9c8dfeac6a8153dd96452fee0b12666058ab7893c/psycopg2_binary-2.9.10-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1a6784f0ce3fec4edc64e985865c17778514325074adf5ad8f80636cd029ef7c", size = 3274806, upload-time = "2024-10-16T11:19:43.5Z" }, + { url = "https://files.pythonhosted.org/packages/66/6e/4efebe76f76aee7ec99166b6c023ff8abdc4e183f7b70913d7c047701b79/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5f86c56eeb91dc3135b3fd8a95dc7ae14c538a2f3ad77a19645cf55bab1799c", size = 2851370, upload-time = "2024-10-16T11:19:46.986Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/fd/ff83313f86b50f7ca089b161b8e0a22bb3c319974096093cd50680433fdb/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b3d2491d4d78b6b14f76881905c7a8a8abcf974aad4a8a0b065273a0ed7a2cb", size = 3080780, upload-time = "2024-10-16T11:19:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e6/c4/bfadd202dcda8333a7ccafdc51c541dbdfce7c2c7cda89fa2374455d795f/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2286791ececda3a723d1910441c793be44625d86d1a4e79942751197f4d30341", size = 3264583, upload-time = "2024-10-16T11:19:54.424Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f1/09f45ac25e704ac954862581f9f9ae21303cc5ded3d0b775532b407f0e90/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:512d29bb12608891e349af6a0cccedce51677725a921c07dba6342beaf576f9a", size = 3019831, upload-time = "2024-10-16T11:19:57.762Z" }, + { url = "https://files.pythonhosted.org/packages/9e/2e/9beaea078095cc558f215e38f647c7114987d9febfc25cb2beed7c3582a5/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5a507320c58903967ef7384355a4da7ff3f28132d679aeb23572753cbf2ec10b", size = 2871822, upload-time = "2024-10-16T11:20:04.693Z" }, + { url = "https://files.pythonhosted.org/packages/01/9e/ef93c5d93f3dc9fc92786ffab39e323b9aed066ba59fdc34cf85e2722271/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6d4fa1079cab9018f4d0bd2db307beaa612b0d13ba73b5c6304b9fe2fb441ff7", size = 2820975, upload-time = "2024-10-16T11:20:11.401Z" }, + { url = "https://files.pythonhosted.org/packages/a5/f0/049e9631e3268fe4c5a387f6fc27e267ebe199acf1bc1bc9cbde4bd6916c/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:851485a42dbb0bdc1edcdabdb8557c09c9655dfa2ca0460ff210522e073e319e", size = 2919320, upload-time = "2024-10-16T11:20:17.959Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/9a/bcb8773b88e45fb5a5ea8339e2104d82c863a3b8558fbb2aadfe66df86b3/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:35958ec9e46432d9076286dda67942ed6d968b9c3a6a2fd62b48939d1d78bf68", size = 2957617, upload-time = "2024-10-16T11:20:24.711Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6b/144336a9bf08a67d217b3af3246abb1d027095dab726f0687f01f43e8c03/psycopg2_binary-2.9.10-cp311-cp311-win32.whl", hash = "sha256:ecced182e935529727401b24d76634a357c71c9275b356efafd8a2a91ec07392", size = 1024618, upload-time = "2024-10-16T11:20:27.718Z" }, + { url = "https://files.pythonhosted.org/packages/61/69/3b3d7bd583c6d3cbe5100802efa5beacaacc86e37b653fc708bf3d6853b8/psycopg2_binary-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:ee0e8c683a7ff25d23b55b11161c2663d4b099770f6085ff0a20d4505778d6b4", size = 1163816, upload-time = "2024-10-16T11:20:30.777Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/465cc9795cf76f6d329efdafca74693714556ea3891813701ac1fee87545/psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0", size = 3044771, upload-time = "2024-10-16T11:20:35.234Z" }, + { url = "https://files.pythonhosted.org/packages/8b/31/6d225b7b641a1a2148e3ed65e1aa74fc86ba3fee850545e27be9e1de893d/psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a", size = 3275336, upload-time = "2024-10-16T11:20:38.742Z" }, + { url = "https://files.pythonhosted.org/packages/30/b7/a68c2b4bff1cbb1728e3ec864b2d92327c77ad52edcd27922535a8366f68/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539", size = 2851637, upload-time = "2024-10-16T11:20:42.145Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/b1/cfedc0e0e6f9ad61f8657fd173b2f831ce261c02a08c0b09c652b127d813/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526", size = 3082097, upload-time = "2024-10-16T11:20:46.185Z" }, + { url = "https://files.pythonhosted.org/packages/18/ed/0a8e4153c9b769f59c02fb5e7914f20f0b2483a19dae7bf2db54b743d0d0/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1", size = 3264776, upload-time = "2024-10-16T11:20:50.879Z" }, + { url = "https://files.pythonhosted.org/packages/10/db/d09da68c6a0cdab41566b74e0a6068a425f077169bed0946559b7348ebe9/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e", size = 3020968, upload-time = "2024-10-16T11:20:56.819Z" }, + { url = "https://files.pythonhosted.org/packages/94/28/4d6f8c255f0dfffb410db2b3f9ac5218d959a66c715c34cac31081e19b95/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f", size = 2872334, upload-time = "2024-10-16T11:21:02.411Z" }, + { url = "https://files.pythonhosted.org/packages/05/f7/20d7bf796593c4fea95e12119d6cc384ff1f6141a24fbb7df5a668d29d29/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00", size = 2822722, upload-time = "2024-10-16T11:21:09.01Z" }, + { url = "https://files.pythonhosted.org/packages/4d/e4/0c407ae919ef626dbdb32835a03b6737013c3cc7240169843965cada2bdf/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5", size = 2920132, upload-time = "2024-10-16T11:21:16.339Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/70/aa69c9f69cf09a01da224909ff6ce8b68faeef476f00f7ec377e8f03be70/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47", size = 2959312, upload-time = "2024-10-16T11:21:25.584Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/213e59854fafe87ba47814bf413ace0dcee33a89c8c8c814faca6bc7cf3c/psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64", size = 1025191, upload-time = "2024-10-16T11:21:29.912Z" }, + { url = "https://files.pythonhosted.org/packages/92/29/06261ea000e2dc1e22907dbbc483a1093665509ea586b29b8986a0e56733/psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0", size = 1164031, upload-time = "2024-10-16T11:21:34.211Z" }, + { url = "https://files.pythonhosted.org/packages/3e/30/d41d3ba765609c0763505d565c4d12d8f3c79793f0d0f044ff5a28bf395b/psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d", size = 3044699, upload-time = "2024-10-16T11:21:42.841Z" }, + { url = "https://files.pythonhosted.org/packages/35/44/257ddadec7ef04536ba71af6bc6a75ec05c5343004a7ec93006bee66c0bc/psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb", size = 3275245, upload-time = "2024-10-16T11:21:51.989Z" }, + { url = "https://files.pythonhosted.org/packages/1b/11/48ea1cd11de67f9efd7262085588790a95d9dfcd9b8a687d46caf7305c1a/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7", size = 2851631, upload-time = "2024-10-16T11:21:57.584Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/e0/62ce5ee650e6c86719d621a761fe4bc846ab9eff8c1f12b1ed5741bf1c9b/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d", size = 3082140, upload-time = "2024-10-16T11:22:02.005Z" }, + { url = "https://files.pythonhosted.org/packages/27/ce/63f946c098611f7be234c0dd7cb1ad68b0b5744d34f68062bb3c5aa510c8/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73", size = 3264762, upload-time = "2024-10-16T11:22:06.412Z" }, + { url = "https://files.pythonhosted.org/packages/43/25/c603cd81402e69edf7daa59b1602bd41eb9859e2824b8c0855d748366ac9/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673", size = 3020967, upload-time = "2024-10-16T11:22:11.583Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d6/8708d8c6fca531057fa170cdde8df870e8b6a9b136e82b361c65e42b841e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f", size = 2872326, upload-time = "2024-10-16T11:22:16.406Z" }, + { url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712, upload-time = "2024-10-16T11:22:21.366Z" }, + { url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155, upload-time = "2024-10-16T11:22:25.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356, upload-time = "2024-10-16T11:22:30.562Z" }, + { url = "https://files.pythonhosted.org/packages/08/50/d13ea0a054189ae1bc21af1d85b6f8bb9bbc5572991055d70ad9006fe2d6/psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142", size = 2569224, upload-time = "2025-01-04T20:09:19.234Z" }, ] [[package]] @@ -3467,28 +3765,135 @@ wheels = [ [[package]] name = "pydantic" -version = "1.10.6" +version = "2.12.5" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, { name = "typing-extensions" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/87/200171b36005368bc4c114f01cb9e8ae2a3f3325a47da8c710cc58cfd00c/pydantic-1.10.6.tar.gz", hash = "sha256:cf95adb0d1671fc38d8c43dd921ad5814a735e7d9b4d9e437c088002863854fd", size = 342198, upload-time = "2023-03-08T20:13:10.22Z" } +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/d7/00df4bf20b38a79e6f797bdcec7789fa31205967c70f26fa4b82a445071e/pydantic-1.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9289065611c48147c1dd1fd344e9d57ab45f1d99b0fb26c51f1cf72cd9bcd31", size = 2857004, upload-time = "2023-03-08T20:11:46.068Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/ea/90a5f5eb0514ef204474670836e5b8110655fe9f5c11af76ecf89a54ff28/pydantic-1.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c32b6bba301490d9bb2bf5f631907803135e8085b6aa3e5fe5a770d46dd0160", size = 2531590, upload-time = "2023-03-08T20:11:48.962Z" }, - { url = "https://files.pythonhosted.org/packages/9f/ee/1199f07af1e07492803e3d91ad9a92a2ef972a7d2f1f07f12f12d56fdb18/pydantic-1.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd9b9e98068fa1068edfc9eabde70a7132017bdd4f362f8b4fd0abed79c33083", size = 3085387, upload-time = "2023-03-08T20:11:51.79Z" }, - { url = "https://files.pythonhosted.org/packages/d4/23/f707aa3dda65ec019a00f3f8c0c473d1d259109d17b978b2cdf2dabb6222/pydantic-1.10.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c84583b9df62522829cbc46e2b22e0ec11445625b5acd70c5681ce09c9b11c4", size = 3114234, upload-time = "2023-03-08T20:11:54.023Z" }, - { url = "https://files.pythonhosted.org/packages/55/f3/0dd8541b3e612b566080d8116d9f16edc127097200de0e60db2b4b12a419/pydantic-1.10.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b41822064585fea56d0116aa431fbd5137ce69dfe837b599e310034171996084", size = 3145405, upload-time = "2023-03-08T20:11:56.719Z" }, - { url = "https://files.pythonhosted.org/packages/74/73/ff4d55b3735e900d8f83e42a45b8ede51021653ee0e4b69ed86e84560506/pydantic-1.10.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:61f1f08adfaa9cc02e0cbc94f478140385cbd52d5b3c5a657c2fceb15de8d1fb", size = 3104136, upload-time = "2023-03-08T20:11:58.779Z" }, - { url = "https://files.pythonhosted.org/packages/d2/a9/09f668d851ae70d8513337489f91a16e2eca5524c3e0405d38291d043744/pydantic-1.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:32937835e525d92c98a1512218db4eed9ddc8f4ee2a78382d77f54341972c0e7", size = 2111931, upload-time = "2023-03-08T20:12:01.455Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/09/3f2ad426d20d2d353432f1c76290fa3c9863e2c04e05382ccca2aeade4c3/pydantic-1.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd5c531b22928e63d0cb1868dee76123456e1de2f1cb45879e9e7a3f3f1779b", size = 2808091, upload-time = "2023-03-08T20:12:03.797Z" }, - { url = "https://files.pythonhosted.org/packages/c6/18/c2212763c05bcbf5bf7c71e92e205c61e519be4e661947ab19063eab87af/pydantic-1.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e277bd18339177daa62a294256869bbe84df1fb592be2716ec62627bb8d7c81d", size = 2473452, upload-time = "2023-03-08T20:12:06.382Z" }, - { url = "https://files.pythonhosted.org/packages/b7/fa/dc742086cdae06c6f866dce887dae7de8bf29b100c871b9fd6b989d4f501/pydantic-1.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f15277d720aa57e173954d237628a8d304896364b9de745dcb722f584812c7", size = 3051646, upload-time = "2023-03-08T20:12:09.147Z" }, - { url = "https://files.pythonhosted.org/packages/90/dc/8299d37154ffb5d5cb971fbe22abb3aa190fc79c6ff1ab47bd5265ce5466/pydantic-1.10.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b243b564cea2576725e77aeeda54e3e0229a168bc587d536cd69941e6797543d", size = 3084465, upload-time = "2023-03-08T20:12:11.214Z" }, - { url = "https://files.pythonhosted.org/packages/eb/a3/f24c038a5f11316ea28daa70110e7254250452ff0305662b0f53abe27c8f/pydantic-1.10.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3ce13a558b484c9ae48a6a7c184b1ba0e5588c5525482681db418268e5f86186", size = 3129004, upload-time = "2023-03-08T20:12:13.341Z" }, - { url = "https://files.pythonhosted.org/packages/10/b4/8c5c7e659c8fe2f00a4dd1c33a6afc05b43a495f79f5ca3510699575d30f/pydantic-1.10.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3ac1cd4deed871dfe0c5f63721e29debf03e2deefa41b3ed5eb5f5df287c7b70", size = 3078422, upload-time = "2023-03-08T20:12:15.567Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/0f/cb18f4a236c10b331d67df90dcc1b5653875476beabf7890712a3b175cbd/pydantic-1.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:b1eb6610330a1dfba9ce142ada792f26bbef1255b75f538196a39e9e90388bf4", size = 2089541, upload-time = "2023-03-08T20:12:17.574Z" }, - { url = "https://files.pythonhosted.org/packages/dd/73/cc7e962d40a7c6abf7dd210d3ba78afccb17dd2fb6d9c3ef7add028ef010/pydantic-1.10.6-py3-none-any.whl", hash = "sha256:acc6783751ac9c9bc4680379edd6d286468a1dc8d7d9906cd6f1186ed682b2b0", size = 156439, upload-time = "2023-03-08T20:13:08.147Z" }, + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, + { 
url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, + { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, 
upload-time = "2025-11-04T13:39:19.351Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, + { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, + { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = 
"2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = 
"2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = 
"2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, + { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, + { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, + { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, ] [[package]] @@ -3968,6 +4373,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/bb/5deac77a9af870143c684ab46a7934038a53eb4aa975bc0687ed6ca2c610/requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5", size = 23892, upload-time = "2022-01-29T18:52:22.279Z" }, ] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, +] + [[package]] name = "rfc3986" version = "1.5.0" @@ -4455,11 +4872,23 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.12.2" +version = "4.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/5a/da40306b885cc8c09109dc2e1abd358d5684b1425678151cdaed4731c822/typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36", size = 107673, upload-time = "2025-07-04T13:28:34.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321, upload-time = "2024-06-07T18:52:15.995Z" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438, upload-time = "2024-06-07T18:52:13.582Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] [[package]] @@ -4493,6 +4922,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uuid-utils" +version = "0.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/d1/38a573f0c631c062cf42fa1f5d021d4dd3c31fb23e4376e4b56b0c9fbbed/uuid_utils-0.14.1.tar.gz", hash = "sha256:9bfc95f64af80ccf129c604fb6b8ca66c6f256451e32bc4570f760e4309c9b69", size = 22195, upload-time = "2026-02-20T22:50:38.833Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/b7/add4363039a34506a58457d96d4aa2126061df3a143eb4d042aedd6a2e76/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:93a3b5dc798a54a1feb693f2d1cb4cf08258c32ff05ae4929b5f0a2ca624a4f0", size = 604679, upload-time = "2026-02-20T22:50:27.469Z" }, + { url = "https://files.pythonhosted.org/packages/dd/84/d1d0bef50d9e66d31b2019997c741b42274d53dde2e001b7a83e9511c339/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ccd65a4b8e83af23eae5e56d88034b2fe7264f465d3e830845f10d1591b81741", size = 309346, upload-time = 
"2026-02-20T22:50:31.857Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ed/b6d6fd52a6636d7c3eddf97d68da50910bf17cd5ac221992506fb56cf12e/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b56b0cacd81583834820588378e432b0696186683b813058b707aedc1e16c4b1", size = 344714, upload-time = "2026-02-20T22:50:42.642Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a7/a19a1719fb626fe0b31882db36056d44fe904dc0cf15b06fdf56b2679cf7/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb3cf14de789097320a3c56bfdfdd51b1225d11d67298afbedee7e84e3837c96", size = 350914, upload-time = "2026-02-20T22:50:36.487Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fc/f6690e667fdc3bb1a73f57951f97497771c56fe23e3d302d7404be394d4f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e0854a90d67f4b0cc6e54773deb8be618f4c9bad98d3326f081423b5d14fae", size = 482609, upload-time = "2026-02-20T22:50:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/54/6e/dcd3fa031320921a12ec7b4672dea3bd1dd90ddffa363a91831ba834d559/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce6743ba194de3910b5feb1a62590cd2587e33a73ab6af8a01b642ceb5055862", size = 345699, upload-time = "2026-02-20T22:50:46.87Z" }, + { url = "https://files.pythonhosted.org/packages/04/28/e5220204b58b44ac0047226a9d016a113fde039280cc8732d9e6da43b39f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:043fb58fde6cf1620a6c066382f04f87a8e74feb0f95a585e4ed46f5d44af57b", size = 372205, upload-time = "2026-02-20T22:50:28.438Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d9/3d2eb98af94b8dfffc82b6a33b4dfc87b0a5de2c68a28f6dde0db1f8681b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c915d53f22945e55fe0d3d3b0b87fd965a57f5fd15666fd92d6593a73b1dd297", size = 521836, upload-time = 
"2026-02-20T22:50:23.057Z" }, + { url = "https://files.pythonhosted.org/packages/a8/15/0eb106cc6fe182f7577bc0ab6e2f0a40be247f35c5e297dbf7bbc460bd02/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:0972488e3f9b449e83f006ead5a0e0a33ad4a13e4462e865b7c286ab7d7566a3", size = 625260, upload-time = "2026-02-20T22:50:25.949Z" }, + { url = "https://files.pythonhosted.org/packages/3c/17/f539507091334b109e7496830af2f093d9fc8082411eafd3ece58af1f8ba/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:1c238812ae0c8ffe77d8d447a32c6dfd058ea4631246b08b5a71df586ff08531", size = 587824, upload-time = "2026-02-20T22:50:35.225Z" }, + { url = "https://files.pythonhosted.org/packages/2e/c2/d37a7b2e41f153519367d4db01f0526e0d4b06f1a4a87f1c5dfca5d70a8b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:bec8f8ef627af86abf8298e7ec50926627e29b34fa907fcfbedb45aaa72bca43", size = 551407, upload-time = "2026-02-20T22:50:44.915Z" }, + { url = "https://files.pythonhosted.org/packages/65/36/2d24b2cbe78547c6532da33fb8613debd3126eccc33a6374ab788f5e46e9/uuid_utils-0.14.1-cp39-abi3-win32.whl", hash = "sha256:b54d6aa6252d96bac1fdbc80d26ba71bad9f220b2724d692ad2f2310c22ef523", size = 183476, upload-time = "2026-02-20T22:50:32.745Z" }, + { url = "https://files.pythonhosted.org/packages/83/92/2d7e90df8b1a69ec4cff33243ce02b7a62f926ef9e2f0eca5a026889cd73/uuid_utils-0.14.1-cp39-abi3-win_amd64.whl", hash = "sha256:fc27638c2ce267a0ce3e06828aff786f91367f093c80625ee21dad0208e0f5ba", size = 187147, upload-time = "2026-02-20T22:50:45.807Z" }, + { url = "https://files.pythonhosted.org/packages/d9/26/529f4beee17e5248e37e0bc17a2761d34c0fa3b1e5729c88adb2065bae6e/uuid_utils-0.14.1-cp39-abi3-win_arm64.whl", hash = "sha256:b04cb49b42afbc4ff8dbc60cf054930afc479d6f4dd7f1ec3bbe5dbfdde06b7a", size = 188132, upload-time = "2026-02-20T22:50:41.718Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/f9/6c64bdbf71f58ccde7919e00491812556f446a5291573af92c49a5e9aaef/uuid_utils-0.14.1-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b197cd5424cf89fb019ca7f53641d05bfe34b1879614bed111c9c313b5574cd8", size = 591617, upload-time = "2026-02-20T22:50:24.532Z" }, + { url = "https://files.pythonhosted.org/packages/d0/f0/758c3b0fb0c4871c7704fef26a5bc861de4f8a68e4831669883bebe07b0f/uuid_utils-0.14.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:12c65020ba6cb6abe1d57fcbfc2d0ea0506c67049ee031714057f5caf0f9bc9c", size = 303702, upload-time = "2026-02-20T22:50:40.687Z" }, + { url = "https://files.pythonhosted.org/packages/85/89/d91862b544c695cd58855efe3201f83894ed82fffe34500774238ab8eba7/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b5d2ad28063d422ccc2c28d46471d47b61a58de885d35113a8f18cb547e25bf", size = 337678, upload-time = "2026-02-20T22:50:39.768Z" }, + { url = "https://files.pythonhosted.org/packages/ee/6b/cf342ba8a898f1de024be0243fac67c025cad530c79ea7f89c4ce718891a/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da2234387b45fde40b0fedfee64a0ba591caeea9c48c7698ab6e2d85c7991533", size = 343711, upload-time = "2026-02-20T22:50:43.965Z" }, + { url = "https://files.pythonhosted.org/packages/b3/20/049418d094d396dfa6606b30af925cc68a6670c3b9103b23e6990f84b589/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50fffc2827348c1e48972eed3d1c698959e63f9d030aa5dd82ba451113158a62", size = 476731, upload-time = "2026-02-20T22:50:30.589Z" }, + { url = "https://files.pythonhosted.org/packages/77/a1/0857f64d53a90321e6a46a3d4cc394f50e1366132dcd2ae147f9326ca98b/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c1dbe718765f70f5b7f9b7f66b6a937802941b1cc56bcf642ce0274169741e01", size = 338902, upload-time = "2026-02-20T22:50:33.927Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d0/5bf7cbf1ac138c92b9ac21066d18faf4d7e7f651047b700eb192ca4b9fdb/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:258186964039a8e36db10810c1ece879d229b01331e09e9030bc5dcabe231bd2", size = 364700, upload-time = "2026-02-20T22:50:21.732Z" }, +] + [[package]] name = "uvicorn" version = "0.21.1" @@ -4776,6 +5234,124 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/f8/e068dafbb844c1447c55b23c921f3d338cddaba4ea53187a7dd0058452d9/wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640", size = 22007, upload-time = "2023-02-27T01:58:28.469Z" }, ] +[[package]] +name = "xxhash" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/34/ee/f9f1d656ad168681bb0f6b092372c1e533c4416b8069b1896a175c46e484/xxhash-3.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:87ff03d7e35c61435976554477a7f4cd1704c3596a89a8300d5ce7fc83874a71", size = 32845, upload-time = "2025-10-02T14:33:51.573Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b1/93508d9460b292c74a09b83d16750c52a0ead89c51eea9951cb97a60d959/xxhash-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f572dfd3d0e2eb1a57511831cf6341242f5a9f8298a45862d085f5b93394a27d", size = 30807, upload-time = "2025-10-02T14:33:52.964Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/55/28c93a3662f2d200c70704efe74aab9640e824f8ce330d8d3943bf7c9b3c/xxhash-3.6.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:89952ea539566b9fed2bbd94e589672794b4286f342254fad28b149f9615fef8", size = 193786, upload-time = "2025-10-02T14:33:54.272Z" }, + { url = "https://files.pythonhosted.org/packages/c1/96/fec0be9bb4b8f5d9c57d76380a366f31a1781fb802f76fc7cda6c84893c7/xxhash-3.6.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e6f2ffb07a50b52465a1032c3cf1f4a5683f944acaca8a134a2f23674c2058", size = 212830, upload-time = "2025-10-02T14:33:55.706Z" }, + { url = "https://files.pythonhosted.org/packages/c4/a0/c706845ba77b9611f81fd2e93fad9859346b026e8445e76f8c6fd057cc6d/xxhash-3.6.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5b848ad6c16d308c3ac7ad4ba6bede80ed5df2ba8ed382f8932df63158dd4b2", size = 211606, upload-time = "2025-10-02T14:33:57.133Z" }, + { url = "https://files.pythonhosted.org/packages/67/1e/164126a2999e5045f04a69257eea946c0dc3e86541b400d4385d646b53d7/xxhash-3.6.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a034590a727b44dd8ac5914236a7b8504144447a9682586c3327e935f33ec8cc", size = 444872, upload-time = "2025-10-02T14:33:58.446Z" }, + { url = "https://files.pythonhosted.org/packages/2d/4b/55ab404c56cd70a2cf5ecfe484838865d0fea5627365c6c8ca156bd09c8f/xxhash-3.6.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a8f1972e75ebdd161d7896743122834fe87378160c20e97f8b09166213bf8cc", size = 193217, upload-time = "2025-10-02T14:33:59.724Z" }, + { url = "https://files.pythonhosted.org/packages/45/e6/52abf06bac316db33aa269091ae7311bd53cfc6f4b120ae77bac1b348091/xxhash-3.6.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:ee34327b187f002a596d7b167ebc59a1b729e963ce645964bbc050d2f1b73d07", size = 210139, upload-time = "2025-10-02T14:34:02.041Z" }, + { url = "https://files.pythonhosted.org/packages/34/37/db94d490b8691236d356bc249c08819cbcef9273a1a30acf1254ff9ce157/xxhash-3.6.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:339f518c3c7a850dd033ab416ea25a692759dc7478a71131fe8869010d2b75e4", size = 197669, upload-time = "2025-10-02T14:34:03.664Z" }, + { url = "https://files.pythonhosted.org/packages/b7/36/c4f219ef4a17a4f7a64ed3569bc2b5a9c8311abdb22249ac96093625b1a4/xxhash-3.6.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:bf48889c9630542d4709192578aebbd836177c9f7a4a2778a7d6340107c65f06", size = 210018, upload-time = "2025-10-02T14:34:05.325Z" }, + { url = "https://files.pythonhosted.org/packages/fd/06/bfac889a374fc2fc439a69223d1750eed2e18a7db8514737ab630534fa08/xxhash-3.6.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5576b002a56207f640636056b4160a378fe36a58db73ae5c27a7ec8db35f71d4", size = 413058, upload-time = "2025-10-02T14:34:06.925Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d1/555d8447e0dd32ad0930a249a522bb2e289f0d08b6b16204cfa42c1f5a0c/xxhash-3.6.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af1f3278bd02814d6dedc5dec397993b549d6f16c19379721e5a1d31e132c49b", size = 190628, upload-time = "2025-10-02T14:34:08.669Z" }, + { url = "https://files.pythonhosted.org/packages/d1/15/8751330b5186cedc4ed4b597989882ea05e0408b53fa47bcb46a6125bfc6/xxhash-3.6.0-cp310-cp310-win32.whl", hash = "sha256:aed058764db109dc9052720da65fafe84873b05eb8b07e5e653597951af57c3b", size = 30577, upload-time = "2025-10-02T14:34:10.234Z" }, + { url = "https://files.pythonhosted.org/packages/bb/cc/53f87e8b5871a6eb2ff7e89c48c66093bda2be52315a8161ddc54ea550c4/xxhash-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:e82da5670f2d0d98950317f82a0e4a0197150ff19a6df2ba40399c2a3b9ae5fb", size = 31487, upload-time = "2025-10-02T14:34:11.618Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/00/60f9ea3bb697667a14314d7269956f58bf56bb73864f8f8d52a3c2535e9a/xxhash-3.6.0-cp310-cp310-win_arm64.whl", hash = "sha256:4a082ffff8c6ac07707fb6b671caf7c6e020c75226c561830b73d862060f281d", size = 27863, upload-time = "2025-10-02T14:34:12.619Z" }, + { url = "https://files.pythonhosted.org/packages/17/d4/cc2f0400e9154df4b9964249da78ebd72f318e35ccc425e9f403c392f22a/xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a", size = 32844, upload-time = "2025-10-02T14:34:14.037Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ec/1cc11cd13e26ea8bc3cb4af4eaadd8d46d5014aebb67be3f71fb0b68802a/xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa", size = 30809, upload-time = "2025-10-02T14:34:15.484Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/19fe357ea348d98ca22f456f75a30ac0916b51c753e1f8b2e0e6fb884cce/xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248", size = 194665, upload-time = "2025-10-02T14:34:16.541Z" }, + { url = "https://files.pythonhosted.org/packages/90/3b/d1f1a8f5442a5fd8beedae110c5af7604dc37349a8e16519c13c19a9a2de/xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62", size = 213550, upload-time = "2025-10-02T14:34:17.878Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ef/3a9b05eb527457d5db13a135a2ae1a26c80fecd624d20f3e8dcc4cb170f3/xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f", size = 212384, upload-time = "2025-10-02T14:34:19.182Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/18/ccc194ee698c6c623acbf0f8c2969811a8a4b6185af5e824cd27b9e4fd3e/xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e", size = 445749, upload-time = "2025-10-02T14:34:20.659Z" }, + { url = "https://files.pythonhosted.org/packages/a5/86/cf2c0321dc3940a7aa73076f4fd677a0fb3e405cb297ead7d864fd90847e/xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8", size = 193880, upload-time = "2025-10-02T14:34:22.431Z" }, + { url = "https://files.pythonhosted.org/packages/82/fb/96213c8560e6f948a1ecc9a7613f8032b19ee45f747f4fca4eb31bb6d6ed/xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0", size = 210912, upload-time = "2025-10-02T14:34:23.937Z" }, + { url = "https://files.pythonhosted.org/packages/40/aa/4395e669b0606a096d6788f40dbdf2b819d6773aa290c19e6e83cbfc312f/xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77", size = 198654, upload-time = "2025-10-02T14:34:25.644Z" }, + { url = "https://files.pythonhosted.org/packages/67/74/b044fcd6b3d89e9b1b665924d85d3f400636c23590226feb1eb09e1176ce/xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c", size = 210867, upload-time = "2025-10-02T14:34:27.203Z" }, + { url = "https://files.pythonhosted.org/packages/bc/fd/3ce73bf753b08cb19daee1eb14aa0d7fe331f8da9c02dd95316ddfe5275e/xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b", size = 414012, upload-time = "2025-10-02T14:34:28.409Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/b3/5a4241309217c5c876f156b10778f3ab3af7ba7e3259e6d5f5c7d0129eb2/xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3", size = 191409, upload-time = "2025-10-02T14:34:29.696Z" }, + { url = "https://files.pythonhosted.org/packages/c0/01/99bfbc15fb9abb9a72b088c1d95219fc4782b7d01fc835bd5744d66dd0b8/xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd", size = 30574, upload-time = "2025-10-02T14:34:31.028Z" }, + { url = "https://files.pythonhosted.org/packages/65/79/9d24d7f53819fe301b231044ea362ce64e86c74f6e8c8e51320de248b3e5/xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef", size = 31481, upload-time = "2025-10-02T14:34:32.062Z" }, + { url = "https://files.pythonhosted.org/packages/30/4e/15cd0e3e8772071344eab2961ce83f6e485111fed8beb491a3f1ce100270/xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7", size = 27861, upload-time = "2025-10-02T14:34:33.555Z" }, + { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" }, + { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload-time = "2025-10-02T14:34:38.6Z" }, + { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" }, + { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" }, + { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" }, + { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload-time = "2025-10-02T14:34:45.042Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload-time = "2025-10-02T14:34:46.302Z" }, + { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" }, + { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload-time = "2025-10-02T14:34:51.954Z" }, + { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload-time = "2025-10-02T14:34:54.371Z" }, + { url = "https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738, upload-time = "2025-10-02T14:34:55.839Z" }, + { url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821, upload-time = "2025-10-02T14:34:57.219Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127, upload-time = "2025-10-02T14:34:59.21Z" }, + { url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975, upload-time = "2025-10-02T14:35:00.816Z" }, + { url = "https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload-time = "2025-10-02T14:35:02.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload-time = "2025-10-02T14:35:03.61Z" }, + { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload-time = "2025-10-02T14:35:05.013Z" }, + { url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440, upload-time = "2025-10-02T14:35:06.239Z" }, + { url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990, upload-time = "2025-10-02T14:35:07.735Z" }, + { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload-time = "2025-10-02T14:35:09.438Z" }, + { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload-time = "2025-10-02T14:35:11.162Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload-time = "2025-10-02T14:35:12.971Z" }, + { url = "https://files.pythonhosted.org/packages/e9/3a/6797e0114c21d1725e2577508e24006fd7ff1d8c0c502d3b52e45c1771d8/xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799", size = 30620, upload-time = "2025-10-02T14:35:14.129Z" }, + { url = "https://files.pythonhosted.org/packages/86/15/9bc32671e9a38b413a76d24722a2bf8784a132c043063a8f5152d390b0f9/xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392", size = 31542, upload-time = "2025-10-02T14:35:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/39/c5/cc01e4f6188656e56112d6a8e0dfe298a16934b8c47a247236549a3f7695/xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6", size = 27880, upload-time = "2025-10-02T14:35:16.315Z" }, + { url = "https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956, upload-time = "2025-10-02T14:35:17.413Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072, upload-time = "2025-10-02T14:35:18.844Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409, upload-time = "2025-10-02T14:35:20.31Z" }, + { url = "https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736, upload-time = "2025-10-02T14:35:21.616Z" }, + { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload-time = "2025-10-02T14:35:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload-time = "2025-10-02T14:35:25.111Z" }, + { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload-time = "2025-10-02T14:35:26.586Z" }, + { url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907, upload-time = "2025-10-02T14:35:28.087Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839, upload-time = "2025-10-02T14:35:29.857Z" }, + { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload-time = "2025-10-02T14:35:31.222Z" }, + { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload-time = "2025-10-02T14:35:32.517Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload-time = "2025-10-02T14:35:33.827Z" }, + { url = "https://files.pythonhosted.org/packages/19/fa/0172e350361d61febcea941b0cc541d6e6c8d65d153e85f850a7b256ff8a/xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec", size = 30916, upload-time = "2025-10-02T14:35:35.107Z" }, + { url = "https://files.pythonhosted.org/packages/ad/e6/e8cf858a2b19d6d45820f072eff1bea413910592ff17157cabc5f1227a16/xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8", size = 31799, upload-time = "2025-10-02T14:35:36.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/15/064b197e855bfb7b343210e82490ae672f8bc7cdf3ddb02e92f64304ee8a/xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746", size = 28044, upload-time = "2025-10-02T14:35:37.195Z" }, + { url = "https://files.pythonhosted.org/packages/7e/5e/0138bc4484ea9b897864d59fce9be9086030825bc778b76cb5a33a906d37/xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e", size = 32754, upload-time = "2025-10-02T14:35:38.245Z" }, + { url = "https://files.pythonhosted.org/packages/18/d7/5dac2eb2ec75fd771957a13e5dda560efb2176d5203f39502a5fc571f899/xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405", size = 30846, upload-time = "2025-10-02T14:35:39.6Z" }, + { url = "https://files.pythonhosted.org/packages/fe/71/8bc5be2bb00deb5682e92e8da955ebe5fa982da13a69da5a40a4c8db12fb/xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3", size = 194343, upload-time = "2025-10-02T14:35:40.69Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3b/52badfb2aecec2c377ddf1ae75f55db3ba2d321c5e164f14461c90837ef3/xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6", size = 213074, upload-time = "2025-10-02T14:35:42.29Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2b/ae46b4e9b92e537fa30d03dbc19cdae57ed407e9c26d163895e968e3de85/xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063", size = 212388, upload-time = "2025-10-02T14:35:43.929Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/80/49f88d3afc724b4ac7fbd664c8452d6db51b49915be48c6982659e0e7942/xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7", size = 445614, upload-time = "2025-10-02T14:35:45.216Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ba/603ce3961e339413543d8cd44f21f2c80e2a7c5cfe692a7b1f2cccf58f3c/xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b", size = 194024, upload-time = "2025-10-02T14:35:46.959Z" }, + { url = "https://files.pythonhosted.org/packages/78/d1/8e225ff7113bf81545cfdcd79eef124a7b7064a0bba53605ff39590b95c2/xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd", size = 210541, upload-time = "2025-10-02T14:35:48.301Z" }, + { url = "https://files.pythonhosted.org/packages/6f/58/0f89d149f0bad89def1a8dd38feb50ccdeb643d9797ec84707091d4cb494/xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0", size = 198305, upload-time = "2025-10-02T14:35:49.584Z" }, + { url = "https://files.pythonhosted.org/packages/11/38/5eab81580703c4df93feb5f32ff8fa7fe1e2c51c1f183ee4e48d4bb9d3d7/xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152", size = 210848, upload-time = "2025-10-02T14:35:50.877Z" }, + { url = "https://files.pythonhosted.org/packages/5e/6b/953dc4b05c3ce678abca756416e4c130d2382f877a9c30a20d08ee6a77c0/xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11", size = 414142, upload-time = "2025-10-02T14:35:52.15Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/a9/238ec0d4e81a10eb5026d4a6972677cbc898ba6c8b9dbaec12ae001b1b35/xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5", size = 191547, upload-time = "2025-10-02T14:35:53.547Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ee/3cf8589e06c2164ac77c3bf0aa127012801128f1feebf2a079272da5737c/xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f", size = 31214, upload-time = "2025-10-02T14:35:54.746Z" }, + { url = "https://files.pythonhosted.org/packages/02/5d/a19552fbc6ad4cb54ff953c3908bbc095f4a921bc569433d791f755186f1/xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad", size = 32290, upload-time = "2025-10-02T14:35:55.791Z" }, + { url = "https://files.pythonhosted.org/packages/b1/11/dafa0643bc30442c887b55baf8e73353a344ee89c1901b5a5c54a6c17d39/xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679", size = 28795, upload-time = "2025-10-02T14:35:57.162Z" }, + { url = "https://files.pythonhosted.org/packages/2c/db/0e99732ed7f64182aef4a6fb145e1a295558deec2a746265dcdec12d191e/xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4", size = 32955, upload-time = "2025-10-02T14:35:58.267Z" }, + { url = "https://files.pythonhosted.org/packages/55/f4/2a7c3c68e564a099becfa44bb3d398810cc0ff6749b0d3cb8ccb93f23c14/xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67", size = 31072, upload-time = "2025-10-02T14:35:59.382Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/d9/72a29cddc7250e8a5819dad5d466facb5dc4c802ce120645630149127e73/xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad", size = 196579, upload-time = "2025-10-02T14:36:00.838Z" }, + { url = "https://files.pythonhosted.org/packages/63/93/b21590e1e381040e2ca305a884d89e1c345b347404f7780f07f2cdd47ef4/xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b", size = 215854, upload-time = "2025-10-02T14:36:02.207Z" }, + { url = "https://files.pythonhosted.org/packages/ce/b8/edab8a7d4fa14e924b29be877d54155dcbd8b80be85ea00d2be3413a9ed4/xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b", size = 214965, upload-time = "2025-10-02T14:36:03.507Z" }, + { url = "https://files.pythonhosted.org/packages/27/67/dfa980ac7f0d509d54ea0d5a486d2bb4b80c3f1bb22b66e6a05d3efaf6c0/xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca", size = 448484, upload-time = "2025-10-02T14:36:04.828Z" }, + { url = "https://files.pythonhosted.org/packages/8c/63/8ffc2cc97e811c0ca5d00ab36604b3ea6f4254f20b7bc658ca825ce6c954/xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a", size = 196162, upload-time = "2025-10-02T14:36:06.182Z" }, + { url = "https://files.pythonhosted.org/packages/4b/77/07f0e7a3edd11a6097e990f6e5b815b6592459cb16dae990d967693e6ea9/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99", size = 213007, upload-time = "2025-10-02T14:36:07.733Z" }, + { url = "https://files.pythonhosted.org/packages/ae/d8/bc5fa0d152837117eb0bef6f83f956c509332ce133c91c63ce07ee7c4873/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3", size = 200956, upload-time = "2025-10-02T14:36:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/26/a5/d749334130de9411783873e9b98ecc46688dad5db64ca6e04b02acc8b473/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6", size = 213401, upload-time = "2025-10-02T14:36:10.585Z" }, + { url = "https://files.pythonhosted.org/packages/89/72/abed959c956a4bfc72b58c0384bb7940663c678127538634d896b1195c10/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93", size = 417083, upload-time = "2025-10-02T14:36:12.276Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b3/62fd2b586283b7d7d665fb98e266decadf31f058f1cf6c478741f68af0cb/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518", size = 193913, upload-time = "2025-10-02T14:36:14.025Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/c19c42c5b3f5a4aad748a6d5b4f23df3bed7ee5445accc65a0fb3ff03953/xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119", size = 31586, upload-time = "2025-10-02T14:36:15.603Z" }, + { url = "https://files.pythonhosted.org/packages/03/d6/4cc450345be9924fd5dc8c590ceda1db5b43a0a889587b0ae81a95511360/xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f", size = 32526, upload-time = "2025-10-02T14:36:16.708Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/c9/7243eb3f9eaabd1a88a5a5acadf06df2d83b100c62684b7425c6a11bcaa8/xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95", size = 28898, upload-time = "2025-10-02T14:36:17.843Z" }, + { url = "https://files.pythonhosted.org/packages/93/1e/8aec23647a34a249f62e2398c42955acd9b4c6ed5cf08cbea94dc46f78d2/xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0", size = 30662, upload-time = "2025-10-02T14:37:01.743Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0b/b14510b38ba91caf43006209db846a696ceea6a847a0c9ba0a5b1adc53d6/xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296", size = 41056, upload-time = "2025-10-02T14:37:02.879Z" }, + { url = "https://files.pythonhosted.org/packages/50/55/15a7b8a56590e66ccd374bbfa3f9ffc45b810886c8c3b614e3f90bd2367c/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13", size = 36251, upload-time = "2025-10-02T14:37:04.44Z" }, + { url = "https://files.pythonhosted.org/packages/62/b2/5ac99a041a29e58e95f907876b04f7067a0242cb85b5f39e726153981503/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd", size = 32481, upload-time = "2025-10-02T14:37:05.869Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d9/8d95e906764a386a3d3b596f3c68bb63687dfca806373509f51ce8eea81f/xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d", size = 31565, upload-time = "2025-10-02T14:37:06.966Z" }, +] + 
[[package]] name = "zipp" version = "3.15.0" @@ -4784,3 +5360,93 @@ sdist = { url = "https://files.pythonhosted.org/packages/00/27/f0ac6b846684cecce wheels = [ { url = "https://files.pythonhosted.org/packages/5b/fa/c9e82bbe1af6266adf08afb563905eb87cab83fde00a0a08963510621047/zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556", size = 6758, upload-time = "2023-02-25T02:17:20.807Z" }, ] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/7a/28efd1d371f1acd037ac64ed1c5e2b41514a6cc937dd6ab6a13ab9f0702f/zstandard-0.25.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e59fdc271772f6686e01e1b3b74537259800f57e24280be3f29c8a0deb1904dd", size = 795256, upload-time = "2025-09-14T22:15:56.415Z" }, + { url = "https://files.pythonhosted.org/packages/96/34/ef34ef77f1ee38fc8e4f9775217a613b452916e633c4f1d98f31db52c4a5/zstandard-0.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4d441506e9b372386a5271c64125f72d5df6d2a8e8a2a45a0ae09b03cb781ef7", size = 640565, upload-time = "2025-09-14T22:15:58.177Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1b/4fdb2c12eb58f31f28c4d28e8dc36611dd7205df8452e63f52fb6261d13e/zstandard-0.25.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:ab85470ab54c2cb96e176f40342d9ed41e58ca5733be6a893b730e7af9c40550", size = 5345306, upload-time = "2025-09-14T22:16:00.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/28/a44bdece01bca027b079f0e00be3b6bd89a4df180071da59a3dd7381665b/zstandard-0.25.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e05ab82ea7753354bb054b92e2f288afb750e6b439ff6ca78af52939ebbc476d", size = 5055561, upload-time = "2025-09-14T22:16:02.22Z" }, + { url = "https://files.pythonhosted.org/packages/e9/74/68341185a4f32b274e0fc3410d5ad0750497e1acc20bd0f5b5f64ce17785/zstandard-0.25.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:78228d8a6a1c177a96b94f7e2e8d012c55f9c760761980da16ae7546a15a8e9b", size = 5402214, upload-time = "2025-09-14T22:16:04.109Z" }, + { url = "https://files.pythonhosted.org/packages/8b/67/f92e64e748fd6aaffe01e2b75a083c0c4fd27abe1c8747fee4555fcee7dd/zstandard-0.25.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:2b6bd67528ee8b5c5f10255735abc21aa106931f0dbaf297c7be0c886353c3d0", size = 5449703, upload-time = "2025-09-14T22:16:06.312Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e5/6d36f92a197c3c17729a2125e29c169f460538a7d939a27eaaa6dcfcba8e/zstandard-0.25.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4b6d83057e713ff235a12e73916b6d356e3084fd3d14ced499d84240f3eecee0", size = 5556583, upload-time = "2025-09-14T22:16:08.457Z" }, + { url = "https://files.pythonhosted.org/packages/d7/83/41939e60d8d7ebfe2b747be022d0806953799140a702b90ffe214d557638/zstandard-0.25.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9174f4ed06f790a6869b41cba05b43eeb9a35f8993c4422ab853b705e8112bbd", size = 5045332, upload-time = "2025-09-14T22:16:10.444Z" }, + { url = "https://files.pythonhosted.org/packages/b3/87/d3ee185e3d1aa0133399893697ae91f221fda79deb61adbe998a7235c43f/zstandard-0.25.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:25f8f3cd45087d089aef5ba3848cd9efe3ad41163d3400862fb42f81a3a46701", size = 5572283, upload-time = "2025-09-14T22:16:12.128Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/1d/58635ae6104df96671076ac7d4ae7816838ce7debd94aecf83e30b7121b0/zstandard-0.25.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3756b3e9da9b83da1796f8809dd57cb024f838b9eeafde28f3cb472012797ac1", size = 4959754, upload-time = "2025-09-14T22:16:14.225Z" }, + { url = "https://files.pythonhosted.org/packages/75/d6/57e9cb0a9983e9a229dd8fd2e6e96593ef2aa82a3907188436f22b111ccd/zstandard-0.25.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:81dad8d145d8fd981b2962b686b2241d3a1ea07733e76a2f15435dfb7fb60150", size = 5266477, upload-time = "2025-09-14T22:16:16.343Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a9/ee891e5edf33a6ebce0a028726f0bbd8567effe20fe3d5808c42323e8542/zstandard-0.25.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a5a419712cf88862a45a23def0ae063686db3d324cec7edbe40509d1a79a0aab", size = 5440914, upload-time = "2025-09-14T22:16:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/58/08/a8522c28c08031a9521f27abc6f78dbdee7312a7463dd2cfc658b813323b/zstandard-0.25.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e7360eae90809efd19b886e59a09dad07da4ca9ba096752e61a2e03c8aca188e", size = 5819847, upload-time = "2025-09-14T22:16:20.559Z" }, + { url = "https://files.pythonhosted.org/packages/6f/11/4c91411805c3f7b6f31c60e78ce347ca48f6f16d552fc659af6ec3b73202/zstandard-0.25.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:75ffc32a569fb049499e63ce68c743155477610532da1eb38e7f24bf7cd29e74", size = 5363131, upload-time = "2025-09-14T22:16:22.206Z" }, + { url = "https://files.pythonhosted.org/packages/ef/d6/8c4bd38a3b24c4c7676a7a3d8de85d6ee7a983602a734b9f9cdefb04a5d6/zstandard-0.25.0-cp310-cp310-win32.whl", hash = "sha256:106281ae350e494f4ac8a80470e66d1fe27e497052c8d9c3b95dc4cf1ade81aa", size = 436469, upload-time = "2025-09-14T22:16:25.002Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/90/96d50ad417a8ace5f841b3228e93d1bb13e6ad356737f42e2dde30d8bd68/zstandard-0.25.0-cp310-cp310-win_amd64.whl", hash = "sha256:ea9d54cc3d8064260114a0bbf3479fc4a98b21dffc89b3459edd506b69262f6e", size = 506100, upload-time = "2025-09-14T22:16:23.569Z" }, + { url = "https://files.pythonhosted.org/packages/2a/83/c3ca27c363d104980f1c9cee1101cc8ba724ac8c28a033ede6aab89585b1/zstandard-0.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:933b65d7680ea337180733cf9e87293cc5500cc0eb3fc8769f4d3c88d724ec5c", size = 795254, upload-time = "2025-09-14T22:16:26.137Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4d/e66465c5411a7cf4866aeadc7d108081d8ceba9bc7abe6b14aa21c671ec3/zstandard-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3f79487c687b1fc69f19e487cd949bf3aae653d181dfb5fde3bf6d18894706f", size = 640559, upload-time = "2025-09-14T22:16:27.973Z" }, + { url = "https://files.pythonhosted.org/packages/12/56/354fe655905f290d3b147b33fe946b0f27e791e4b50a5f004c802cb3eb7b/zstandard-0.25.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:0bbc9a0c65ce0eea3c34a691e3c4b6889f5f3909ba4822ab385fab9057099431", size = 5348020, upload-time = "2025-09-14T22:16:29.523Z" }, + { url = "https://files.pythonhosted.org/packages/3b/13/2b7ed68bd85e69a2069bcc72141d378f22cae5a0f3b353a2c8f50ef30c1b/zstandard-0.25.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:01582723b3ccd6939ab7b3a78622c573799d5d8737b534b86d0e06ac18dbde4a", size = 5058126, upload-time = "2025-09-14T22:16:31.811Z" }, + { url = "https://files.pythonhosted.org/packages/c9/dd/fdaf0674f4b10d92cb120ccff58bbb6626bf8368f00ebfd2a41ba4a0dc99/zstandard-0.25.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5f1ad7bf88535edcf30038f6919abe087f606f62c00a87d7e33e7fc57cb69fcc", size = 5405390, upload-time = "2025-09-14T22:16:33.486Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/67/354d1555575bc2490435f90d67ca4dd65238ff2f119f30f72d5cde09c2ad/zstandard-0.25.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:06acb75eebeedb77b69048031282737717a63e71e4ae3f77cc0c3b9508320df6", size = 5452914, upload-time = "2025-09-14T22:16:35.277Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/e9cfd801a3f9190bf3e759c422bbfd2247db9d7f3d54a56ecde70137791a/zstandard-0.25.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9300d02ea7c6506f00e627e287e0492a5eb0371ec1670ae852fefffa6164b072", size = 5559635, upload-time = "2025-09-14T22:16:37.141Z" }, + { url = "https://files.pythonhosted.org/packages/21/88/5ba550f797ca953a52d708c8e4f380959e7e3280af029e38fbf47b55916e/zstandard-0.25.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfd06b1c5584b657a2892a6014c2f4c20e0db0208c159148fa78c65f7e0b0277", size = 5048277, upload-time = "2025-09-14T22:16:38.807Z" }, + { url = "https://files.pythonhosted.org/packages/46/c0/ca3e533b4fa03112facbe7fbe7779cb1ebec215688e5df576fe5429172e0/zstandard-0.25.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f373da2c1757bb7f1acaf09369cdc1d51d84131e50d5fa9863982fd626466313", size = 5574377, upload-time = "2025-09-14T22:16:40.523Z" }, + { url = "https://files.pythonhosted.org/packages/12/9b/3fb626390113f272abd0799fd677ea33d5fc3ec185e62e6be534493c4b60/zstandard-0.25.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6c0e5a65158a7946e7a7affa6418878ef97ab66636f13353b8502d7ea03c8097", size = 4961493, upload-time = "2025-09-14T22:16:43.3Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d3/23094a6b6a4b1343b27ae68249daa17ae0651fcfec9ed4de09d14b940285/zstandard-0.25.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c8e167d5adf59476fa3e37bee730890e389410c354771a62e3c076c86f9f7778", size = 5269018, upload-time = "2025-09-14T22:16:45.292Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/a7/bb5a0c1c0f3f4b5e9d5b55198e39de91e04ba7c205cc46fcb0f95f0383c1/zstandard-0.25.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:98750a309eb2f020da61e727de7d7ba3c57c97cf6213f6f6277bb7fb42a8e065", size = 5443672, upload-time = "2025-09-14T22:16:47.076Z" }, + { url = "https://files.pythonhosted.org/packages/27/22/503347aa08d073993f25109c36c8d9f029c7d5949198050962cb568dfa5e/zstandard-0.25.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22a086cff1b6ceca18a8dd6096ec631e430e93a8e70a9ca5efa7561a00f826fa", size = 5822753, upload-time = "2025-09-14T22:16:49.316Z" }, + { url = "https://files.pythonhosted.org/packages/e2/be/94267dc6ee64f0f8ba2b2ae7c7a2df934a816baaa7291db9e1aa77394c3c/zstandard-0.25.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72d35d7aa0bba323965da807a462b0966c91608ef3a48ba761678cb20ce5d8b7", size = 5366047, upload-time = "2025-09-14T22:16:51.328Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a3/732893eab0a3a7aecff8b99052fecf9f605cf0fb5fb6d0290e36beee47a4/zstandard-0.25.0-cp311-cp311-win32.whl", hash = "sha256:f5aeea11ded7320a84dcdd62a3d95b5186834224a9e55b92ccae35d21a8b63d4", size = 436484, upload-time = "2025-09-14T22:16:55.005Z" }, + { url = "https://files.pythonhosted.org/packages/43/a3/c6155f5c1cce691cb80dfd38627046e50af3ee9ddc5d0b45b9b063bfb8c9/zstandard-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:daab68faadb847063d0c56f361a289c4f268706b598afbf9ad113cbe5c38b6b2", size = 506183, upload-time = "2025-09-14T22:16:52.753Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/8945ab86a0820cc0e0cdbf38086a92868a9172020fdab8a03ac19662b0e5/zstandard-0.25.0-cp311-cp311-win_arm64.whl", hash = "sha256:22a06c5df3751bb7dc67406f5374734ccee8ed37fc5981bf1ad7041831fa1137", size = 462533, upload-time = "2025-09-14T22:16:53.878Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 5451652, upload-time = 
"2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = "2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = 
"2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, + { url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" }, + { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" }, + { url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" }, + { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" }, + { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" }, + { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" }, + { url = "https://files.pythonhosted.org/packages/c2/38/f249a2050ad1eea0bb364046153942e34abba95dd5520af199aed86fbb49/zstandard-0.25.0-cp314-cp314-win32.whl", hash = "sha256:da469dc041701583e34de852d8634703550348d5822e66a0c827d39b05365b12", size = 444513, upload-time = "2025-09-14T22:18:20.61Z" }, + { url = "https://files.pythonhosted.org/packages/3a/43/241f9615bcf8ba8903b3f0432da069e857fc4fd1783bd26183db53c4804b/zstandard-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:c19bcdd826e95671065f8692b5a4aa95c52dc7a02a4c5a0cac46deb879a017a2", size = 516118, upload-time = "2025-09-14T22:18:17.849Z" }, 
+ { url = "https://files.pythonhosted.org/packages/f0/ef/da163ce2450ed4febf6467d77ccb4cd52c4c30ab45624bad26ca0a27260c/zstandard-0.25.0-cp314-cp314-win_arm64.whl", hash = "sha256:d7541afd73985c630bafcd6338d2518ae96060075f9463d7dc14cfb33514383d", size = 476940, upload-time = "2025-09-14T22:18:19.088Z" }, +] From 856cdfab831a6d6e7d195e976e8dc88841374847 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 15:27:35 +0530 Subject: [PATCH 30/49] updates --- .../orchestration/checkpoints.py | 41 ++++++------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/ddpui/core/dashboard_chat/orchestration/checkpoints.py b/ddpui/core/dashboard_chat/orchestration/checkpoints.py index 2a77a341e..ac4dcd7c9 100644 --- a/ddpui/core/dashboard_chat/orchestration/checkpoints.py +++ b/ddpui/core/dashboard_chat/orchestration/checkpoints.py @@ -1,8 +1,6 @@ """Official LangGraph Postgres checkpoint wiring for dashboard chat.""" -from dataclasses import dataclass from functools import lru_cache -import os from django.conf import settings from psycopg import Connection @@ -12,38 +10,23 @@ from langgraph.checkpoint.postgres import PostgresSaver -@dataclass(frozen=True) -class DashboardChatCheckpointConfig: - """Configuration for dashboard chat LangGraph checkpoint persistence.""" - - conninfo: str - - @classmethod - def from_env(cls) -> "DashboardChatCheckpointConfig": - """Build checkpoint configuration from env or Django DB settings.""" - conninfo = os.getenv("AI_DASHBOARD_CHAT_LANGGRAPH_CHECKPOINT_CONNINFO") - if conninfo: - return cls(conninfo=conninfo) - - default_db = settings.DATABASES["default"] - return cls( - conninfo=make_conninfo( - dbname=default_db.get("NAME") or "", - user=default_db.get("USER") or "", - password=default_db.get("PASSWORD") or "", - host=default_db.get("HOST") or "", - port=str(default_db.get("PORT") or ""), - ) - ) +def _conninfo_from_django() -> str: + default_db = settings.DATABASES["default"] + return make_conninfo( + 
dbname=default_db.get("NAME") or "", + user=default_db.get("USER") or "", + password=default_db.get("PASSWORD") or "", + host=default_db.get("HOST") or "", + port=str(default_db.get("PORT") or ""), + ) class DashboardChatCheckpointer: """Long-lived Postgres saver wrapper used by the shared dashboard chat runtime.""" - def __init__(self, config: DashboardChatCheckpointConfig): - self.config = config + def __init__(self): self.connection = Connection.connect( - config.conninfo, + _conninfo_from_django(), autocommit=True, prepare_threshold=0, row_factory=dict_row, @@ -61,7 +44,7 @@ def close(self) -> None: @lru_cache(maxsize=1) def get_dashboard_chat_checkpointer() -> DashboardChatCheckpointer: """Return the shared checkpoint wrapper for dashboard chat runtime persistence.""" - return DashboardChatCheckpointer(DashboardChatCheckpointConfig.from_env()) + return DashboardChatCheckpointer() def reset_dashboard_chat_checkpointer() -> None: From f26f3eea3741f064e10a2abf0a8d633ad200925b Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 16:19:25 +0530 Subject: [PATCH 31/49] updates --- ddpui/core/dashboard_chat/events.py | 57 --------------- ddpui/websockets/dashboard_chat_consumer.py | 77 +++++++++------------ 2 files changed, 33 insertions(+), 101 deletions(-) delete mode 100644 ddpui/core/dashboard_chat/events.py diff --git a/ddpui/core/dashboard_chat/events.py b/ddpui/core/dashboard_chat/events.py deleted file mode 100644 index 655776c88..000000000 --- a/ddpui/core/dashboard_chat/events.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Websocket event helpers for dashboard chat.""" - -from asgiref.sync import async_to_sync -from channels.layers import get_channel_layer -from django.utils import timezone - -from ddpui.utils.custom_logger import CustomLogger - -logger = CustomLogger("ddpui") - - -def dashboard_chat_group_name(session_id: str) -> str: - """Return the channel-layer group name for a dashboard chat session.""" - return f"dashboard_chat_{session_id}" - - 
-def build_dashboard_chat_event( - *, - event_type: str, - dashboard_id: int, - data: dict, - session_id: str | None = None, - message_id: str | None = None, -) -> dict: - """Build a dashboard chat websocket event envelope.""" - event = { - "event_type": event_type, - "dashboard_id": dashboard_id, - "occurred_at": timezone.now().isoformat(), - "data": data, - } - if session_id is not None: - event["session_id"] = session_id - if message_id is not None: - event["message_id"] = message_id - return event - - -def publish_dashboard_chat_event(session_id: str, event: dict) -> None: - """Publish a dashboard chat event to the session channel-layer group.""" - channel_layer = get_channel_layer() - if channel_layer is None: - return - try: - async_to_sync(channel_layer.group_send)( - dashboard_chat_group_name(session_id), - { - "type": "dashboard_chat_event", - "event": event, - }, - ) - except Exception: - logger.exception( - "failed to publish dashboard chat event for session=%s event_type=%s", - session_id, - event.get("event_type"), - ) diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index b32e7c484..60b7db64b 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -1,13 +1,7 @@ import json from urllib.parse import parse_qs -from asgiref.sync import async_to_sync - from ddpui.core.dashboard_chat.sessions.session_service import execute_dashboard_chat_turn -from ddpui.core.dashboard_chat.events import ( - build_dashboard_chat_event, - dashboard_chat_group_name, -) from ddpui.core.dashboard_chat.sessions.session_service import ( DashboardChatSessionError, create_dashboard_chat_user_message_with_status, @@ -15,6 +9,8 @@ get_or_create_dashboard_chat_session, serialize_dashboard_chat_message, ) +from django.utils import timezone + from ddpui.models.dashboard import Dashboard from ddpui.models.org_preferences import OrgPreferences from ddpui.models.role_based_access 
import RolePermission @@ -26,6 +22,27 @@ logger = CustomLogger("ddpui") +def build_dashboard_chat_event( + *, + event_type: str, + dashboard_id: int, + data: dict, + session_id: str | None = None, + message_id: str | None = None, +) -> dict: + event = { + "event_type": event_type, + "dashboard_id": dashboard_id, + "occurred_at": timezone.now().isoformat(), + "data": data, + } + if session_id is not None: + event["session_id"] = session_id + if message_id is not None: + event["message_id"] = message_id + return event + + class DashboardChatConsumer(BaseConsumer): """Authenticated websocket for dashboard-level chat.""" @@ -33,7 +50,6 @@ def connect(self): query_string = parse_qs(self.scope["query_string"].decode()) token = query_string.get("token", [None])[0] orgslug = query_string.get("orgslug", [None])[0] - self.joined_session_groups = set() if not self.authenticate_user(token, orgslug): self.close() @@ -82,12 +98,13 @@ def websocket_receive(self, message): ) return - available, unavailable_message = self._chat_available() - if not available: + try: + self._assert_chat_available() + except Exception as error: self.respond( WebsocketResponse( data={}, - message=unavailable_message, + message=str(error), status=WebsocketResponseStatus.ERROR, ) ) @@ -115,7 +132,6 @@ def websocket_receive(self, message): client_message_id=payload.get("client_message_id"), ) user_message = user_message_result.message - self._subscribe_to_session(str(session.session_id)) if not user_message_result.created: assistant_message = find_dashboard_chat_assistant_reply( @@ -216,50 +232,23 @@ def websocket_receive(self, message): ) ) - def websocket_disconnect(self, message): - """Remove the socket from any joined session groups on disconnect.""" - if getattr(self, "channel_layer", None) is None: - return - for group_name in getattr(self, "joined_session_groups", set()): - async_to_sync(self.channel_layer.group_discard)(group_name, self.channel_name) - - def dashboard_chat_event(self, event): - 
"""Forward dashboard chat events from the channel layer to the browser.""" - self.respond( - WebsocketResponse( - data=event["event"], - message="", - status=WebsocketResponseStatus.SUCCESS, - ) - ) - - def _subscribe_to_session(self, session_id: str) -> None: - """Join the session-scoped channel-layer group if not already subscribed.""" - group_name = dashboard_chat_group_name(session_id) - if group_name in self.joined_session_groups: - return - async_to_sync(self.channel_layer.group_add)(group_name, self.channel_name) - self.joined_session_groups.add(group_name) - - def _chat_available(self) -> tuple[bool, str]: - """Return whether the current org is ready for dashboard chat.""" + def _assert_chat_available(self) -> None: + """Raise Exception if the org is not ready for dashboard chat.""" feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get( "AI_DASHBOARD_CHAT", False ) if not feature_enabled: - return False, "Chat with dashboards is not enabled for this organization" + raise Exception("Chat with dashboards is not enabled for this organization") org_preferences = OrgPreferences.objects.filter(org=self.orguser.org).first() if org_preferences is None or not org_preferences.ai_data_sharing_enabled: - return False, "Chat with dashboards is not enabled for this organization" + raise Exception("Chat with dashboards is not enabled for this organization") if self.orguser.org.dbt is None: - return False, "Chat with dashboards is not available because dbt is not configured" + raise Exception("Chat with dashboards is not available because dbt is not configured") if self.orguser.org.dbt.vector_last_ingested_at is None: - return False, "Chat with dashboards is still being prepared for this organization" - - return True, "" + raise Exception("Chat with dashboards is still being prepared for this organization") def _has_permission(self, permission_slug: str) -> bool: """Check the authenticated orguser's role permission directly.""" From 
b7ee7eac9a7356cd10c408576d774a1108983289 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 17:15:41 +0530 Subject: [PATCH 32/49] cleanup --- .../sessions/session_service.py | 27 +-- ddpui/websockets/dashboard_chat_consumer.py | 193 ++++++------------ 2 files changed, 68 insertions(+), 152 deletions(-) diff --git a/ddpui/core/dashboard_chat/sessions/session_service.py b/ddpui/core/dashboard_chat/sessions/session_service.py index 2dcbdc5fb..ed7e54cf5 100644 --- a/ddpui/core/dashboard_chat/sessions/session_service.py +++ b/ddpui/core/dashboard_chat/sessions/session_service.py @@ -176,8 +176,12 @@ def find_dashboard_chat_assistant_reply( ) -def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: - """Load session and message, run the runtime, persist and return the assistant reply.""" +def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> DashboardChatMessage: + """Load session and message, run the runtime, persist and return the assistant reply. + + Returns the assistant DashboardChatMessage on success. + Raises Exception if the session or user message cannot be found. + """ from ddpui.core.dashboard_chat.orchestration.orchestrator import get_dashboard_chat_runtime session = ( @@ -186,7 +190,7 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: .first() ) if session is None or session.dashboard is None: - return {"status": "skipped_missing_session", "session": None, "user_message": None} + raise Exception("Chat session could not be found") user_message = DashboardChatMessage.objects.filter( id=user_message_id, @@ -194,19 +198,15 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: role="user", ).first() if user_message is None: - return {"status": "skipped_missing_message", "session": session, "user_message": None} + raise Exception("Chat message could not be found") + # Safety net: if an assistant reply already exists, return it without re-running. 
existing_assistant_message = find_dashboard_chat_assistant_reply( session=session, user_message=user_message, ) if existing_assistant_message is not None: - return { - "status": "skipped_existing_reply", - "session": session, - "user_message": user_message, - "assistant_message": existing_assistant_message, - } + return existing_assistant_message response = get_dashboard_chat_runtime().run( org=session.org, @@ -236,12 +236,7 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> dict: ) assistant_message.response_latency_ms = response_latency_ms assistant_message.save(update_fields=["response_latency_ms"]) - return { - "status": "completed", - "session": session, - "user_message": user_message, - "assistant_message": assistant_message, - } + return assistant_message def _create_dashboard_chat_message( diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 60b7db64b..52ffe720b 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -1,17 +1,18 @@ import json from urllib.parse import parse_qs -from ddpui.core.dashboard_chat.sessions.session_service import execute_dashboard_chat_turn +from django.utils import timezone + from ddpui.core.dashboard_chat.sessions.session_service import ( DashboardChatSessionError, create_dashboard_chat_user_message_with_status, + execute_dashboard_chat_turn, find_dashboard_chat_assistant_reply, get_or_create_dashboard_chat_session, serialize_dashboard_chat_message, ) -from django.utils import timezone - from ddpui.models.dashboard import Dashboard +from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org_preferences import OrgPreferences from ddpui.models.role_based_access import RolePermission from ddpui.utils.custom_logger import CustomLogger @@ -22,27 +23,6 @@ logger = CustomLogger("ddpui") -def build_dashboard_chat_event( - *, - event_type: str, - 
dashboard_id: int, - data: dict, - session_id: str | None = None, - message_id: str | None = None, -) -> dict: - event = { - "event_type": event_type, - "dashboard_id": dashboard_id, - "occurred_at": timezone.now().isoformat(), - "data": data, - } - if session_id is not None: - event["session_id"] = session_id - if message_id is not None: - event["message_id"] = message_id - return event - - class DashboardChatConsumer(BaseConsumer): """Authenticated websocket for dashboard-level chat.""" @@ -68,46 +48,22 @@ def websocket_receive(self, message): try: payload = json.loads(message["text"]) except (KeyError, ValueError): - self.respond( - WebsocketResponse( - data={}, - message="Invalid websocket payload", - status=WebsocketResponseStatus.ERROR, - ) - ) + self._send_error("Invalid websocket payload") return if payload.get("action") != "send_message": - self.respond( - WebsocketResponse( - data={}, - message="Unsupported websocket action", - status=WebsocketResponseStatus.ERROR, - ) - ) + self._send_error("Unsupported websocket action") return raw_message = str(payload.get("message") or "").strip() if not raw_message: - self.respond( - WebsocketResponse( - data={}, - message="Message is required", - status=WebsocketResponseStatus.ERROR, - ) - ) + self._send_error("Message is required") return try: self._assert_chat_available() except Exception as error: - self.respond( - WebsocketResponse( - data={}, - message=str(error), - status=WebsocketResponseStatus.ERROR, - ) - ) + self._send_error(str(error)) return try: @@ -117,13 +73,7 @@ def websocket_receive(self, message): session_id=payload.get("session_id"), ) except DashboardChatSessionError as error: - self.respond( - WebsocketResponse( - data={}, - message=str(error), - status=WebsocketResponseStatus.ERROR, - ) - ) + self._send_error(str(error)) return user_message_result = create_dashboard_chat_user_message_with_status( @@ -134,104 +84,75 @@ def websocket_receive(self, message): user_message = 
user_message_result.message if not user_message_result.created: - assistant_message = find_dashboard_chat_assistant_reply( + existing_reply = find_dashboard_chat_assistant_reply( session=session, user_message=user_message, ) - if assistant_message is not None: - self.respond( - WebsocketResponse( - data=build_dashboard_chat_event( - event_type="assistant_message", - session_id=str(session.session_id), - dashboard_id=self.dashboard.id, - message_id=str(assistant_message.id), - data=serialize_dashboard_chat_message(assistant_message), - ), - message="", - status=WebsocketResponseStatus.SUCCESS, - ) - ) + if existing_reply is not None: + self._send_assistant_message(session, existing_reply) return - self.respond( - WebsocketResponse( - data=build_dashboard_chat_event( - event_type="progress", - session_id=str(session.session_id), - dashboard_id=self.dashboard.id, - message_id=str(user_message.id), - data={"label": "thinking"}, - ), - message="", - status=WebsocketResponseStatus.SUCCESS, - ) - ) + self._send_progress(session, user_message) try: - result = execute_dashboard_chat_turn(str(session.session_id), user_message.id) - except Exception: - logger.exception( - "dashboard chat turn failed inline for session=%s", - session.session_id, - ) - self.respond( - WebsocketResponse( - data={}, - message="Something went wrong while generating the response", - status=WebsocketResponseStatus.ERROR, - ) + assistant_message = execute_dashboard_chat_turn( + str(session.session_id), user_message.id ) + except Exception: + logger.exception("dashboard chat turn failed for session=%s", session.session_id) + self._send_error("Something went wrong while generating the response") return - assistant_message = result.get("assistant_message") - if ( - result["status"] in {"completed", "skipped_existing_reply"} - and assistant_message is not None - ): - self.respond( - WebsocketResponse( - data=build_dashboard_chat_event( - event_type="assistant_message", - 
session_id=str(session.session_id), - dashboard_id=self.dashboard.id, - message_id=str(assistant_message.id), - data=serialize_dashboard_chat_message(assistant_message), - ), - message="", - status=WebsocketResponseStatus.SUCCESS, - ) - ) - return + self._send_assistant_message(session, assistant_message) - if result["status"] == "skipped_missing_session": - self.respond( - WebsocketResponse( - data={}, - message="Chat session could not be found", - status=WebsocketResponseStatus.ERROR, - ) + # ------------------------------------------------------------------------- + # Response helpers + # ------------------------------------------------------------------------- + + def _send_progress(self, session: DashboardChatSession, user_message: DashboardChatMessage): + self.respond( + WebsocketResponse( + status=WebsocketResponseStatus.SUCCESS, + message="", + data={ + "event_type": "progress", + "session_id": str(session.session_id), + "message_id": str(user_message.id), + "dashboard_id": self.dashboard.id, + "occurred_at": timezone.now().isoformat(), + }, ) - return + ) - if result["status"] == "skipped_missing_message": - self.respond( - WebsocketResponse( - data={}, - message="Chat message could not be found", - status=WebsocketResponseStatus.ERROR, - ) + def _send_assistant_message(self, session: DashboardChatSession, message: DashboardChatMessage): + self.respond( + WebsocketResponse( + status=WebsocketResponseStatus.SUCCESS, + message="", + data={ + "event_type": "assistant_message", + "session_id": str(session.session_id), + "message_id": str(message.id), + "dashboard_id": self.dashboard.id, + "occurred_at": timezone.now().isoformat(), + **serialize_dashboard_chat_message(message), + }, ) - return + ) + def _send_error(self, message: str): self.respond( WebsocketResponse( - data={}, - message="Something went wrong while generating the response", status=WebsocketResponseStatus.ERROR, + message=message, + data={}, ) ) + # 
------------------------------------------------------------------------- + # Guards + # ------------------------------------------------------------------------- + def _assert_chat_available(self) -> None: """Raise Exception if the org is not ready for dashboard chat.""" feature_enabled = get_all_feature_flags_for_org(self.orguser.org).get( From c73c912e0b659b2529127c24624e95d36e5c320a Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 17:35:46 +0530 Subject: [PATCH 33/49] updates --- ddpui/websockets/dashboard_chat_consumer.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 52ffe720b..6f2ca5f0c 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -7,7 +7,6 @@ DashboardChatSessionError, create_dashboard_chat_user_message_with_status, execute_dashboard_chat_turn, - find_dashboard_chat_assistant_reply, get_or_create_dashboard_chat_session, serialize_dashboard_chat_message, ) @@ -76,21 +75,11 @@ def websocket_receive(self, message): self._send_error(str(error)) return - user_message_result = create_dashboard_chat_user_message_with_status( + user_message = create_dashboard_chat_user_message_with_status( session=session, content=raw_message, client_message_id=payload.get("client_message_id"), - ) - user_message = user_message_result.message - - if not user_message_result.created: - existing_reply = find_dashboard_chat_assistant_reply( - session=session, - user_message=user_message, - ) - if existing_reply is not None: - self._send_assistant_message(session, existing_reply) - return + ).message self._send_progress(session, user_message) From a9cf71eef011d40b56f7d077faaa666c0c88fc13 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 19:00:08 +0530 Subject: [PATCH 34/49] using pydantic contracts and validation --- .../context/dashboard_table_allowlist.py | 
22 +- .../contracts/conversation_contracts.py | 25 +- .../contracts/intent_contracts.py | 19 +- .../contracts/response_contracts.py | 45 ++- .../contracts/retrieval_contracts.py | 13 +- .../dashboard_chat/contracts/sql_contracts.py | 13 +- .../orchestration/intent_routing.py | 6 +- .../llm_tools/implementations/schema_tools.py | 18 +- .../implementations/sql_execution_tools.py | 4 +- .../implementations/sql_validation.py | 29 +- .../implementations/vector_retrieval_tool.py | 12 +- .../llm_tools/runtime/tool_loop.py | 8 +- .../llm_tools/runtime/turn_context.py | 51 ++-- .../orchestration/nodes/compose_response.py | 93 +++--- .../orchestration/nodes/finalize.py | 68 +++-- .../nodes/handle_follow_up_context.py | 23 +- .../nodes/handle_follow_up_sql.py | 23 +- .../orchestration/nodes/handle_irrelevant.py | 17 +- .../nodes/handle_needs_clarification.py | 29 +- .../nodes/handle_query_with_sql.py | 23 +- .../nodes/handle_query_without_sql.py | 23 +- .../orchestration/nodes/handle_small_talk.py | 19 +- .../orchestration/nodes/load_context.py | 11 +- .../orchestration/nodes/route_intent.py | 16 +- .../orchestration/orchestrator.py | 13 +- .../orchestration/response_composer.py | 5 +- .../orchestration/state/accessors.py | 59 ---- .../orchestration/state/payload_codec.py | 285 ------------------ .../tool_loop_message_builder.py | 6 +- .../sessions/session_service.py | 26 +- 30 files changed, 340 insertions(+), 664 deletions(-) delete mode 100644 ddpui/core/dashboard_chat/orchestration/state/accessors.py delete mode 100644 ddpui/core/dashboard_chat/orchestration/state/payload_codec.py diff --git a/ddpui/core/dashboard_chat/context/dashboard_table_allowlist.py b/ddpui/core/dashboard_chat/context/dashboard_table_allowlist.py index 29256f111..70891f09e 100644 --- a/ddpui/core/dashboard_chat/context/dashboard_table_allowlist.py +++ b/ddpui/core/dashboard_chat/context/dashboard_table_allowlist.py @@ -1,10 +1,11 @@ """Dashboard table allowlist derived from dashboard exports and dbt 
lineage.""" import json -from dataclasses import dataclass, field from pathlib import Path from typing import Any +from pydantic import BaseModel, Field + from ddpui.core.orgdbt_manager import DbtProjectManager from ddpui.models.org import OrgDbt @@ -27,16 +28,15 @@ def build_dashboard_chat_table_name(schema_name: str | None, table_name: str | N return normalize_dashboard_chat_table_name(f"{schema_name}.{table_name}") -@dataclass -class DashboardChatAllowlist: +class DashboardChatAllowlist(BaseModel): """Allowed tables and dbt nodes for the current dashboard context.""" - chart_tables: set[str] = field(default_factory=set) - upstream_tables: set[str] = field(default_factory=set) - allowed_tables: set[str] = field(default_factory=set) - allowed_unique_ids: set[str] = field(default_factory=set) - unique_id_to_table: dict[str, str] = field(default_factory=dict) - table_to_unique_ids: dict[str, set[str]] = field(default_factory=dict) + chart_tables: set[str] = Field(default_factory=set) + upstream_tables: set[str] = Field(default_factory=set) + allowed_tables: set[str] = Field(default_factory=set) + allowed_unique_ids: set[str] = Field(default_factory=set) + unique_id_to_table: dict[str, str] = Field(default_factory=dict) + table_to_unique_ids: dict[str, set[str]] = Field(default_factory=dict) def is_allowed(self, table_name: str | None) -> bool: """Return whether the table is inside the dashboard allowlist.""" @@ -57,7 +57,9 @@ def is_unique_id_allowed(self, unique_id: str | None) -> bool: def prioritized_tables(self, limit: int | None = None) -> list[str]: """Return chart tables first, then lineage tables.""" - ordered_tables = sorted(self.chart_tables) + sorted(self.upstream_tables - self.chart_tables) + ordered_tables = sorted(self.chart_tables) + sorted( + self.upstream_tables - self.chart_tables + ) deduped_tables = list(dict.fromkeys(ordered_tables)) if limit is None: return deduped_tables diff --git 
a/ddpui/core/dashboard_chat/contracts/conversation_contracts.py b/ddpui/core/dashboard_chat/contracts/conversation_contracts.py index 4af710032..aef85dfdd 100644 --- a/ddpui/core/dashboard_chat/contracts/conversation_contracts.py +++ b/ddpui/core/dashboard_chat/contracts/conversation_contracts.py @@ -1,28 +1,31 @@ """Conversation-related dashboard chat contracts.""" -from dataclasses import dataclass, field from typing import Any +from pydantic import BaseModel, ConfigDict, Field -@dataclass(frozen=True) -class DashboardChatConversationMessage: + +class DashboardChatConversationMessage(BaseModel): """Single prior conversation message.""" + model_config = ConfigDict(frozen=True) + role: str content: str - payload: dict[str, Any] = field(default_factory=dict) + payload: dict[str, Any] = Field(default_factory=dict) -@dataclass(frozen=True) -class DashboardChatConversationContext: +class DashboardChatConversationContext(BaseModel): """Reusable context extracted from prior assistant turns.""" + model_config = ConfigDict(frozen=True) + last_sql_query: str | None = None - last_tables_used: list[str] = field(default_factory=list) - last_chart_ids: list[str] = field(default_factory=list) - last_metrics: list[str] = field(default_factory=list) - last_dimensions: list[str] = field(default_factory=list) - last_filters: list[str] = field(default_factory=list) + last_tables_used: list[str] = Field(default_factory=list) + last_chart_ids: list[str] = Field(default_factory=list) + last_metrics: list[str] = Field(default_factory=list) + last_dimensions: list[str] = Field(default_factory=list) + last_filters: list[str] = Field(default_factory=list) last_response_type: str | None = None last_answer_text: str | None = None last_intent: str | None = None diff --git a/ddpui/core/dashboard_chat/contracts/intent_contracts.py b/ddpui/core/dashboard_chat/contracts/intent_contracts.py index cd93e2fd6..a86242b45 100644 --- a/ddpui/core/dashboard_chat/contracts/intent_contracts.py +++ 
b/ddpui/core/dashboard_chat/contracts/intent_contracts.py @@ -1,9 +1,10 @@ """Intent-routing dashboard chat contracts.""" -from dataclasses import dataclass, field from enum import Enum from typing import Any +from pydantic import BaseModel, ConfigDict, Field + class DashboardChatIntent(str, Enum): """Prototype-aligned top-level intents for dashboard chat.""" @@ -17,26 +18,28 @@ class DashboardChatIntent(str, Enum): IRRELEVANT = "irrelevant" -@dataclass(frozen=True) -class DashboardChatFollowUpContext: +class DashboardChatFollowUpContext(BaseModel): """Prototype-style follow-up metadata returned by the router.""" + model_config = ConfigDict(frozen=True) + is_follow_up: bool follow_up_type: str | None = None - reusable_elements: dict[str, Any] = field(default_factory=dict) + reusable_elements: dict[str, Any] = Field(default_factory=dict) modification_instruction: str | None = None -@dataclass(frozen=True) -class DashboardChatIntentDecision: +class DashboardChatIntentDecision(BaseModel): """Intent-routing outcome.""" + model_config = ConfigDict(frozen=True) + intent: DashboardChatIntent confidence: float reason: str - missing_info: list[str] = field(default_factory=list) + missing_info: list[str] = Field(default_factory=list) force_tool_usage: bool = False clarification_question: str | None = None - follow_up_context: DashboardChatFollowUpContext = field( + follow_up_context: DashboardChatFollowUpContext = Field( default_factory=lambda: DashboardChatFollowUpContext(is_follow_up=False) ) diff --git a/ddpui/core/dashboard_chat/contracts/response_contracts.py b/ddpui/core/dashboard_chat/contracts/response_contracts.py index 2bfb28de4..60df8ddaa 100644 --- a/ddpui/core/dashboard_chat/contracts/response_contracts.py +++ b/ddpui/core/dashboard_chat/contracts/response_contracts.py @@ -1,18 +1,19 @@ """Response-related dashboard chat contracts.""" -from dataclasses import asdict, dataclass, field import json from typing import Any from django.core.serializers.json import 
DjangoJSONEncoder +from pydantic import BaseModel, ConfigDict, Field from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntent -@dataclass(frozen=True) -class DashboardChatCitation: +class DashboardChatCitation(BaseModel): """Citation attached to a chat response.""" + model_config = ConfigDict(frozen=True) + source_type: str source_identifier: str title: str @@ -20,36 +21,26 @@ class DashboardChatCitation: dashboard_id: int | None = None table_name: str | None = None - def to_dict(self) -> dict[str, Any]: - """Return a serializable citation payload.""" - return asdict(self) - -@dataclass(frozen=True) -class DashboardChatResponse: +class DashboardChatResponse(BaseModel): """Final runtime response returned by the LangGraph runner.""" + model_config = ConfigDict(frozen=True) + answer_text: str intent: DashboardChatIntent - citations: list[DashboardChatCitation] = field(default_factory=list) - warnings: list[str] = field(default_factory=list) + citations: list[DashboardChatCitation] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) sql: str | None = None sql_results: list[dict[str, Any]] | None = None - usage: dict[str, Any] = field(default_factory=dict) - tool_calls: list[dict[str, Any]] = field(default_factory=list) - metadata: dict[str, Any] = field(default_factory=dict) + usage: dict[str, Any] = Field(default_factory=dict) + tool_calls: list[dict[str, Any]] = Field(default_factory=list) + metadata: dict[str, Any] = Field(default_factory=dict) def to_dict(self) -> dict[str, Any]: - """Return a serializable payload.""" - payload = { - "answer_text": self.answer_text, - "intent": self.intent.value, - "citations": [citation.to_dict() for citation in self.citations], - "warnings": self.warnings, - "sql": self.sql, - "sql_results": self.sql_results, - "usage": self.usage, - "tool_calls": self.tool_calls, - "metadata": self.metadata, - } - return json.loads(json.dumps(payload, cls=DjangoJSONEncoder)) + """Return a 
serializable payload safe for JSON storage and websocket delivery. + + Uses DjangoJSONEncoder to handle warehouse-specific types (Decimal, datetime, etc.) + that may appear in sql_results. + """ + return json.loads(json.dumps(self.model_dump(mode="json"), cls=DjangoJSONEncoder)) diff --git a/ddpui/core/dashboard_chat/contracts/retrieval_contracts.py b/ddpui/core/dashboard_chat/contracts/retrieval_contracts.py index ea9a07615..45ab1ba0d 100644 --- a/ddpui/core/dashboard_chat/contracts/retrieval_contracts.py +++ b/ddpui/core/dashboard_chat/contracts/retrieval_contracts.py @@ -1,13 +1,15 @@ """Retrieval-related dashboard chat contracts.""" -from dataclasses import dataclass from typing import Any +from pydantic import BaseModel, ConfigDict -@dataclass(frozen=True) -class DashboardChatRetrievedDocument: + +class DashboardChatRetrievedDocument(BaseModel): """Retrieved document returned from the vector store.""" + model_config = ConfigDict(frozen=True) + document_id: str source_type: str source_identifier: str @@ -16,9 +18,10 @@ class DashboardChatRetrievedDocument: distance: float | None = None -@dataclass(frozen=True) -class DashboardChatSchemaSnippet: +class DashboardChatSchemaSnippet(BaseModel): """Schema description for a warehouse table.""" + model_config = ConfigDict(frozen=True) + table_name: str columns: list[dict[str, Any]] diff --git a/ddpui/core/dashboard_chat/contracts/sql_contracts.py b/ddpui/core/dashboard_chat/contracts/sql_contracts.py index 720c3e195..b8e83d26d 100644 --- a/ddpui/core/dashboard_chat/contracts/sql_contracts.py +++ b/ddpui/core/dashboard_chat/contracts/sql_contracts.py @@ -1,14 +1,15 @@ """SQL-validation dashboard chat contracts.""" -from dataclasses import dataclass, field +from pydantic import BaseModel, ConfigDict, Field -@dataclass(frozen=True) -class DashboardChatSqlValidationResult: +class DashboardChatSqlValidationResult(BaseModel): """Outcome of SQL guard validation.""" + model_config = ConfigDict(frozen=True) + is_valid: bool 
sanitized_sql: str | None - tables: list[str] = field(default_factory=list) - warnings: list[str] = field(default_factory=list) - errors: list[str] = field(default_factory=list) + tables: list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + errors: list[str] = Field(default_factory=list) diff --git a/ddpui/core/dashboard_chat/orchestration/intent_routing.py b/ddpui/core/dashboard_chat/orchestration/intent_routing.py index 01d87d497..acec8d20d 100644 --- a/ddpui/core/dashboard_chat/orchestration/intent_routing.py +++ b/ddpui/core/dashboard_chat/orchestration/intent_routing.py @@ -1,9 +1,11 @@ """Graph intent-routing helpers for dashboard chat orchestration.""" +from ddpui.core.dashboard_chat.contracts import DashboardChatIntentDecision from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision def route_after_intent(state: DashboardChatGraphState) -> str: """Return the next node name for the current classified intent.""" - return get_intent_decision(state).intent.value + return DashboardChatIntentDecision.model_validate( + state.get("intent_decision") or {} + ).intent.value diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py index 8f0b4d9ee..fb0437f04 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/schema_tools.py @@ -5,9 +5,11 @@ from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard from ddpui.utils.custom_logger import CustomLogger +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from 
ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_allowlist -from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import find_tables_with_column +from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import ( + find_tables_with_column, +) from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import ( DashboardChatTurnContext, get_or_load_schema_snippets, @@ -25,7 +27,7 @@ def handle_get_schema_snippets_tool( turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Return schema snippets for allowlisted tables only.""" - allowlist = get_runtime_allowlist(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) requested_tables = [str(table_name).lower() for table_name in args.get("tables") or []] allowed_tables = [ table_name for table_name in requested_tables if allowlist.is_allowed(table_name) @@ -58,7 +60,7 @@ def handle_get_distinct_values_tool( turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Return distinct values and persist validated filter values for the session.""" - allowlist = get_runtime_allowlist(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) table_name = str(args.get("table") or "").lower() column_name = str(args.get("column") or "") limit = max(1, min(int(args.get("limit", 50)), 200)) @@ -69,7 +71,9 @@ def handle_get_distinct_values_tool( "message": (f"Table {table_name} is not accessible in the current dashboard context."), } - schema_snippets_by_table = get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context) + schema_snippets_by_table = get_or_load_schema_snippets( + warehouse_tools_factory, state, turn_context + ) snippet = schema_snippets_by_table.get(table_name) normalized_column_name = column_name.lower() if snippet is not None and normalized_column_name not in { @@ -117,7 +121,7 @@ def 
handle_list_tables_by_keyword_tool( turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Search allowlisted tables by table name or column name.""" - allowlist = get_runtime_allowlist(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) keyword = str(args.get("keyword") or "").strip().lower() limit = max(1, min(int(args.get("limit", 15)), 50)) if not keyword: @@ -192,7 +196,7 @@ def handle_check_table_row_count_tool( turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Count rows in one allowlisted table.""" - allowlist = get_runtime_allowlist(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) table_name = str(args.get("table") or "").lower() if not allowlist.is_allowed(table_name): return { diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py index 380d06f36..83c5054d1 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py @@ -7,8 +7,8 @@ from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_allowlist from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_corrections import ( missing_columns_in_primary_table, structured_sql_execution_error, @@ -33,7 +33,7 @@ def handle_run_sql_query_tool( turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Validate SQL and let the tool loop self-correct on structured failures.""" - allowlist = get_runtime_allowlist(state) + allowlist 
= DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) sql = str(args.get("sql") or "").strip() if not sql: return {"error": "sql_missing", "message": "SQL is required"} diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py index 0c3727119..ba5912e2a 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py @@ -9,11 +9,11 @@ from ddpui.core.dashboard_chat.orchestration.conversation_context import ( extract_requested_follow_up_dimension, ) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import ( - get_conversation_context, - get_intent_decision, +from ddpui.core.dashboard_chat.contracts import ( + DashboardChatConversationContext, + DashboardChatIntentDecision, ) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import ( extract_text_filter_values, find_tables_with_column, @@ -63,7 +63,7 @@ def validate_follow_up_dimension_usage( turn_context: DashboardChatTurnContext, ) -> dict[str, Any] | None: """Keep add-dimension follow-ups from succeeding without changing query granularity.""" - intent_decision = get_intent_decision(state) + intent_decision = DashboardChatIntentDecision.model_validate(state.get("intent_decision") or {}) if intent_decision.intent != DashboardChatIntent.FOLLOW_UP_SQL: return None if intent_decision.follow_up_context.follow_up_type != "add_dimension": @@ -75,7 +75,12 @@ def validate_follow_up_dimension_usage( if not requested_dimension: return None - previous_sql = get_conversation_context(state).last_sql_query or "" + previous_sql = ( + 
DashboardChatConversationContext.model_validate( + state.get("conversation_context") or {} + ).last_sql_query + or "" + ) current_dimensions = structural_dimensions_from_sql(sql) previous_dimensions = structural_dimensions_from_sql(previous_sql) normalized_requested_dimension = normalize_dimension_name(requested_dimension) @@ -131,7 +136,9 @@ def find_missing_distinct_filters( turn_context, tables=query_tables, ) - all_schema_snippets_by_table = get_or_load_schema_snippets(warehouse_tools_factory, state, turn_context) + all_schema_snippets_by_table = get_or_load_schema_snippets( + warehouse_tools_factory, state, turn_context + ) column_types = { table_name: { @@ -152,11 +159,15 @@ def find_missing_distinct_filters( schema_snippets_by_table=full_schema_snippets_by_table, ) if resolved_table is None and qualifier is None: - matching_tables = tables_with_column(normalized_column, query_tables, full_schema_snippets_by_table) + matching_tables = tables_with_column( + normalized_column, query_tables, full_schema_snippets_by_table + ) if len(matching_tables) > 1: continue if resolved_table is None: - candidate_tables = find_tables_with_column(normalized_column, all_schema_snippets_by_table) + candidate_tables = find_tables_with_column( + normalized_column, all_schema_snippets_by_table + ) if qualifier is None and candidate_tables: continue missing.append( diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/vector_retrieval_tool.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/vector_retrieval_tool.py index 7c41f0321..fb11e8abd 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/vector_retrieval_tool.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/vector_retrieval_tool.py @@ -11,12 +11,12 @@ get_or_embed_query, retrieve_vector_documents, ) +from ddpui.models.org import Org +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist from 
ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import ( - get_runtime_allowlist, - get_runtime_org, +from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import ( + DashboardChatTurnContext, ) -from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.turn_context import DashboardChatTurnContext def handle_retrieve_docs_tool( @@ -28,8 +28,8 @@ def handle_retrieve_docs_tool( turn_context: DashboardChatTurnContext, ) -> dict[str, Any]: """Retrieve current-dashboard, org, and dbt context using the tool contract.""" - org = get_runtime_org(state) - allowlist = get_runtime_allowlist(state) + org = Org.objects.select_related("dbt").get(id=int(state["org_id"])) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) dashboard_export = state.get("dashboard_export_payload") or {} query = str(args.get("query") or state["user_query"]).strip() limit = max(1, min(int(args.get("limit", 8)), 20)) diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py index dcdbe7a65..8599b039a 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py @@ -6,7 +6,9 @@ from django.core.serializers.json import DjangoJSONEncoder -from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseToolsError +from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import ( + DashboardChatWarehouseToolsError, +) from ddpui.utils.custom_logger import CustomLogger from ddpui.core.dashboard_chat.orchestration.response_composer import ( @@ -15,8 +17,8 @@ max_turns_message, fallback_answer_text, ) +from ddpui.core.dashboard_chat.contracts import DashboardChatIntentDecision from ddpui.core.dashboard_chat.orchestration.state 
import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.dbt_tools import ( handle_get_dbt_model_info_tool, handle_search_dbt_models_tool, @@ -63,7 +65,7 @@ def execute_tool_loop( ) tool_loop_started_at = perf_counter() seed_validated_distinct_values_from_previous_sql(state, turn_context) - intent_decision = get_intent_decision(state) + intent_decision = DashboardChatIntentDecision.model_validate(state.get("intent_decision") or {}) for turn_index in range(max_turns): tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py index ac326776d..d6fbb2dc0 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py @@ -10,19 +10,12 @@ from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseTools from ddpui.utils.custom_logger import CustomLogger +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatSchemaSnippet from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query +from ddpui.core.dashboard_chat.contracts import DashboardChatConversationContext +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist +from ddpui.models.org import Org from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - deserialize_distinct_payloads, - deserialize_schema_snippets, - serialize_distinct_payloads, - serialize_schema_snippets, -) -from ddpui.core.dashboard_chat.orchestration.state.accessors import ( - 
get_conversation_context, - get_runtime_allowlist, - get_runtime_org, -) from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import ( extract_text_filter_values, resolve_identifier_table, @@ -79,29 +72,45 @@ def get_turn_warehouse_tools( """Build the warehouse tool helper lazily for the turn.""" warehouse_tools = turn_context.warehouse_tools if warehouse_tools is None: - warehouse_tools = warehouse_tools_factory(get_runtime_org(state)) + warehouse_tools = warehouse_tools_factory( + Org.objects.select_related("dbt").get(id=int(state["org_id"])) + ) turn_context.warehouse_tools = warehouse_tools return warehouse_tools def hydrate_schema_snippets_by_table(state: DashboardChatGraphState) -> dict[str, Any]: """Rebuild schema snippets from checkpoint payloads for one turn.""" - return deserialize_schema_snippets(state.get("schema_snippet_payloads")) + return { + k: DashboardChatSchemaSnippet.model_validate(v) + for k, v in (state.get("schema_snippet_payloads") or {}).items() + } def hydrate_validated_distinct_values(state: DashboardChatGraphState) -> set[tuple[str, str, str]]: """Rebuild validated distinct values from checkpoint payloads for one turn.""" - return deserialize_distinct_payloads(state.get("validated_distinct_payloads")) + validated: set[tuple[str, str, str]] = set() + for table_name, column_map in (state.get("validated_distinct_payloads") or {}).items(): + for column_name, values in (column_map or {}).items(): + for value in values or []: + validated.add((str(table_name).lower(), str(column_name).lower(), str(value))) + return validated def current_schema_snippet_payloads(turn_context: DashboardChatTurnContext) -> dict[str, Any]: """Serialize the current turn's schema snippets back into checkpoint-safe payloads.""" - return serialize_schema_snippets(turn_context.schema_snippets_by_table) + return {k: v.model_dump(mode="json") for k, v in turn_context.schema_snippets_by_table.items()} def 
current_validated_distinct_payloads(turn_context: DashboardChatTurnContext) -> dict[str, Any]: """Serialize the current turn's validated distinct values back into checkpoint-safe payloads.""" - return serialize_distinct_payloads(turn_context.validated_distinct_values) + serialized: dict[str, dict[str, list[str]]] = {} + for table_name, column_name, value in turn_context.validated_distinct_values: + serialized.setdefault(table_name, {}).setdefault(column_name, []).append(value) + return { + table_name: {column_name: sorted(set(values)) for column_name, values in column_map.items()} + for table_name, column_map in serialized.items() + } def get_or_load_schema_snippets( @@ -111,7 +120,7 @@ def get_or_load_schema_snippets( tables: Sequence[str] | None = None, ) -> dict[str, Any]: """Load and keep schema snippets in the current turn state.""" - allowlist = get_runtime_allowlist(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) requested_tables = [ table_name.lower() for table_name in (tables if tables is not None else allowlist.prioritized_tables()) @@ -210,7 +219,9 @@ def record_validated_filters_from_sql( ) if resolved_table is None and qualifier is None: if schema_snippets_by_table: - matching = tables_with_column(normalized_column, query_tables, schema_snippets_by_table) + matching = tables_with_column( + normalized_column, query_tables, schema_snippets_by_table + ) if len(matching) == 1: resolved_table = matching[0] elif len(query_tables) == 1: @@ -231,7 +242,9 @@ def seed_validated_distinct_values_from_previous_sql( turn_context: DashboardChatTurnContext, ) -> None: """Treat text filters from the previous successful SQL as already validated for follow-ups.""" - previous_sql = get_conversation_context(state).last_sql_query + previous_sql = DashboardChatConversationContext.model_validate( + state.get("conversation_context") or {} + ).last_sql_query if not previous_sql: return record_validated_filters_from_sql( diff 
--git a/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py b/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py index 116705daf..9eed5996a 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py @@ -2,7 +2,12 @@ from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatResponse +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import ( + DashboardChatIntentDecision, + DashboardChatResponse, + DashboardChatRetrievedDocument, +) from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, @@ -12,16 +17,6 @@ ) from ddpui.core.dashboard_chat.orchestration.retrieval_support import build_citations from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import ( - get_intent_decision, - get_retrieved_documents, - get_runtime_allowlist, - get_runtime_response, -) -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - serialize_citations, - serialize_response, -) def compose_response_node( @@ -31,25 +26,26 @@ def compose_response_node( ) -> dict[str, Any]: """Compose the final dashboard-chat response from state accumulated by prior nodes.""" if state.get("response") is not None: - response = get_runtime_response(state) + response = DashboardChatResponse.model_validate(state.get("response") or {}) return { - "response": serialize_response( - DashboardChatResponse( - answer_text=response.answer_text, - intent=response.intent, - citations=response.citations, - warnings=response.warnings, - sql=response.sql, - sql_results=response.sql_results, - usage=response.usage, - tool_calls=response.tool_calls, - metadata=response.metadata, - ) - ) + "response": DashboardChatResponse( + 
answer_text=response.answer_text, + intent=response.intent, + citations=response.citations, + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + metadata=response.metadata, + ).to_dict() } - allowlist = get_runtime_allowlist(state) - retrieved_documents = get_retrieved_documents(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) + retrieved_documents = [ + DashboardChatRetrievedDocument.model_validate(p) + for p in (state.get("retrieved_documents") or []) + ] citations = build_citations( retrieved_documents=retrieved_documents, dashboard_export=state.get("dashboard_export_payload") or {}, @@ -67,27 +63,26 @@ def compose_response_node( "warnings": list(state.get("warnings") or []), "tool_calls": list(state.get("tool_calls") or []), } + intent_decision = DashboardChatIntentDecision.model_validate(state.get("intent_decision") or {}) return { - "citations": serialize_citations(citations), - "response": serialize_response( - DashboardChatResponse( - answer_text=compose_final_answer_text( - llm_client, - state, - execution_result, - response_format=response_format, - ), - intent=get_intent_decision(state).intent, - citations=citations, - warnings=list(state.get("warnings") or []), - sql=state.get("sql"), - sql_results=state.get("sql_results"), - usage=build_usage_summary(llm_client, vector_store), - tool_calls=list(state.get("tool_calls") or []), - metadata={ - "response_format": response_format, - "table_columns": sql_result_columns(state.get("sql_results")), - }, - ) - ), + "citations": [c.model_dump(mode="json") for c in citations], + "response": DashboardChatResponse( + answer_text=compose_final_answer_text( + llm_client, + state, + execution_result, + response_format=response_format, + ), + intent=intent_decision.intent, + citations=citations, + warnings=list(state.get("warnings") or []), + sql=state.get("sql"), + 
sql_results=state.get("sql_results"), + usage=build_usage_summary(llm_client, vector_store), + tool_calls=list(state.get("tool_calls") or []), + metadata={ + "response_format": response_format, + "table_columns": sql_result_columns(state.get("sql_results")), + }, + ).to_dict(), } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py index 52fea65d5..5f810da26 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -2,24 +2,28 @@ from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatCitation, DashboardChatResponse - -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import ( - get_intent_decision, - get_retrieved_documents, - get_runtime_allowlist, - get_runtime_response, - get_sql_validation_result, +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import ( + DashboardChatCitation, + DashboardChatIntentDecision, + DashboardChatResponse, + DashboardChatRetrievedDocument, + DashboardChatSqlValidationResult, ) +from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: """Attach warehouse citations and metadata to the finished response.""" - response = get_runtime_response(state) + response = DashboardChatResponse.model_validate(state.get("response") or {}) citations = list(response.citations) - sql_validation = get_sql_validation_result(state) + + sql_validation_payload = state.get("sql_validation") + sql_validation = ( + DashboardChatSqlValidationResult.model_validate(sql_validation_payload) + if sql_validation_payload 
is not None + else None + ) if ( sql_validation is not None and sql_validation.is_valid @@ -37,31 +41,35 @@ def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: if table_name ) - allowlist = get_runtime_allowlist(state) + allowlist = DashboardChatAllowlist.model_validate(state.get("allowlist_payload") or {}) + intent_decision = DashboardChatIntentDecision.model_validate(state.get("intent_decision") or {}) + retrieved_documents = [ + DashboardChatRetrievedDocument.model_validate(p) + for p in (state.get("retrieved_documents") or []) + ] + response_metadata = dict(response.metadata) response_metadata.update( { "dashboard_id": state["dashboard_id"], - "retrieved_document_ids": [document.document_id for document in get_retrieved_documents(state)], + "retrieved_document_ids": [doc.document_id for doc in retrieved_documents], "allowlisted_tables": sorted(allowlist.allowed_tables), "sql_guard_errors": sql_validation.errors if sql_validation is not None else [], - "intent_reason": get_intent_decision(state).reason, - "missing_info": get_intent_decision(state).missing_info, - "follow_up_type": get_intent_decision(state).follow_up_context.follow_up_type, + "intent_reason": intent_decision.reason, + "missing_info": intent_decision.missing_info, + "follow_up_type": intent_decision.follow_up_context.follow_up_type, } ) return { - "response": serialize_response( - DashboardChatResponse( - answer_text=response.answer_text, - intent=response.intent, - citations=list(dict.fromkeys(citations)), - warnings=response.warnings, - sql=response.sql, - sql_results=response.sql_results, - usage=response.usage, - tool_calls=response.tool_calls, - metadata=response_metadata, - ) - ) + "response": DashboardChatResponse( + answer_text=response.answer_text, + intent=response.intent, + citations=list(dict.fromkeys(citations)), + warnings=response.warnings, + sql=response.sql, + sql_results=response.sql_results, + usage=response.usage, + tool_calls=response.tool_calls, + 
metadata=response_metadata, + ).to_dict() } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py index ac29e3687..765e8887a 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_context.py @@ -2,13 +2,11 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_follow_up_messages +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import ( + build_follow_up_messages, +) from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - serialize_retrieved_documents, - serialize_sql_validation_result, -) from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing @@ -23,9 +21,7 @@ def handle_follow_up_context_node( tool_specifications, ) -> dict[str, Any]: """Handle follow-ups that continue explanation without requiring new SQL.""" - query_embedding = get_or_embed_query( - vector_store, state["user_query"], query_embeddings={} - ) + query_embedding = get_or_embed_query(vector_store, state["user_query"], query_embeddings={}) messages = build_follow_up_messages(llm_client, state) execution_result = execute_tool_loop( @@ -41,14 +37,17 @@ def handle_follow_up_context_node( initial_query_embeddings={state["user_query"]: query_embedding}, ) + sql_validation = execution_result["sql_validation"] return { - "retrieved_documents": serialize_retrieved_documents( - execution_result["retrieved_documents"] - ), + "retrieved_documents": [ + d.model_dump(mode="json") for d in 
execution_result["retrieved_documents"] + ], "tool_calls": execution_result["tool_calls"], "draft_answer_text": execution_result["answer_text"], "sql": execution_result["sql"], - "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_validation": sql_validation.model_dump(mode="json") + if sql_validation is not None + else None, "sql_results": execution_result["sql_results"], "warnings": execution_result["warnings"], "timing_breakdown": merge_tool_loop_timing(state, execution_result), diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py index 52919f0fb..8d2eb8fe2 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_follow_up_sql.py @@ -2,13 +2,11 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_follow_up_messages +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import ( + build_follow_up_messages, +) from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - serialize_retrieved_documents, - serialize_sql_validation_result, -) from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing @@ -23,9 +21,7 @@ def handle_follow_up_sql_node( tool_specifications, ) -> dict[str, Any]: """Handle follow-ups that are expected to modify or rerun SQL.""" - query_embedding = get_or_embed_query( - vector_store, state["user_query"], query_embeddings={} - ) + query_embedding = get_or_embed_query(vector_store, state["user_query"], query_embeddings={}) messages = 
build_follow_up_messages(llm_client, state) execution_result = execute_tool_loop( @@ -41,14 +37,17 @@ def handle_follow_up_sql_node( initial_query_embeddings={state["user_query"]: query_embedding}, ) + sql_validation = execution_result["sql_validation"] return { - "retrieved_documents": serialize_retrieved_documents( - execution_result["retrieved_documents"] - ), + "retrieved_documents": [ + d.model_dump(mode="json") for d in execution_result["retrieved_documents"] + ], "tool_calls": execution_result["tool_calls"], "draft_answer_text": execution_result["answer_text"], "sql": execution_result["sql"], - "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_validation": sql_validation.model_dump(mode="json") + if sql_validation is not None + else None, "sql_results": execution_result["sql_results"], "warnings": execution_result["warnings"], "timing_breakdown": merge_tool_loop_timing(state, execution_result), diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py index 0c79281f2..d154d0e25 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py @@ -5,7 +5,6 @@ from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse from ddpui.core.dashboard_chat.orchestration.response_composer import build_usage_summary -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState @@ -14,13 +13,11 @@ def handle_irrelevant_node( ) -> dict[str, Any]: """Handle questions outside dashboard chat scope.""" return { - "response": serialize_response( - DashboardChatResponse( - answer_text=( - "I can only answer questions about this dashboard, its charts, and the data behind them." 
- ), - intent=DashboardChatIntent.IRRELEVANT, - usage=build_usage_summary(llm_client, vector_store), - ) - ) + "response": DashboardChatResponse( + answer_text=( + "I can only answer questions about this dashboard, its charts, and the data behind them." + ), + intent=DashboardChatIntent.IRRELEVANT, + usage=build_usage_summary(llm_client, vector_store), + ).to_dict() } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py index 67636ec06..7cf642e4c 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py @@ -2,31 +2,30 @@ from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse - +from ddpui.core.dashboard_chat.contracts import ( + DashboardChatIntent, + DashboardChatIntentDecision, + DashboardChatResponse, +) from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, clarification_fallback, ) from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision def handle_needs_clarification_node( state: DashboardChatGraphState, llm_client, vector_store ) -> dict[str, Any]: """Ask for clarification when the router says the query is underspecified.""" - intent_decision = get_intent_decision(state) + intent_decision = DashboardChatIntentDecision.model_validate(state.get("intent_decision") or {}) return { - "response": serialize_response( - DashboardChatResponse( - answer_text=( - intent_decision.clarification_question - or clarification_fallback(intent_decision.missing_info) - ), - intent=DashboardChatIntent.NEEDS_CLARIFICATION, - 
usage=build_usage_summary(llm_client, vector_store), - ) - ) + "response": DashboardChatResponse( + answer_text=( + intent_decision.clarification_question + or clarification_fallback(intent_decision.missing_info) + ), + intent=DashboardChatIntent.NEEDS_CLARIFICATION, + usage=build_usage_summary(llm_client, vector_store), + ).to_dict() } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py index 91950cf65..2efad00a5 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_with_sql.py @@ -2,13 +2,11 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_new_query_messages +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import ( + build_new_query_messages, +) from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - serialize_retrieved_documents, - serialize_sql_validation_result, -) from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing @@ -23,9 +21,7 @@ def handle_query_with_sql_node( tool_specifications, ) -> dict[str, Any]: """Handle new questions that are expected to produce SQL-backed answers.""" - query_embedding = get_or_embed_query( - vector_store, state["user_query"], query_embeddings={} - ) + query_embedding = get_or_embed_query(vector_store, state["user_query"], query_embeddings={}) messages = build_new_query_messages(llm_client, state) execution_result = execute_tool_loop( @@ -41,14 +37,17 @@ def handle_query_with_sql_node( 
initial_query_embeddings={state["user_query"]: query_embedding}, ) + sql_validation = execution_result["sql_validation"] return { - "retrieved_documents": serialize_retrieved_documents( - execution_result["retrieved_documents"] - ), + "retrieved_documents": [ + d.model_dump(mode="json") for d in execution_result["retrieved_documents"] + ], "tool_calls": execution_result["tool_calls"], "draft_answer_text": execution_result["answer_text"], "sql": execution_result["sql"], - "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_validation": sql_validation.model_dump(mode="json") + if sql_validation is not None + else None, "sql_results": execution_result["sql_results"], "warnings": execution_result["warnings"], "timing_breakdown": merge_tool_loop_timing(state, execution_result), diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py index 1c4bc7056..1782bcd08 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_query_without_sql.py @@ -2,13 +2,11 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import build_new_query_messages +from ddpui.core.dashboard_chat.orchestration.tool_loop_message_builder import ( + build_new_query_messages, +) from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - serialize_retrieved_documents, - serialize_sql_validation_result, -) from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_loop import execute_tool_loop from ddpui.core.dashboard_chat.orchestration.timing_breakdown import merge_tool_loop_timing @@ -23,9 +21,7 @@ def handle_query_without_sql_node( 
tool_specifications, ) -> dict[str, Any]: """Handle new questions that continue with retrieved context but no SQL requirement.""" - query_embedding = get_or_embed_query( - vector_store, state["user_query"], query_embeddings={} - ) + query_embedding = get_or_embed_query(vector_store, state["user_query"], query_embeddings={}) messages = build_new_query_messages(llm_client, state) execution_result = execute_tool_loop( @@ -41,14 +37,17 @@ def handle_query_without_sql_node( initial_query_embeddings={state["user_query"]: query_embedding}, ) + sql_validation = execution_result["sql_validation"] return { - "retrieved_documents": serialize_retrieved_documents( - execution_result["retrieved_documents"] - ), + "retrieved_documents": [ + d.model_dump(mode="json") for d in execution_result["retrieved_documents"] + ], "tool_calls": execution_result["tool_calls"], "draft_answer_text": execution_result["answer_text"], "sql": execution_result["sql"], - "sql_validation": serialize_sql_validation_result(execution_result["sql_validation"]), + "sql_validation": sql_validation.model_dump(mode="json") + if sql_validation is not None + else None, "sql_results": execution_result["sql_results"], "warnings": execution_result["warnings"], "timing_breakdown": merge_tool_loop_timing(state, execution_result), diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py index 2ce1ea3a8..37c814425 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py @@ -8,7 +8,6 @@ build_usage_summary, compose_small_talk_response, ) -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_response from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState @@ -17,14 +16,12 @@ def handle_small_talk_node( ) -> dict[str, Any]: """Handle simple social turns without any tool use.""" 
return { - "response": serialize_response( - DashboardChatResponse( - answer_text=( - state.get("small_talk_response") - or compose_small_talk_response(llm_client, state["user_query"]) - ), - intent=DashboardChatIntent.SMALL_TALK, - usage=build_usage_summary(llm_client, vector_store), - ) - ) + "response": DashboardChatResponse( + answer_text=( + state.get("small_talk_response") + or compose_small_talk_response(llm_client, state["user_query"]) + ), + intent=DashboardChatIntent.SMALL_TALK, + usage=build_usage_summary(llm_client, vector_store), + ).to_dict() } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py index 2d372e973..731423a50 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/load_context.py @@ -2,12 +2,13 @@ from typing import Any -from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlistBuilder +from ddpui.models.org import Org +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import ( + DashboardChatAllowlistBuilder, +) from ddpui.services.dashboard_service import DashboardService from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import serialize_allowlist -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_org def load_context_node(state: DashboardChatGraphState) -> dict[str, Any]: @@ -22,7 +23,7 @@ def load_context_node(state: DashboardChatGraphState) -> dict[str, Any]: "validated_distinct_payloads": dict(state.get("validated_distinct_payloads") or {}), } - org = get_runtime_org(state) + org = Org.objects.select_related("dbt").get(id=int(state["org_id"])) dashboard_export = DashboardService.export_dashboard_context( state["dashboard_id"], org, @@ -35,7 +36,7 @@ def load_context_node(state: DashboardChatGraphState) 
-> dict[str, Any]: return { "dashboard_export_payload": dashboard_export, "dbt_index": DashboardChatAllowlistBuilder.build_dbt_index(manifest_json, allowlist), - "allowlist_payload": serialize_allowlist(allowlist), + "allowlist_payload": allowlist.model_dump(mode="json"), "schema_snippet_payloads": dict(state.get("schema_snippet_payloads") or {}), "validated_distinct_payloads": dict(state.get("validated_distinct_payloads") or {}), } diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py index c191923e6..1e187dcd2 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/route_intent.py @@ -2,15 +2,13 @@ from typing import Any -from ddpui.core.dashboard_chat.orchestration.conversation_context import extract_conversation_context +from ddpui.core.dashboard_chat.orchestration.conversation_context import ( + extract_conversation_context, +) from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_fast_path_intent, build_fast_path_small_talk_response, ) -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - serialize_conversation_context, - serialize_intent_decision, -) from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState @@ -20,8 +18,8 @@ def route_intent_node(state: DashboardChatGraphState, llm_client) -> dict[str, A fast_path_intent = build_fast_path_intent(state["user_query"]) if fast_path_intent is not None: return { - "conversation_context": serialize_conversation_context(conversation_context), - "intent_decision": serialize_intent_decision(fast_path_intent), + "conversation_context": conversation_context.model_dump(mode="json"), + "intent_decision": fast_path_intent.model_dump(mode="json"), "small_talk_response": build_fast_path_small_talk_response(state["user_query"]), } intent_decision = llm_client.classify_intent( @@ -29,6 +27,6 @@ def 
route_intent_node(state: DashboardChatGraphState, llm_client) -> dict[str, A conversation_context=conversation_context, ) return { - "conversation_context": serialize_conversation_context(conversation_context), - "intent_decision": serialize_intent_decision(intent_decision), + "conversation_context": conversation_context.model_dump(mode="json"), + "intent_decision": intent_decision.model_dump(mode="json"), } diff --git a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index aea961a06..c4db119c6 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -15,7 +15,9 @@ from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseTools from ddpui.models.org import Org -from ddpui.core.dashboard_chat.orchestration.conversation_context import normalize_conversation_history +from ddpui.core.dashboard_chat.orchestration.conversation_context import ( + normalize_conversation_history, +) from ddpui.core.dashboard_chat.orchestration.checkpoints import get_dashboard_chat_checkpointer from ddpui.core.dashboard_chat.orchestration.nodes.compose_response import compose_response_node from ddpui.core.dashboard_chat.orchestration.nodes.finalize import finalize_node @@ -42,7 +44,7 @@ from ddpui.core.dashboard_chat.orchestration.state import ( DashboardChatGraphState, ) -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_runtime_response + from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_specifications import ( DASHBOARD_CHAT_TOOL_SPECIFICATIONS, ) @@ -346,7 +348,7 @@ def run( "Resume the session from its persisted checkpoint." 
) runtime_total_ms = round((perf_counter() - runtime_started_at) * 1000, 2) - response = get_runtime_response(final_state) + response = DashboardChatResponse.model_validate(final_state.get("response") or {}) timing_breakdown = dict(final_state.get("timing_breakdown") or {}) timing_breakdown["runtime_total_ms"] = runtime_total_ms response_metadata = dict(response.metadata) @@ -383,12 +385,13 @@ def resume( if not state_snapshot.next: if persisted_state.get("response") is None: raise ValueError(f"Session {session_id} has no resumable or completed response") - return get_runtime_response(persisted_state) + response = DashboardChatResponse.model_validate(persisted_state.get("response") or {}) + return response runtime_started_at = perf_counter() final_state = graph.invoke(None, config=config) runtime_total_ms = round((perf_counter() - runtime_started_at) * 1000, 2) - response = get_runtime_response(final_state) + response = DashboardChatResponse.model_validate(final_state.get("response") or {}) timing_breakdown = dict(final_state.get("timing_breakdown") or {}) timing_breakdown["runtime_total_ms"] = runtime_total_ms response_metadata = dict(response.metadata) diff --git a/ddpui/core/dashboard_chat/orchestration/response_composer.py b/ddpui/core/dashboard_chat/orchestration/response_composer.py index b9aaa0eb5..aaa6b5dca 100644 --- a/ddpui/core/dashboard_chat/orchestration/response_composer.py +++ b/ddpui/core/dashboard_chat/orchestration/response_composer.py @@ -12,7 +12,6 @@ from ddpui.utils.custom_logger import CustomLogger from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_intent_decision logger = CustomLogger("dashboard_chat") @@ -109,7 +108,9 @@ def compose_final_answer_text( try: answer_text = llm_client.compose_final_answer( user_query=state["user_query"], - intent=get_intent_decision(state).intent, + intent=DashboardChatIntentDecision.model_validate( + 
state.get("intent_decision") or {} + ).intent, response_format=response_format, draft_answer=draft_answer, retrieved_documents=list(execution_result.get("retrieved_documents") or []), diff --git a/ddpui/core/dashboard_chat/orchestration/state/accessors.py b/ddpui/core/dashboard_chat/orchestration/state/accessors.py deleted file mode 100644 index 6c83c0eae..000000000 --- a/ddpui/core/dashboard_chat/orchestration/state/accessors.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Typed accessors for reconstructing runtime views from checkpoint-safe state payloads.""" - -from ddpui.models.org import Org - -from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatConversationContext, - DashboardChatIntentDecision, - DashboardChatResponse, - DashboardChatRetrievedDocument, - DashboardChatSqlValidationResult, -) -from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( - deserialize_allowlist, - deserialize_conversation_context, - deserialize_intent_decision, - deserialize_response, - deserialize_retrieved_documents, - deserialize_sql_validation_result, -) - -def get_runtime_org(state: DashboardChatGraphState) -> Org: - """Return the Django org object for the current runtime state.""" - return Org.objects.select_related("dbt").get(id=int(state["org_id"])) - - -def get_runtime_allowlist(state: DashboardChatGraphState) -> DashboardChatAllowlist: - """Return the reconstructed allowlist for the current runtime state.""" - return deserialize_allowlist(state.get("allowlist_payload")) - - -def get_conversation_context(state: DashboardChatGraphState) -> DashboardChatConversationContext: - """Return the reconstructed conversation context for the current runtime state.""" - return deserialize_conversation_context(state.get("conversation_context")) - - -def get_intent_decision(state: 
DashboardChatGraphState) -> DashboardChatIntentDecision: - """Return the reconstructed intent decision for the current runtime state.""" - return deserialize_intent_decision(state.get("intent_decision")) - - -def get_runtime_response(state: DashboardChatGraphState) -> DashboardChatResponse: - """Return the reconstructed response contract for the current runtime state.""" - return deserialize_response(state.get("response")) - - -def get_retrieved_documents( - state: DashboardChatGraphState, -) -> list[DashboardChatRetrievedDocument]: - """Return the reconstructed retrieved-document contracts for the current runtime state.""" - return deserialize_retrieved_documents(state.get("retrieved_documents")) - - -def get_sql_validation_result( - state: DashboardChatGraphState, -) -> DashboardChatSqlValidationResult | None: - """Return the reconstructed SQL validation result for the current runtime state.""" - return deserialize_sql_validation_result(state.get("sql_validation")) diff --git a/ddpui/core/dashboard_chat/orchestration/state/payload_codec.py b/ddpui/core/dashboard_chat/orchestration/state/payload_codec.py deleted file mode 100644 index a5daf33f7..000000000 --- a/ddpui/core/dashboard_chat/orchestration/state/payload_codec.py +++ /dev/null @@ -1,285 +0,0 @@ -"""Serialization helpers for checkpoint-safe dashboard chat state payloads.""" - -from typing import Any - -from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatCitation, - DashboardChatConversationContext, - DashboardChatFollowUpContext, - DashboardChatIntent, - DashboardChatIntentDecision, - DashboardChatResponse, - DashboardChatRetrievedDocument, - DashboardChatSchemaSnippet, - DashboardChatSqlValidationResult, -) - - -def serialize_allowlist(allowlist: DashboardChatAllowlist) -> dict[str, Any]: - """Convert an allowlist into a checkpoint-safe dictionary payload.""" - return { - "chart_tables": 
sorted(allowlist.chart_tables), - "upstream_tables": sorted(allowlist.upstream_tables), - "allowed_tables": sorted(allowlist.allowed_tables), - "allowed_unique_ids": sorted(allowlist.allowed_unique_ids), - "unique_id_to_table": dict(allowlist.unique_id_to_table), - "table_to_unique_ids": { - table_name: sorted(unique_ids) - for table_name, unique_ids in allowlist.table_to_unique_ids.items() - }, - } - - -def deserialize_allowlist(payload: dict[str, Any] | None) -> DashboardChatAllowlist: - """Rebuild an allowlist view from checkpoint-safe payload data.""" - payload = payload or {} - return DashboardChatAllowlist( - chart_tables=set(payload.get("chart_tables") or []), - upstream_tables=set(payload.get("upstream_tables") or []), - allowed_tables=set(payload.get("allowed_tables") or []), - allowed_unique_ids=set(payload.get("allowed_unique_ids") or []), - unique_id_to_table=dict(payload.get("unique_id_to_table") or {}), - table_to_unique_ids={ - table_name: set(unique_ids) - for table_name, unique_ids in (payload.get("table_to_unique_ids") or {}).items() - }, - ) - - -def serialize_schema_snippets( - snippets: dict[str, DashboardChatSchemaSnippet], -) -> dict[str, Any]: - """Convert schema snippets into checkpoint-safe dictionary payloads.""" - return { - table_name: { - "table_name": snippet.table_name, - "columns": list(snippet.columns), - } - for table_name, snippet in snippets.items() - } - - -def deserialize_schema_snippets( - payload: dict[str, Any] | None, -) -> dict[str, DashboardChatSchemaSnippet]: - """Rebuild schema snippet contracts from checkpoint payloads.""" - snippets: dict[str, DashboardChatSchemaSnippet] = {} - for table_name, snippet_payload in (payload or {}).items(): - snippets[table_name.lower()] = DashboardChatSchemaSnippet( - table_name=str(snippet_payload.get("table_name") or table_name), - columns=list(snippet_payload.get("columns") or []), - ) - return snippets - - -def serialize_distinct_payloads( - validated_distinct_values: set[tuple[str, 
str, str]], -) -> dict[str, Any]: - """Convert validated distinct values into a checkpoint-safe nested payload.""" - serialized: dict[str, dict[str, list[str]]] = {} - for table_name, column_name, value in validated_distinct_values: - serialized.setdefault(table_name, {}).setdefault(column_name, []).append(value) - - return { - table_name: { - column_name: sorted(set(values)) - for column_name, values in column_map.items() - } - for table_name, column_map in serialized.items() - } - - -def deserialize_distinct_payloads( - payload: dict[str, Any] | None, -) -> set[tuple[str, str, str]]: - """Rebuild validated distinct values from checkpoint payloads.""" - validated_distinct_values: set[tuple[str, str, str]] = set() - for table_name, column_map in (payload or {}).items(): - for column_name, values in (column_map or {}).items(): - for value in values or []: - validated_distinct_values.add( - (str(table_name).lower(), str(column_name).lower(), str(value)) - ) - return validated_distinct_values - - -def serialize_conversation_context( - context: DashboardChatConversationContext, -) -> dict[str, Any]: - """Convert conversation context into a checkpoint-safe payload.""" - return { - "last_sql_query": context.last_sql_query, - "last_tables_used": list(context.last_tables_used), - "last_chart_ids": list(context.last_chart_ids), - "last_metrics": list(context.last_metrics), - "last_dimensions": list(context.last_dimensions), - "last_filters": list(context.last_filters), - "last_response_type": context.last_response_type, - "last_answer_text": context.last_answer_text, - "last_intent": context.last_intent, - } - - -def deserialize_conversation_context( - payload: dict[str, Any] | None, -) -> DashboardChatConversationContext: - """Rebuild conversation context from checkpoint payload data.""" - payload = payload or {} - return DashboardChatConversationContext( - last_sql_query=payload.get("last_sql_query"), - last_tables_used=list(payload.get("last_tables_used") or []), - 
last_chart_ids=list(payload.get("last_chart_ids") or []), - last_metrics=list(payload.get("last_metrics") or []), - last_dimensions=list(payload.get("last_dimensions") or []), - last_filters=list(payload.get("last_filters") or []), - last_response_type=payload.get("last_response_type"), - last_answer_text=payload.get("last_answer_text"), - last_intent=payload.get("last_intent"), - ) - - -def serialize_intent_decision(decision: DashboardChatIntentDecision) -> dict[str, Any]: - """Convert one intent decision into a checkpoint-safe payload.""" - return { - "intent": decision.intent.value, - "confidence": decision.confidence, - "reason": decision.reason, - "missing_info": list(decision.missing_info), - "force_tool_usage": decision.force_tool_usage, - "clarification_question": decision.clarification_question, - "follow_up_context": { - "is_follow_up": decision.follow_up_context.is_follow_up, - "follow_up_type": decision.follow_up_context.follow_up_type, - "reusable_elements": dict(decision.follow_up_context.reusable_elements), - "modification_instruction": decision.follow_up_context.modification_instruction, - }, - } - - -def deserialize_intent_decision(payload: dict[str, Any] | None) -> DashboardChatIntentDecision: - """Rebuild an intent decision from checkpoint payload data.""" - payload = payload or {} - follow_up_payload = payload.get("follow_up_context") or {} - return DashboardChatIntentDecision( - intent=DashboardChatIntent(str(payload.get("intent") or DashboardChatIntent.IRRELEVANT.value)), - confidence=float(payload.get("confidence") or 0.0), - reason=str(payload.get("reason") or ""), - missing_info=list(payload.get("missing_info") or []), - force_tool_usage=bool(payload.get("force_tool_usage")), - clarification_question=payload.get("clarification_question"), - follow_up_context=DashboardChatFollowUpContext( - is_follow_up=bool(follow_up_payload.get("is_follow_up")), - follow_up_type=follow_up_payload.get("follow_up_type"), - 
reusable_elements=dict(follow_up_payload.get("reusable_elements") or {}), - modification_instruction=follow_up_payload.get("modification_instruction"), - ), - ) - - -def serialize_retrieved_documents( - documents: list[DashboardChatRetrievedDocument], -) -> list[dict[str, Any]]: - """Convert retrieved document contracts into checkpoint-safe payloads.""" - return [ - { - "document_id": document.document_id, - "source_type": document.source_type, - "source_identifier": document.source_identifier, - "content": document.content, - "dashboard_id": document.dashboard_id, - "distance": document.distance, - } - for document in documents - ] - - -def deserialize_retrieved_documents( - payloads: list[dict[str, Any]] | None, -) -> list[DashboardChatRetrievedDocument]: - """Rebuild retrieved document contracts from checkpoint payloads.""" - return [ - DashboardChatRetrievedDocument( - document_id=str(payload.get("document_id") or ""), - source_type=str(payload.get("source_type") or ""), - source_identifier=str(payload.get("source_identifier") or ""), - content=str(payload.get("content") or ""), - dashboard_id=payload.get("dashboard_id"), - distance=payload.get("distance"), - ) - for payload in (payloads or []) - ] - - -def serialize_citations(citations: list[DashboardChatCitation]) -> list[dict[str, Any]]: - """Convert citations into checkpoint-safe payloads.""" - return [citation.to_dict() for citation in citations] - - -def deserialize_citations( - payloads: list[dict[str, Any]] | None, -) -> list[DashboardChatCitation]: - """Rebuild citation contracts from checkpoint payloads.""" - return [ - DashboardChatCitation( - source_type=str(payload.get("source_type") or ""), - source_identifier=str(payload.get("source_identifier") or ""), - title=str(payload.get("title") or ""), - snippet=str(payload.get("snippet") or ""), - dashboard_id=payload.get("dashboard_id"), - table_name=payload.get("table_name"), - ) - for payload in (payloads or []) - ] - - -def 
serialize_sql_validation_result( - validation: DashboardChatSqlValidationResult | None, -) -> dict[str, Any] | None: - """Convert SQL validation state into a checkpoint-safe payload.""" - if validation is None: - return None - return { - "is_valid": validation.is_valid, - "sanitized_sql": validation.sanitized_sql, - "tables": list(validation.tables), - "warnings": list(validation.warnings), - "errors": list(validation.errors), - } - - -def deserialize_sql_validation_result( - payload: dict[str, Any] | None, -) -> DashboardChatSqlValidationResult | None: - """Rebuild SQL validation state from checkpoint payload data.""" - if payload is None: - return None - return DashboardChatSqlValidationResult( - is_valid=bool(payload.get("is_valid")), - sanitized_sql=payload.get("sanitized_sql"), - tables=list(payload.get("tables") or []), - warnings=list(payload.get("warnings") or []), - errors=list(payload.get("errors") or []), - ) - - -def serialize_response(response: DashboardChatResponse) -> dict[str, Any]: - """Convert the final response contract into a checkpoint-safe payload.""" - return response.to_dict() - - -def deserialize_response(payload: dict[str, Any] | None) -> DashboardChatResponse: - """Rebuild the final response contract from checkpoint payload data.""" - payload = payload or {} - intent_value = str(payload.get("intent") or DashboardChatIntent.IRRELEVANT.value) - return DashboardChatResponse( - answer_text=str(payload.get("answer_text") or ""), - intent=DashboardChatIntent(intent_value), - citations=deserialize_citations(payload.get("citations") or []), - warnings=list(payload.get("warnings") or []), - sql=payload.get("sql"), - sql_results=payload.get("sql_results"), - usage=dict(payload.get("usage") or {}), - tool_calls=list(payload.get("tool_calls") or []), - metadata=dict(payload.get("metadata") or {}), - ) diff --git a/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py 
b/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py index a38e8545c..15bbfc745 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py @@ -8,8 +8,8 @@ build_follow_up_context_prompt, detect_sql_modification_type, ) +from ddpui.core.dashboard_chat.contracts import DashboardChatConversationContext from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState -from ddpui.core.dashboard_chat.orchestration.state.accessors import get_conversation_context def build_new_query_messages( @@ -36,7 +36,9 @@ def build_follow_up_messages( { "role": "system", "content": build_follow_up_context_prompt( - get_conversation_context(state), + DashboardChatConversationContext.model_validate( + state.get("conversation_context") or {} + ), state["user_query"], ), }, diff --git a/ddpui/core/dashboard_chat/sessions/session_service.py b/ddpui/core/dashboard_chat/sessions/session_service.py index ed7e54cf5..e526be7b2 100644 --- a/ddpui/core/dashboard_chat/sessions/session_service.py +++ b/ddpui/core/dashboard_chat/sessions/session_service.py @@ -160,22 +160,6 @@ def serialize_dashboard_chat_message(message: DashboardChatMessage) -> dict: } -def find_dashboard_chat_assistant_reply( - *, - session: DashboardChatSession, - user_message: DashboardChatMessage, -) -> DashboardChatMessage | None: - """Return the first assistant reply that follows a user turn, if it exists.""" - return ( - session.messages.filter( - role=DashboardChatMessageRole.ASSISTANT.value, - sequence_number__gt=user_message.sequence_number, - ) - .order_by("sequence_number") - .first() - ) - - def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> DashboardChatMessage: """Load session and message, run the runtime, persist and return the assistant reply. 
@@ -201,9 +185,13 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> Dashbo raise Exception("Chat message could not be found") # Safety net: if an assistant reply already exists, return it without re-running. - existing_assistant_message = find_dashboard_chat_assistant_reply( - session=session, - user_message=user_message, + existing_assistant_message = ( + session.messages.filter( + role=DashboardChatMessageRole.ASSISTANT.value, + sequence_number__gt=user_message.sequence_number, + ) + .order_by("sequence_number") + .first() ) if existing_assistant_message is not None: return existing_assistant_message From 4b3e2b2d0fde63330b82d5b2d1849da2282fc87c Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 19:17:46 +0530 Subject: [PATCH 35/49] not needed --- .../dashboard_chat/agents/final_answer_formatting.py | 9 +-------- ddpui/core/dashboard_chat/agents/openai_llm_client.py | 5 ++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/ddpui/core/dashboard_chat/agents/final_answer_formatting.py b/ddpui/core/dashboard_chat/agents/final_answer_formatting.py index 0f9361a52..88c7859d3 100644 --- a/ddpui/core/dashboard_chat/agents/final_answer_formatting.py +++ b/ddpui/core/dashboard_chat/agents/final_answer_formatting.py @@ -68,9 +68,7 @@ def format_table_summary_markdown(result: dict[str, Any]) -> str: summary = str(result.get("summary") or "").strip() raw_key_points = result.get("key_points") or [] key_points = [ - str(point).strip() - for point in raw_key_points - if isinstance(point, str) and point.strip() + str(point).strip() for point in raw_key_points if isinstance(point, str) and point.strip() ][:3] sections: list[str] = [] @@ -81,8 +79,3 @@ def format_table_summary_markdown(result: dict[str, Any]) -> str: if key_points: sections.append("\n".join(f"- {point}" for point in key_points)) return "\n\n".join(section for section in sections if section).strip() - - -def serialize_final_answer_context_payload(payload: 
dict[str, Any]) -> str: - """Serialize final answer prompt context deterministically.""" - return json.dumps(payload, ensure_ascii=False) diff --git a/ddpui/core/dashboard_chat/agents/openai_llm_client.py b/ddpui/core/dashboard_chat/agents/openai_llm_client.py index 462f36307..65c5ab538 100644 --- a/ddpui/core/dashboard_chat/agents/openai_llm_client.py +++ b/ddpui/core/dashboard_chat/agents/openai_llm_client.py @@ -11,7 +11,6 @@ TABLE_SUMMARY_JSON_INSTRUCTIONS, build_final_answer_context_payload, format_table_summary_markdown, - serialize_final_answer_context_payload, ) from ddpui.core.dashboard_chat.agents.prompt_template_store import DashboardChatPromptStore from ddpui.core.dashboard_chat.contracts import ( @@ -183,7 +182,7 @@ def compose_final_answer( + "\n\n" + TABLE_SUMMARY_JSON_INSTRUCTIONS ), - user_prompt=serialize_final_answer_context_payload(context_payload), + user_prompt=json.dumps(context_payload, ensure_ascii=False), ) return format_table_summary_markdown(result) @@ -197,7 +196,7 @@ def compose_final_answer( }, { "role": "user", - "content": serialize_final_answer_context_payload(context_payload), + "content": json.dumps(context_payload, ensure_ascii=False), }, ], temperature=0.1, From e4d05fb84d344da08821ccfe6566e2d4df9f0701 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 19:21:25 +0530 Subject: [PATCH 36/49] updates --- .../llm_tools/runtime/turn_context.py | 18 ------------------ .../orchestration/state/__init__.py | 2 -- 2 files changed, 20 deletions(-) diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py index d6fbb2dc0..134e401c4 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py @@ -257,21 +257,3 @@ def dbt_resources_by_unique_id(state: DashboardChatGraphState) -> dict[str, dict """Return the allowlisted 
dbt index built at session start.""" dbt_index = state.get("dbt_index") or {} return dict(dbt_index.get("resources_by_unique_id") or {}) - - -__all__ = [ - "DashboardChatTurnContext", - "current_validated_distinct_payloads", - "current_schema_snippet_payloads", - "dbt_resources_by_unique_id", - "get_or_embed_query", - "get_or_load_schema_snippets", - "get_turn_warehouse_tools", - "has_validated_distinct_value", - "hydrate_validated_distinct_values", - "hydrate_schema_snippets_by_table", - "is_text_type", - "record_validated_distinct_values", - "record_validated_filters_from_sql", - "seed_validated_distinct_values_from_previous_sql", -] diff --git a/ddpui/core/dashboard_chat/orchestration/state/__init__.py b/ddpui/core/dashboard_chat/orchestration/state/__init__.py index 109fe0724..a17162980 100644 --- a/ddpui/core/dashboard_chat/orchestration/state/__init__.py +++ b/ddpui/core/dashboard_chat/orchestration/state/__init__.py @@ -3,5 +3,3 @@ from ddpui.core.dashboard_chat.orchestration.state.graph_state import ( DashboardChatGraphState, ) - -__all__ = ["DashboardChatGraphState"] From 898d1986ac90ccd3aa38f13d62747040cf8d9c92 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Wed, 1 Apr 2026 19:31:26 +0530 Subject: [PATCH 37/49] revert --- ddpui/settings.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/ddpui/settings.py b/ddpui/settings.py index 6d8c6bf68..bc6971a20 100644 --- a/ddpui/settings.py +++ b/ddpui/settings.py @@ -341,16 +341,8 @@ # Cookie settings -_cookie_secure_override = os.getenv("COOKIE_SECURE") -COOKIE_SECURE = ( - _cookie_secure_override.lower() in {"1", "true", "yes", "on"} - if _cookie_secure_override is not None - else True -) -COOKIE_SAMESITE = os.getenv( - "COOKIE_SAMESITE", - "Lax" if os.getenv("ENVIRONMENT", "") == "production" else "None", -) -COOKIE_HTTPONLY = os.getenv("COOKIE_HTTPONLY", "true").lower() in {"1", "true", "yes", "on"} +COOKIE_SECURE = True +COOKIE_SAMESITE = "Lax" if os.getenv("ENVIRONMENT", 
"") == "production" else "None" +COOKIE_HTTPONLY = True DATA_UPLOAD_MAX_MEMORY_SIZE = 5242880 # 5 MB From 7951bd492204b0899f646254787d320d638d9c92 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Mon, 6 Apr 2026 10:38:42 +0530 Subject: [PATCH 38/49] feat(ai-chat): add stop progress and explore sources --- .../contracts/event_contracts.py | 67 +++ .../contracts/response_contracts.py | 1 + .../implementations/sql_execution_tools.py | 11 + .../llm_tools/runtime/tool_loop.py | 40 +- .../orchestration/nodes/finalize.py | 2 + .../orchestration/orchestrator.py | 60 ++- .../orchestration/retrieval_support.py | 55 ++- .../orchestration/runtime_signals.py | 56 +++ .../sessions/session_service.py | 353 +++++++++++++++- ddpui/migrations/0160_dashboardchatturn.py | 116 +++++ ddpui/models/dashboard_chat.py | 53 +++ .../dashboard_chat/test_response_payloads.py | 73 ++++ .../dashboard_chat/test_session_service.py | 93 +++- .../test_dashboard_chat_consumer.py | 399 +++++++++++++----- ddpui/websockets/dashboard_chat_consumer.py | 211 +++++++-- 15 files changed, 1414 insertions(+), 176 deletions(-) create mode 100644 ddpui/core/dashboard_chat/contracts/event_contracts.py create mode 100644 ddpui/core/dashboard_chat/orchestration/runtime_signals.py create mode 100644 ddpui/migrations/0160_dashboardchatturn.py create mode 100644 ddpui/tests/core/dashboard_chat/test_response_payloads.py diff --git a/ddpui/core/dashboard_chat/contracts/event_contracts.py b/ddpui/core/dashboard_chat/contracts/event_contracts.py new file mode 100644 index 000000000..477e6ec8f --- /dev/null +++ b/ddpui/core/dashboard_chat/contracts/event_contracts.py @@ -0,0 +1,67 @@ +"""Websocket event contracts for dashboard chat.""" + +from datetime import datetime +from enum import Enum +from typing import Literal + +from pydantic import BaseModel, ConfigDict + + +class DashboardChatProgressStage(str, Enum): + """Stable progress stages exposed to the chat UI.""" + + UNDERSTANDING_QUESTION = "understanding_question" + 
LOADING_CONTEXT = "loading_context" + SEARCHING_CONTEXT = "searching_context" + VALIDATING_QUERY = "validating_query" + QUERYING_DATA = "querying_data" + PREPARING_ANSWER = "preparing_answer" + CANCELLING = "cancelling" + + +class DashboardChatProgressEvent(BaseModel): + """One in-flight progress update for a running dashboard-chat turn.""" + + model_config = ConfigDict(frozen=True) + + event_type: Literal["progress"] = "progress" + session_id: str + turn_id: str + dashboard_id: int + occurred_at: datetime + label: str + stage: DashboardChatProgressStage | None = None + message_id: str | None = None + + +class DashboardChatCancelledEvent(BaseModel): + """Event emitted when one dashboard-chat turn is cancelled.""" + + model_config = ConfigDict(frozen=True) + + event_type: Literal["cancelled"] = "cancelled" + session_id: str + turn_id: str + dashboard_id: int + occurred_at: datetime + label: str + + +class DashboardChatAssistantMessageEvent(BaseModel): + """Assistant message event emitted over websocket after one turn completes.""" + + model_config = ConfigDict(frozen=True) + + event_type: Literal["assistant_message"] = "assistant_message" + session_id: str + turn_id: str + message_id: str + dashboard_id: int + occurred_at: datetime + id: str + role: Literal["assistant"] + content: str + created_at: datetime + payload: dict + response_latency_ms: int | None = None + timing_breakdown: dict | None = None diff --git a/ddpui/core/dashboard_chat/contracts/response_contracts.py b/ddpui/core/dashboard_chat/contracts/response_contracts.py index 60df8ddaa..e104d4aae 100644 --- a/ddpui/core/dashboard_chat/contracts/response_contracts.py +++ b/ddpui/core/dashboard_chat/contracts/response_contracts.py @@ -18,6 +18,7 @@ class DashboardChatCitation(BaseModel): source_identifier: str title: str snippet: str + url: str | None = None dashboard_id: int | None = None table_name: str | None = None diff --git 
a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py index 83c5054d1..563af8d42 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_execution_tools.py @@ -8,6 +8,7 @@ from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_corrections import ( missing_columns_in_primary_table, @@ -23,6 +24,10 @@ get_turn_warehouse_tools, record_validated_filters_from_sql, ) +from ddpui.core.dashboard_chat.orchestration.runtime_signals import ( + publish_runtime_progress, + raise_if_runtime_cancelled, +) def handle_run_sql_query_tool( @@ -92,6 +97,12 @@ def handle_run_sql_query_tool( return missing_columns turn_context.last_sql = validation.sanitized_sql + table_label = ", ".join(validation.tables[:2]) if validation.tables else "allowlisted table" + publish_runtime_progress( + f"Querying data from {table_label}", + DashboardChatProgressStage.QUERYING_DATA, + ) + raise_if_runtime_cancelled() try: rows = get_turn_warehouse_tools( warehouse_tools_factory, diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py index 8599b039a..169842df3 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/tool_loop.py @@ -17,7 +17,8 @@ max_turns_message, fallback_answer_text, ) -from 
ddpui.core.dashboard_chat.contracts import DashboardChatIntentDecision +from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntentDecision from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.dbt_tools import ( handle_get_dbt_model_info_tool, @@ -41,9 +42,40 @@ current_schema_snippet_payloads, seed_validated_distinct_values_from_previous_sql, ) +from ddpui.core.dashboard_chat.orchestration.runtime_signals import ( + publish_runtime_progress, + raise_if_runtime_cancelled, +) logger = CustomLogger("dashboard_chat") +TOOL_PROGRESS = { + "retrieve_docs": ( + DashboardChatProgressStage.SEARCHING_CONTEXT, + "Searching relevant sources", + ), + "get_schema_snippets": ( + DashboardChatProgressStage.VALIDATING_QUERY, + "Validating query", + ), + "get_distinct_values": ( + DashboardChatProgressStage.VALIDATING_QUERY, + "Validating filters", + ), + "list_tables_by_keyword": ( + DashboardChatProgressStage.VALIDATING_QUERY, + "Validating query", + ), + "check_table_row_count": ( + DashboardChatProgressStage.VALIDATING_QUERY, + "Validating query", + ), + "run_sql_query": ( + DashboardChatProgressStage.VALIDATING_QUERY, + "Validating query", + ), +} + def execute_tool_loop( llm_client, @@ -68,6 +100,7 @@ def execute_tool_loop( intent_decision = DashboardChatIntentDecision.model_validate(state.get("intent_decision") or {}) for turn_index in range(max_turns): + raise_if_runtime_cancelled() tool_choice = "required" if intent_decision.force_tool_usage and turn_index == 0 else "auto" ai_message = llm_client.run_tool_loop_turn( messages=messages, @@ -113,6 +146,7 @@ def execute_tool_loop( ) for tool_call in tool_calls: + raise_if_runtime_cancelled() raw_args = tool_call.get("args") or {} args = raw_args if isinstance(raw_args, str): @@ -154,6 +188,7 @@ def 
execute_tool_loop( ), } ) + raise_if_runtime_cancelled() if tool_name == "run_sql_query" and result.get("success"): return build_tool_loop_result( answer_text="", @@ -186,6 +221,9 @@ def execute_tool_call( ) -> dict[str, Any]: """Execute one prototype tool against the Dalgo runtime primitives.""" try: + progress = TOOL_PROGRESS.get(tool_name) + if progress is not None: + publish_runtime_progress(progress[1], progress[0]) if tool_name == "retrieve_docs": return handle_retrieve_docs_tool( vector_store, source_config, runtime_config, args, state, turn_context diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py index 5f810da26..1fd8cda07 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -11,6 +11,7 @@ DashboardChatSqlValidationResult, ) from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState +from ddpui.core.dashboard_chat.orchestration.retrieval_support import explore_table_url def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: @@ -35,6 +36,7 @@ def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: source_identifier=table_name, title=f"Warehouse table: {table_name}", snippet=f"SQL executed against {table_name}.", + url=explore_table_url(table_name), table_name=table_name, ) for table_name in sql_validation.tables diff --git a/ddpui/core/dashboard_chat/orchestration/orchestrator.py b/ddpui/core/dashboard_chat/orchestration/orchestrator.py index c4db119c6..3194bbf5f 100644 --- a/ddpui/core/dashboard_chat/orchestration/orchestrator.py +++ b/ddpui/core/dashboard_chat/orchestration/orchestrator.py @@ -10,7 +10,8 @@ from ddpui.core.dashboard_chat.config import DashboardChatRuntimeConfig, DashboardChatSourceConfig from ddpui.core.dashboard_chat.agents.llm_client_interface import DashboardChatLlmClient from 
ddpui.core.dashboard_chat.agents.openai_llm_client import OpenAIDashboardChatLlmClient -from ddpui.core.dashboard_chat.contracts import DashboardChatResponse +from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse from ddpui.core.dashboard_chat.vector.org_vector_store import OrgVectorStore from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseTools from ddpui.models.org import Org @@ -44,16 +45,72 @@ from ddpui.core.dashboard_chat.orchestration.state import ( DashboardChatGraphState, ) +from ddpui.core.dashboard_chat.orchestration.runtime_signals import ( + publish_runtime_progress, + raise_if_runtime_cancelled, +) from ddpui.core.dashboard_chat.orchestration.llm_tools.runtime.tool_specifications import ( DASHBOARD_CHAT_TOOL_SPECIFICATIONS, ) +NODE_PROGRESS = { + "load_context": ( + DashboardChatProgressStage.LOADING_CONTEXT, + "Loading dashboard context", + ), + "route_intent": ( + DashboardChatProgressStage.UNDERSTANDING_QUESTION, + "Understanding question", + ), + "handle_query_with_sql": ( + DashboardChatProgressStage.SEARCHING_CONTEXT, + "Searching relevant sources", + ), + "handle_query_without_sql": ( + DashboardChatProgressStage.SEARCHING_CONTEXT, + "Searching relevant sources", + ), + "handle_follow_up_sql": ( + DashboardChatProgressStage.SEARCHING_CONTEXT, + "Searching relevant sources", + ), + "handle_follow_up_context": ( + DashboardChatProgressStage.SEARCHING_CONTEXT, + "Searching relevant sources", + ), + "handle_small_talk": ( + DashboardChatProgressStage.PREPARING_ANSWER, + "Preparing answer", + ), + "handle_irrelevant": ( + DashboardChatProgressStage.PREPARING_ANSWER, + "Preparing answer", + ), + "handle_needs_clarification": ( + DashboardChatProgressStage.PREPARING_ANSWER, + "Preparing answer", + ), + "compose_response": ( + DashboardChatProgressStage.PREPARING_ANSWER, + "Preparing 
answer", + ), + "finalize": ( + DashboardChatProgressStage.PREPARING_ANSWER, + "Preparing answer", + ), +} + + def _timed_node(node_name: str, handler): """Wrap one graph node so per-node duration is recorded in timing_breakdown.""" def wrapped(state: DashboardChatGraphState) -> dict: + raise_if_runtime_cancelled() + progress = NODE_PROGRESS.get(node_name) + if progress is not None: + publish_runtime_progress(progress[1], progress[0]) started_at = perf_counter() updates = handler(state) elapsed_ms = round((perf_counter() - started_at) * 1000, 2) @@ -63,6 +120,7 @@ def wrapped(state: DashboardChatGraphState) -> dict: graph_nodes_ms[node_name] = elapsed_ms new_timing["graph_nodes_ms"] = graph_nodes_ms updates["timing_breakdown"] = new_timing + raise_if_runtime_cancelled() return updates return wrapped diff --git a/ddpui/core/dashboard_chat/orchestration/retrieval_support.py b/ddpui/core/dashboard_chat/orchestration/retrieval_support.py index cfc7f419e..b0e9b937a 100644 --- a/ddpui/core/dashboard_chat/orchestration/retrieval_support.py +++ b/ddpui/core/dashboard_chat/orchestration/retrieval_support.py @@ -2,12 +2,11 @@ from collections.abc import Sequence from typing import Any +from urllib.parse import urlencode from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist, build_dashboard_chat_table_name -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatCitation, - DashboardChatRetrievedDocument, -) +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatCitation +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.core.dashboard_chat.vector.vector_documents import DashboardChatSourceType from ddpui.core.dashboard_chat.orchestration.source_identifier_parsing import ( @@ -95,6 +94,7 @@ def build_citations( chart.get("id"): chart.get("title") or f"Chart {chart.get('id')}" for chart in dashboard_export.get("charts") or [] } + dashboard_id 
= dashboard_export.get("dashboard", {}).get("id") citations: list[DashboardChatCitation] = [] for document in retrieved_documents[:6]: table_name = None @@ -115,6 +115,11 @@ def build_citations( table_name=table_name, ), snippet=compact_snippet(document.content), + url=citation_url( + document=document, + dashboard_id=document.dashboard_id or dashboard_id, + table_name=table_name, + ), dashboard_id=document.dashboard_id, table_name=table_name, ) @@ -154,6 +159,48 @@ def compact_snippet(content: str, max_length: int = 220) -> str: return normalized[: max_length - 3].rstrip() + "..." +def citation_url( + *, + document: DashboardChatRetrievedDocument, + dashboard_id: int | None, + table_name: str | None, +) -> str | None: + """Build the most useful frontend destination for one citation.""" + if document.source_type == DashboardChatSourceType.ORG_CONTEXT.value: + return "/settings/organization" + + if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: + if dashboard_id is None: + return None + return f"/dashboards/{dashboard_id}" + + if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: + chart_id = chart_id_from_source_identifier(document.source_identifier) + if chart_id is not None: + return f"/charts/{chart_id}" + if dashboard_id is None: + return None + return f"/dashboards/{dashboard_id}" + + if document.source_type in { + DashboardChatSourceType.DBT_MANIFEST.value, + DashboardChatSourceType.DBT_CATALOG.value, + }: + if table_name is None: + return None + return explore_table_url(table_name) + + return None + + +def explore_table_url(table_name: str) -> str | None: + """Build one Explore deep link from a schema-qualified table name.""" + schema_name, _, raw_table_name = table_name.partition(".") + if not schema_name or not raw_table_name: + return None + return f"/explore?{urlencode({'schema_name': schema_name, 'table_name': raw_table_name})}" + + def build_tool_document_payload( document: DashboardChatRetrievedDocument, 
allowlist: DashboardChatAllowlist, diff --git a/ddpui/core/dashboard_chat/orchestration/runtime_signals.py b/ddpui/core/dashboard_chat/orchestration/runtime_signals.py new file mode 100644 index 000000000..c2bf52d21 --- /dev/null +++ b/ddpui/core/dashboard_chat/orchestration/runtime_signals.py @@ -0,0 +1,56 @@ +"""Progress and cancellation hooks for one dashboard-chat runtime invocation.""" + +from collections.abc import Callable, Iterator +from contextlib import contextmanager +from contextvars import ContextVar + +from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage + + +class DashboardChatRunCancelled(Exception): + """Raised when a running dashboard-chat turn has been cancelled.""" + + +_current_progress_publisher: ContextVar[ + Callable[[str, DashboardChatProgressStage | None], None] | None +] = ContextVar("dashboard_chat_progress_publisher", default=None) +_current_cancel_checker: ContextVar[Callable[[], bool] | None] = ContextVar( + "dashboard_chat_cancel_checker", + default=None, +) + + +@contextmanager +def dashboard_chat_runtime_hooks( + *, + progress_publisher: Callable[[str, DashboardChatProgressStage | None], None] | None = None, + cancel_checker: Callable[[], bool] | None = None, +) -> Iterator[None]: + """Install per-run progress and cancellation hooks for the current execution context.""" + + progress_token = _current_progress_publisher.set(progress_publisher) + cancel_token = _current_cancel_checker.set(cancel_checker) + try: + yield + finally: + _current_progress_publisher.reset(progress_token) + _current_cancel_checker.reset(cancel_token) + + +def publish_runtime_progress( + label: str, + stage: DashboardChatProgressStage | None = None, +) -> None: + """Publish one progress label if the current run has a registered publisher.""" + + progress_publisher = _current_progress_publisher.get() + if progress_publisher is not None: + progress_publisher(label, stage) + + +def raise_if_runtime_cancelled() -> None: + 
"""Raise if the current run has been marked cancelled by its owner.""" + + cancel_checker = _current_cancel_checker.get() + if cancel_checker is not None and cancel_checker(): + raise DashboardChatRunCancelled() diff --git a/ddpui/core/dashboard_chat/sessions/session_service.py b/ddpui/core/dashboard_chat/sessions/session_service.py index e526be7b2..8fe292996 100644 --- a/ddpui/core/dashboard_chat/sessions/session_service.py +++ b/ddpui/core/dashboard_chat/sessions/session_service.py @@ -1,8 +1,12 @@ """Session and message persistence helpers for dashboard chat.""" from dataclasses import dataclass +from threading import Thread from uuid import UUID +from asgiref.sync import async_to_sync +from channels.layers import get_channel_layer +from django.db import close_old_connections from django.db import IntegrityError from django.db import transaction from django.db.models import Max @@ -10,17 +14,32 @@ from ddpui.core.dashboard_chat.config import DashboardChatVectorStoreConfig from ddpui.core.dashboard_chat.vector.vector_documents import build_dashboard_chat_collection_name -from ddpui.core.dashboard_chat.contracts import DashboardChatConversationMessage +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( + DashboardChatConversationMessage, +) +from ddpui.core.dashboard_chat.contracts.event_contracts import ( + DashboardChatAssistantMessageEvent, + DashboardChatCancelledEvent, + DashboardChatProgressEvent, + DashboardChatProgressStage, +) +from ddpui.core.dashboard_chat.orchestration.runtime_signals import ( + dashboard_chat_runtime_hooks, + DashboardChatRunCancelled, +) from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import ( DashboardChatMessage, DashboardChatMessageRole, DashboardChatSession, + DashboardChatTurn, + DashboardChatTurnStatus, ) from ddpui.models.org_user import OrgUser from ddpui.utils.custom_logger import CustomLogger logger = CustomLogger("dashboard_chat") +DASHBOARD_CHAT_SESSION_GROUP_PREFIX = 
"dashboard_chat_session_" class DashboardChatSessionError(Exception): @@ -160,7 +179,286 @@ def serialize_dashboard_chat_message(message: DashboardChatMessage) -> dict: } -def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> DashboardChatMessage: +def update_dashboard_chat_turn( + turn_id: int, + *, + status: DashboardChatTurnStatus | None = None, + progress_label: str | None = None, + assistant_message: DashboardChatMessage | None = None, + error_message: str | None = None, + started_at=None, + completed_at=None, + cancel_requested_at=None, +) -> DashboardChatTurn: + """Persist one dashboard-chat turn status transition and return the refreshed row.""" + + update_fields: dict = {} + if status is not None: + update_fields["status"] = status + if progress_label is not None: + update_fields["progress_label"] = progress_label + if assistant_message is not None: + update_fields["assistant_message"] = assistant_message + if error_message is not None: + update_fields["error_message"] = error_message + if started_at is not None: + update_fields["started_at"] = started_at + if completed_at is not None: + update_fields["completed_at"] = completed_at + if cancel_requested_at is not None: + update_fields["cancel_requested_at"] = cancel_requested_at + if update_fields: + DashboardChatTurn.objects.filter(id=turn_id).update(**update_fields) + return DashboardChatTurn.objects.select_related("session", "user_message").get(id=turn_id) + + +def publish_dashboard_chat_progress( + *, + session: DashboardChatSession, + turn: DashboardChatTurn, + label: str, + stage: DashboardChatProgressStage | None, + message_id: int | None = None, +) -> None: + """Publish one progress update to all websocket listeners for the current session.""" + + event = DashboardChatProgressEvent( + session_id=str(session.session_id), + turn_id=str(turn.id), + dashboard_id=session.dashboard_id or 0, + occurred_at=timezone.now(), + label=label, + stage=stage, + message_id=str(message_id) if 
message_id is not None else None, + ) + _publish_dashboard_chat_event( + session_id=str(session.session_id), + status="success", + message="", + data=event.model_dump(mode="json"), + ) + + +def publish_dashboard_chat_cancelled( + *, + session: DashboardChatSession, + turn: DashboardChatTurn, + label: str = "Generation stopped", +) -> None: + """Publish a cancelled event to the active dashboard-chat websocket session.""" + + event = DashboardChatCancelledEvent( + session_id=str(session.session_id), + turn_id=str(turn.id), + dashboard_id=session.dashboard_id or 0, + occurred_at=timezone.now(), + label=label, + ) + _publish_dashboard_chat_event( + session_id=str(session.session_id), + status="success", + message="", + data=event.model_dump(mode="json"), + ) + + +def publish_dashboard_chat_assistant_message( + *, + session: DashboardChatSession, + turn: DashboardChatTurn, + message: DashboardChatMessage, +) -> None: + """Publish the completed assistant reply for one dashboard-chat turn.""" + + event = DashboardChatAssistantMessageEvent( + session_id=str(session.session_id), + turn_id=str(turn.id), + message_id=str(message.id), + dashboard_id=session.dashboard_id or 0, + occurred_at=timezone.now(), + id=str(message.id), + role="assistant", + content=message.content, + created_at=message.created_at, + payload=message.payload or {}, + response_latency_ms=message.response_latency_ms, + timing_breakdown=message.timing_breakdown or {}, + ) + _publish_dashboard_chat_event( + session_id=str(session.session_id), + status="success", + message="", + data=event.model_dump(mode="json"), + ) + + +def publish_dashboard_chat_error( + *, + session: DashboardChatSession, + message: str, +) -> None: + """Publish one terminal error envelope to the current dashboard-chat websocket session.""" + + _publish_dashboard_chat_event( + session_id=str(session.session_id), + status="error", + message=message, + data={}, + ) + + +def start_dashboard_chat_turn_background(turn_id: int) -> None: + 
"""Run one dashboard-chat turn in a background thread without blocking the websocket.""" + + Thread( + target=_run_dashboard_chat_turn_in_background, + kwargs={"turn_id": turn_id}, + daemon=True, + name=f"dashboard-chat-turn-{turn_id}", + ).start() + + +def _run_dashboard_chat_turn_in_background(turn_id: int) -> None: + """Own execution, progress, and cancellation for one dashboard-chat turn.""" + + close_old_connections() + try: + turn = ( + DashboardChatTurn.objects.select_related( + "session", + "session__dashboard", + "session__org", + "session__orguser", + "user_message", + ) + .filter(id=turn_id) + .first() + ) + if turn is None or turn.session is None or turn.user_message is None: + logger.warning("dashboard chat turn %s not found", turn_id) + return + + if turn.status in { + DashboardChatTurnStatus.CANCEL_REQUESTED, + DashboardChatTurnStatus.CANCELLED, + }: + cancelled_turn = update_dashboard_chat_turn( + turn.id, + status=DashboardChatTurnStatus.CANCELLED, + progress_label="Generation stopped", + completed_at=timezone.now(), + ) + publish_dashboard_chat_cancelled( + session=cancelled_turn.session, + turn=cancelled_turn, + ) + return + + running_turn = update_dashboard_chat_turn( + turn.id, + status=DashboardChatTurnStatus.RUNNING, + progress_label="Understanding question", + started_at=timezone.now(), + error_message="", + ) + + def progress_publisher( + label: str, + stage: DashboardChatProgressStage | None, + ) -> None: + refreshed_turn = update_dashboard_chat_turn( + running_turn.id, + progress_label=label, + ) + publish_dashboard_chat_progress( + session=refreshed_turn.session, + turn=refreshed_turn, + label=label, + stage=stage, + message_id=refreshed_turn.user_message_id, + ) + + def cancel_checker() -> bool: + status = ( + DashboardChatTurn.objects.filter(id=running_turn.id) + .values_list("status", flat=True) + .first() + ) + return status in { + DashboardChatTurnStatus.CANCEL_REQUESTED, + DashboardChatTurnStatus.CANCELLED, + } + + try: + if 
cancel_checker(): + cancelled_turn = update_dashboard_chat_turn( + running_turn.id, + status=DashboardChatTurnStatus.CANCELLED, + progress_label="Generation stopped", + completed_at=timezone.now(), + ) + publish_dashboard_chat_cancelled( + session=cancelled_turn.session, + turn=cancelled_turn, + ) + return + + assistant_message = execute_dashboard_chat_turn( + str(running_turn.session.session_id), + running_turn.user_message_id, + progress_publisher=progress_publisher, + cancel_checker=cancel_checker, + ) + except DashboardChatRunCancelled: + cancelled_turn = update_dashboard_chat_turn( + running_turn.id, + status=DashboardChatTurnStatus.CANCELLED, + progress_label="Generation stopped", + completed_at=timezone.now(), + ) + publish_dashboard_chat_cancelled( + session=cancelled_turn.session, + turn=cancelled_turn, + ) + return + except Exception: + logger.exception("dashboard chat turn %s failed", turn_id) + failed_turn = update_dashboard_chat_turn( + running_turn.id, + status=DashboardChatTurnStatus.FAILED, + progress_label="", + error_message="Something went wrong while generating the response", + completed_at=timezone.now(), + ) + publish_dashboard_chat_error( + session=failed_turn.session, + message="Something went wrong while generating the response", + ) + return + + completed_turn = update_dashboard_chat_turn( + running_turn.id, + status=DashboardChatTurnStatus.COMPLETED, + progress_label="", + assistant_message=assistant_message, + completed_at=timezone.now(), + ) + publish_dashboard_chat_assistant_message( + session=completed_turn.session, + turn=completed_turn, + message=assistant_message, + ) + finally: + close_old_connections() + + +def execute_dashboard_chat_turn( + session_id: str, + user_message_id: int, + *, + progress_publisher=None, + cancel_checker=None, +) -> DashboardChatMessage: """Load session and message, run the runtime, persist and return the assistant reply. Returns the assistant DashboardChatMessage on success. 
@@ -196,17 +494,23 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> Dashbo if existing_assistant_message is not None: return existing_assistant_message - response = get_dashboard_chat_runtime().run( - org=session.org, - dashboard_id=session.dashboard.id, - user_query=user_message.content, - session_id=str(session.session_id), - vector_collection_name=session.vector_collection_name, - conversation_history=list_dashboard_chat_history( - session, - exclude_message_id=user_message.id, - ), - ) + with dashboard_chat_runtime_hooks( + progress_publisher=progress_publisher, + cancel_checker=cancel_checker, + ): + response = get_dashboard_chat_runtime().run( + org=session.org, + dashboard_id=session.dashboard.id, + user_query=user_message.content, + session_id=str(session.session_id), + vector_collection_name=session.vector_collection_name, + conversation_history=list_dashboard_chat_history( + session, + exclude_message_id=user_message.id, + ), + ) + if cancel_checker is not None and cancel_checker(): + raise DashboardChatRunCancelled() response_payload = response.to_dict() assistant_payload = { key: value for key, value in response_payload.items() if key != "answer_text" @@ -227,6 +531,29 @@ def execute_dashboard_chat_turn(session_id: str, user_message_id: int) -> Dashbo return assistant_message +def _publish_dashboard_chat_event( + *, + session_id: str, + status: str, + message: str, + data: dict, +) -> None: + """Send one event envelope to all websocket listeners for one chat session.""" + + channel_layer = get_channel_layer() + if channel_layer is None: + return + async_to_sync(channel_layer.group_send)( + f"{DASHBOARD_CHAT_SESSION_GROUP_PREFIX}{session_id}", + { + "type": "dashboard_chat_event", + "status": status, + "message": message, + "data": data, + }, + ) + + def _create_dashboard_chat_message( *, session: DashboardChatSession, diff --git a/ddpui/migrations/0160_dashboardchatturn.py b/ddpui/migrations/0160_dashboardchatturn.py new file 
mode 100644 index 000000000..9ff84e224 --- /dev/null +++ b/ddpui/migrations/0160_dashboardchatturn.py @@ -0,0 +1,116 @@ +# Generated by Django 4.2 on 2026-04-06 15:30 + +import django.db.models.deletion +from django.db import migrations, models +import django.utils.timezone + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0156_add_comment_snapshot_index"), + ("ddpui", "0159_dashboardchatprompttemplate_intent_follow_up_refs"), + ] + + operations = [ + migrations.CreateModel( + name="DashboardChatTurn", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "status", + models.CharField( + choices=[ + ("queued", "Queued"), + ("running", "Running"), + ("cancel_requested", "Cancel Requested"), + ("cancelled", "Cancelled"), + ("completed", "Completed"), + ("failed", "Failed"), + ], + default="queued", + max_length=32, + ), + ), + ( + "progress_label", + models.CharField(blank=True, default="", max_length=255), + ), + ( + "error_message", + models.TextField(blank=True, default=""), + ), + ( + "cancel_requested_at", + models.DateTimeField(blank=True, null=True), + ), + ( + "started_at", + models.DateTimeField(blank=True, null=True), + ), + ( + "completed_at", + models.DateTimeField(blank=True, null=True), + ), + ( + "created_at", + models.DateTimeField(default=django.utils.timezone.now), + ), + ( + "updated_at", + models.DateTimeField(auto_now=True), + ), + ( + "assistant_message", + models.OneToOneField( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="assistant_turn", + to="ddpui.dashboardchatmessage", + ), + ), + ( + "session", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="turns", + to="ddpui.dashboardchatsession", + ), + ), + ( + "user_message", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="turn", + to="ddpui.dashboardchatmessage", + ), 
+ ), + ], + options={ + "db_table": "dashboard_chat_turn", + "ordering": ["created_at"], + }, + ), + migrations.AddIndex( + model_name="dashboardchatturn", + index=models.Index( + fields=["session", "status"], + name="dchat_turn_session_status_idx", + ), + ), + migrations.AddIndex( + model_name="dashboardchatturn", + index=models.Index( + fields=["created_at"], + name="dchat_turn_created_idx", + ), + ), + ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index f30be157f..1fc1fe877 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -20,6 +20,17 @@ def choices(cls): return [(key.value, key.name) for key in cls] +class DashboardChatTurnStatus(models.TextChoices): + """Lifecycle states for one dashboard-chat turn.""" + + QUEUED = "queued", "Queued" + RUNNING = "running", "Running" + CANCEL_REQUESTED = "cancel_requested", "Cancel Requested" + CANCELLED = "cancelled", "Cancelled" + COMPLETED = "completed", "Completed" + FAILED = "failed", "Failed" + + class DashboardChatPromptTemplateKey(models.TextChoices): """Runtime-editable prompt templates used by the dashboard chat LLM client.""" @@ -147,3 +158,45 @@ class Meta: name="dchat_message_session_client_msg_unique", ), ] + + +class DashboardChatTurn(models.Model): + """Runtime state for one queued/running/completed dashboard-chat turn.""" + + session = models.ForeignKey( + DashboardChatSession, + on_delete=models.CASCADE, + related_name="turns", + ) + user_message = models.OneToOneField( + DashboardChatMessage, + on_delete=models.CASCADE, + related_name="turn", + ) + assistant_message = models.OneToOneField( + DashboardChatMessage, + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="assistant_turn", + ) + status = models.CharField( + max_length=32, + choices=DashboardChatTurnStatus.choices, + default=DashboardChatTurnStatus.QUEUED, + ) + progress_label = models.CharField(max_length=255, blank=True, default="") + error_message = 
models.TextField(blank=True, default="") + cancel_requested_at = models.DateTimeField(null=True, blank=True) + started_at = models.DateTimeField(null=True, blank=True) + completed_at = models.DateTimeField(null=True, blank=True) + created_at = models.DateTimeField(default=timezone.now) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + db_table = "dashboard_chat_turn" + ordering = ["created_at"] + indexes = [ + models.Index(fields=["session", "status"], name="dchat_turn_session_status_idx"), + models.Index(fields=["created_at"], name="dchat_turn_created_idx"), + ] diff --git a/ddpui/tests/core/dashboard_chat/test_response_payloads.py b/ddpui/tests/core/dashboard_chat/test_response_payloads.py new file mode 100644 index 000000000..2be353040 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_response_payloads.py @@ -0,0 +1,73 @@ +from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist +from ddpui.core.dashboard_chat.contracts import ( + DashboardChatCitation, + DashboardChatIntent, + DashboardChatResponse, + DashboardChatRetrievedDocument, +) +from ddpui.core.dashboard_chat.orchestration.retrieval_support import ( + build_citations, + explore_table_url, +) +from ddpui.core.dashboard_chat.vector.vector_documents import DashboardChatSourceType + + +def test_dashboard_chat_response_to_dict_includes_citation_urls(): + response = DashboardChatResponse( + answer_text="Answer", + intent=DashboardChatIntent.QUERY_WITH_SQL, + citations=[ + DashboardChatCitation( + source_type="warehouse_table", + source_identifier="analytics.sessions", + title="Warehouse table: analytics.sessions", + snippet="SQL executed against analytics.sessions.", + url="/explore?schema_name=analytics&table_name=sessions", + ) + ], + ) + + payload = response.to_dict() + + assert payload["citations"][0]["url"] == "/explore?schema_name=analytics&table_name=sessions" + + +def test_explore_table_url_requires_schema_qualified_table_name(): + assert 
explore_table_url("analytics.sessions") == "/explore?schema_name=analytics&table_name=sessions" + assert explore_table_url("sessions") is None + + +def test_build_citations_adds_frontend_urls(): + allowlist = DashboardChatAllowlist( + allowed_tables={"analytics.sessions"}, + unique_id_to_table={"model.dalgo.sessions": "analytics.sessions"}, + ) + retrieved_documents = [ + DashboardChatRetrievedDocument( + document_id="doc-1", + source_type=DashboardChatSourceType.DASHBOARD_EXPORT.value, + source_identifier="dashboard:6:chart:7", + content="Chart content", + dashboard_id=6, + ), + DashboardChatRetrievedDocument( + document_id="doc-2", + source_type=DashboardChatSourceType.DBT_MANIFEST.value, + source_identifier="manifest:model.dalgo.sessions", + content="Model content", + dashboard_id=6, + ), + ] + dashboard_export = { + "dashboard": {"id": 6, "title": "Impact Overview"}, + "charts": [{"id": 7, "title": "Sessions by District"}], + } + + citations = build_citations( + retrieved_documents=retrieved_documents, + dashboard_export=dashboard_export, + allowlist=allowlist, + ) + + assert citations[0].url == "/charts/7" + assert citations[1].url == "/explore?schema_name=analytics&table_name=sessions" diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index b3268dea0..e279bc3cd 100644 --- a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -8,6 +8,11 @@ from django.contrib.auth.models import User from ddpui.auth import ACCOUNT_MANAGER_ROLE +from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage +from ddpui.core.dashboard_chat.orchestration.runtime_signals import ( + DashboardChatRunCancelled, + publish_runtime_progress, +) from ddpui.core.dashboard_chat.sessions.session_service import ( DashboardChatSessionError, create_dashboard_chat_user_message, @@ -311,8 +316,7 @@ def 
test_execute_dashboard_chat_turn_persists_assistant_message(get_runtime, ses "runtime_total_ms": 123.4, "graph_nodes_ms": {"load_context": 10.0}, } - assert result["status"] == "completed" - assert result["assistant_message"].id == assistant_message.id + assert result.id == assistant_message.id @patch("ddpui.core.dashboard_chat.orchestration.orchestrator.get_dashboard_chat_runtime") @@ -367,6 +371,87 @@ def test_execute_dashboard_chat_turn_reuses_existing_assistant_reply( result = execute_dashboard_chat_turn(str(session.session_id), user_message.id) - assert result["status"] == "skipped_existing_reply" - assert result["assistant_message"].id == assistant_message.id + assert result.id == assistant_message.id get_runtime.assert_not_called() + + +@patch("ddpui.core.dashboard_chat.orchestration.orchestrator.get_dashboard_chat_runtime") +def test_execute_dashboard_chat_turn_forwards_progress_updates( + get_runtime, + session_owner, + dashboard, +): + """Runtime progress hooks should reach the session-owned progress publisher.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + progress_publisher = Mock() + + runtime = Mock() + + def run_with_progress(**kwargs): + publish_runtime_progress( + "Loading dashboard context", + DashboardChatProgressStage.LOADING_CONTEXT, + ) + return DashboardChatResponse( + answer_text="Funding dropped because donor inflows slowed this quarter.", + intent=DashboardChatIntent.QUERY_WITH_SQL, + ) + + runtime.run.side_effect = run_with_progress + get_runtime.return_value = runtime + + execute_dashboard_chat_turn( + str(session.session_id), + user_message.id, + progress_publisher=progress_publisher, + ) + + progress_publisher.assert_called_once_with( + "Loading dashboard context", + 
DashboardChatProgressStage.LOADING_CONTEXT, + ) + + +@patch("ddpui.core.dashboard_chat.orchestration.orchestrator.get_dashboard_chat_runtime") +def test_execute_dashboard_chat_turn_stops_before_persisting_when_cancelled( + get_runtime, + session_owner, + dashboard, +): + """A cancelled turn should not persist a new assistant reply.""" + session = DashboardChatSession.objects.create( + org=session_owner.org, + orguser=session_owner, + dashboard=dashboard, + ) + user_message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="user", + content="Why did funding drop?", + ) + runtime = Mock() + runtime.run.return_value = DashboardChatResponse( + answer_text="Funding dropped because donor inflows slowed this quarter.", + intent=DashboardChatIntent.QUERY_WITH_SQL, + ) + get_runtime.return_value = runtime + + with pytest.raises(DashboardChatRunCancelled): + execute_dashboard_chat_turn( + str(session.session_id), + user_message.id, + cancel_checker=lambda: True, + ) + + assert DashboardChatMessage.objects.filter(session=session, role="assistant").count() == 0 diff --git a/ddpui/tests/websockets/test_dashboard_chat_consumer.py b/ddpui/tests/websockets/test_dashboard_chat_consumer.py index 55c22ab79..e26c5c5a4 100644 --- a/ddpui/tests/websockets/test_dashboard_chat_consumer.py +++ b/ddpui/tests/websockets/test_dashboard_chat_consumer.py @@ -1,174 +1,353 @@ import json +from types import SimpleNamespace from unittest.mock import Mock, patch -import pytest - +from ddpui.models.dashboard_chat import DashboardChatTurnStatus from ddpui.websockets.dashboard_chat_consumer import DashboardChatConsumer -def test_dashboard_chat_consumer_send_message_requires_message(): +def build_consumer() -> DashboardChatConsumer: consumer = DashboardChatConsumer() consumer.send = Mock() consumer.dashboard = Mock(id=42) + consumer.orguser = Mock() + consumer.active_session_group = None + consumer._subscribe_to_session = Mock() + consumer._assert_chat_available = 
Mock(return_value=None) + return consumer + + +def latest_payload(consumer: DashboardChatConsumer) -> dict: + return json.loads(consumer.send.call_args.kwargs["text_data"]) + + +def test_dashboard_chat_consumer_send_message_requires_message(): + consumer = build_consumer() + consumer.websocket_receive({"text": json.dumps({"action": "send_message"})}) - payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + payload = latest_payload(consumer) assert payload["status"] == "error" assert payload["message"] == "Message is required" def test_dashboard_chat_consumer_send_message_requires_available_chat(): - consumer = DashboardChatConsumer() - consumer.send = Mock() - consumer.dashboard = Mock(id=42) - consumer._chat_available = Mock(return_value=(False, "Chat unavailable")) + consumer = build_consumer() + consumer._assert_chat_available.side_effect = Exception("Chat unavailable") + consumer.websocket_receive( - { - "text": json.dumps( - { - "action": "send_message", - "message": "Why did funding drop?", - } - ) - } + {"text": json.dumps({"action": "send_message", "message": "Why did funding drop?"})} ) - payload = json.loads(consumer.send.call_args.kwargs["text_data"]) + payload = latest_payload(consumer) assert payload["status"] == "error" assert payload["message"] == "Chat unavailable" -@patch("ddpui.websockets.dashboard_chat_consumer.serialize_dashboard_chat_message") -@patch("ddpui.websockets.dashboard_chat_consumer.execute_dashboard_chat_turn") +@patch("ddpui.websockets.dashboard_chat_consumer.start_dashboard_chat_turn_background") +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_progress") +@patch("ddpui.websockets.dashboard_chat_consumer.update_dashboard_chat_turn") @patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") @patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") -def test_dashboard_chat_consumer_send_message_creates_session_and_runs_inline( 
+def test_dashboard_chat_consumer_send_message_starts_background_turn( mock_get_or_create_session, mock_create_user_message, - mock_execute_turn, - mock_serialize_message, + mock_update_turn, + mock_publish_progress, + mock_start_background_turn, ): session = Mock(session_id="session-123") user_message = Mock(id=17) - assistant_message = Mock(id=18) + turn = Mock(id=23, status=DashboardChatTurnStatus.QUEUED, assistant_message=None) + mock_get_or_create_session.return_value = session - mock_create_user_message.return_value = Mock(message=user_message, created=True) - mock_execute_turn.return_value = {"status": "completed", "assistant_message": assistant_message} - mock_serialize_message.return_value = {"id": "18", "role": "assistant"} + mock_create_user_message.return_value = SimpleNamespace(message=user_message, created=True) - consumer = DashboardChatConsumer() - consumer.dashboard = Mock(id=42) - consumer.orguser = Mock() - consumer.send = Mock() - consumer._chat_available = Mock(return_value=(True, "")) - consumer._subscribe_to_session = Mock() + turn_manager = Mock() + turn_manager.get_or_create.return_value = (turn, True) + turn_manager.select_related.return_value.get.return_value = turn - consumer.websocket_receive( - { - "text": json.dumps( - { - "action": "send_message", - "message": "Why did funding drop?", - "client_message_id": "ui-1", - } - ) - } - ) + consumer = build_consumer() - mock_get_or_create_session.assert_called_once() - mock_create_user_message.assert_called_once() - consumer._subscribe_to_session.assert_called_once_with("session-123") - mock_execute_turn.assert_called_once_with("session-123", 17) + with patch("ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", turn_manager): + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + "client_message_id": "ui-1", + } + ) + } + ) - first_payload = 
json.loads(consumer.send.call_args_list[0].kwargs["text_data"]) - second_payload = json.loads(consumer.send.call_args_list[1].kwargs["text_data"]) - assert first_payload["status"] == "success" - assert first_payload["data"]["event_type"] == "progress" - assert second_payload["status"] == "success" - assert second_payload["data"]["event_type"] == "assistant_message" + consumer._subscribe_to_session.assert_called_once_with(session) + mock_publish_progress.assert_called_once() + mock_update_turn.assert_called_once_with(23, progress_label="Understanding question") + mock_start_background_turn.assert_called_once_with(23) + consumer.send.assert_not_called() -@patch( - "ddpui.websockets.dashboard_chat_consumer.execute_dashboard_chat_turn", - side_effect=RuntimeError("inline failed"), -) +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_assistant_message") @patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") @patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") -def test_dashboard_chat_consumer_send_message_returns_error_when_inline_turn_fails( +def test_dashboard_chat_consumer_duplicate_send_reuses_completed_turn( mock_get_or_create_session, mock_create_user_message, - mock_execute_turn, + mock_publish_assistant_message, ): session = Mock(session_id="session-123") user_message = Mock(id=17) + assistant_message = Mock(id=31) + turn = Mock( + id=23, + status=DashboardChatTurnStatus.COMPLETED, + assistant_message=assistant_message, + ) + mock_get_or_create_session.return_value = session - mock_create_user_message.return_value = Mock(message=user_message, created=True) + mock_create_user_message.return_value = SimpleNamespace(message=user_message, created=False) - consumer = DashboardChatConsumer() - consumer.dashboard = Mock(id=42) - consumer.orguser = Mock() - consumer.send = Mock() - consumer._chat_available = Mock(return_value=(True, "")) - consumer._subscribe_to_session 
= Mock() + turn_manager = Mock() + turn_manager.get_or_create.return_value = (turn, False) + turn_manager.select_related.return_value.get.return_value = turn - consumer.websocket_receive( - { - "text": json.dumps( - { - "action": "send_message", - "message": "Why did funding drop?", - "client_message_id": "ui-1", - } - ) - } + consumer = build_consumer() + + with patch( + "ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", + turn_manager, + ), patch( + "ddpui.websockets.dashboard_chat_consumer.start_dashboard_chat_turn_background" + ) as mock_start_background_turn: + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + "client_message_id": "ui-1", + } + ) + } + ) + + mock_publish_assistant_message.assert_called_once_with( + session=session, + turn=turn, + message=assistant_message, ) + mock_start_background_turn.assert_not_called() - mock_execute_turn.assert_called_once_with("session-123", 17) - consumer._subscribe_to_session.assert_called_once_with("session-123") - payload = json.loads(consumer.send.call_args_list[-1].kwargs["text_data"]) - assert payload["status"] == "error" - assert payload["message"] == "Something went wrong while generating the response" +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_progress") +@patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_duplicate_send_reuses_in_flight_turn( + mock_get_or_create_session, + mock_create_user_message, + mock_publish_progress, +): + session = Mock(session_id="session-123") + user_message = Mock(id=17) + turn = Mock( + id=23, + status=DashboardChatTurnStatus.RUNNING, + progress_label="Searching relevant sources", + user_message_id=17, + ) + + mock_get_or_create_session.return_value = session + 
mock_create_user_message.return_value = SimpleNamespace(message=user_message, created=False) + + turn_manager = Mock() + turn_manager.get_or_create.return_value = (turn, False) + turn_manager.select_related.return_value.get.return_value = turn + + consumer = build_consumer() + + with patch( + "ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", + turn_manager, + ), patch( + "ddpui.websockets.dashboard_chat_consumer.start_dashboard_chat_turn_background" + ) as mock_start_background_turn: + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + "client_message_id": "ui-1", + } + ) + } + ) + + mock_publish_progress.assert_called_once_with( + session=session, + turn=turn, + label="Searching relevant sources", + stage=None, + message_id=17, + ) + mock_start_background_turn.assert_not_called() -@patch("ddpui.websockets.dashboard_chat_consumer.serialize_dashboard_chat_message") -@patch("ddpui.websockets.dashboard_chat_consumer.find_dashboard_chat_assistant_reply") +@patch("ddpui.websockets.dashboard_chat_consumer.start_dashboard_chat_turn_background") +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_progress") +@patch("ddpui.websockets.dashboard_chat_consumer.update_dashboard_chat_turn") @patch("ddpui.websockets.dashboard_chat_consumer.create_dashboard_chat_user_message_with_status") @patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") -def test_dashboard_chat_consumer_reuses_existing_turn_without_running_duplicate_turn( +def test_dashboard_chat_consumer_retry_starts_background_turn_if_turn_row_was_missing( mock_get_or_create_session, mock_create_user_message, - mock_find_assistant_reply, - mock_serialize_message, + mock_update_turn, + mock_publish_progress, + mock_start_background_turn, ): session = Mock(session_id="session-123") user_message = Mock(id=17) - assistant_message = Mock(id=22) + turn = Mock(id=23, 
status=DashboardChatTurnStatus.QUEUED, assistant_message=None) + mock_get_or_create_session.return_value = session - mock_create_user_message.return_value = Mock(message=user_message, created=False) - mock_find_assistant_reply.return_value = assistant_message - mock_serialize_message.return_value = {"id": "22", "role": "assistant"} + mock_create_user_message.return_value = SimpleNamespace(message=user_message, created=False) - consumer = DashboardChatConsumer() - consumer.dashboard = Mock(id=42) - consumer.orguser = Mock() - consumer.send = Mock() - consumer._chat_available = Mock(return_value=(True, "")) - consumer._subscribe_to_session = Mock() + turn_manager = Mock() + turn_manager.get_or_create.return_value = (turn, True) + turn_manager.select_related.return_value.get.return_value = turn - consumer.websocket_receive( - { - "text": json.dumps( - { - "action": "send_message", - "message": "Why did funding drop?", - "client_message_id": "ui-1", - } - ) - } + consumer = build_consumer() + + with patch("ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", turn_manager): + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "send_message", + "message": "Why did funding drop?", + "client_message_id": "ui-1", + } + ) + } + ) + + mock_publish_progress.assert_called_once() + mock_update_turn.assert_called_once_with(23, progress_label="Understanding question") + mock_start_background_turn.assert_called_once_with(23) + + +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_cancelled") +@patch("ddpui.websockets.dashboard_chat_consumer.update_dashboard_chat_turn") +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_cancel_queued_turn_cancels_immediately( + mock_get_or_create_session, + mock_update_turn, + mock_publish_cancelled, +): + session = Mock(session_id="session-123") + turn = Mock(id=23, status=DashboardChatTurnStatus.QUEUED) + updated_turn = 
Mock(id=23) + + mock_get_or_create_session.return_value = session + mock_update_turn.return_value = updated_turn + + turn_manager = Mock() + turn_manager.filter.return_value.filter.return_value.order_by.return_value.first.return_value = ( + turn ) - consumer._subscribe_to_session.assert_called_once_with("session-123") - payload = json.loads(consumer.send.call_args.kwargs["text_data"]) - assert payload["status"] == "success" - assert payload["data"]["event_type"] == "assistant_message" + consumer = build_consumer() + + with patch("ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", turn_manager): + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "cancel_message", + "session_id": "session-123", + "turn_id": "23", + } + ) + } + ) + + mock_update_turn.assert_called_once() + mock_publish_cancelled.assert_called_once_with(session=session, turn=updated_turn) + + +@patch("ddpui.websockets.dashboard_chat_consumer.publish_dashboard_chat_progress") +@patch("ddpui.websockets.dashboard_chat_consumer.update_dashboard_chat_turn") +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_cancel_running_turn_marks_cancel_requested( + mock_get_or_create_session, + mock_update_turn, + mock_publish_progress, +): + session = Mock(session_id="session-123") + turn = Mock(id=23, status=DashboardChatTurnStatus.RUNNING) + updated_turn = Mock(id=23) + + mock_get_or_create_session.return_value = session + mock_update_turn.return_value = updated_turn + + turn_manager = Mock() + turn_manager.filter.return_value.filter.return_value.order_by.return_value.first.return_value = ( + turn + ) + + consumer = build_consumer() + + with patch("ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", turn_manager): + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "cancel_message", + "session_id": "session-123", + "turn_id": "23", + } + ) + } + ) + + 
mock_update_turn.assert_called_once() + mock_publish_progress.assert_called_once() + + +def test_dashboard_chat_consumer_cancel_requires_session_id(): + consumer = build_consumer() + + consumer.websocket_receive({"text": json.dumps({"action": "cancel_message"})}) + + payload = latest_payload(consumer) + assert payload["status"] == "error" + assert payload["message"] == "session_id is required to cancel a message" + + +@patch("ddpui.websockets.dashboard_chat_consumer.get_or_create_dashboard_chat_session") +def test_dashboard_chat_consumer_cancel_rejects_non_integer_turn_id(mock_get_or_create_session): + session = Mock(session_id="session-123") + mock_get_or_create_session.return_value = session + consumer = build_consumer() + + turn_manager = Mock() + turn_manager.filter.return_value = turn_manager + + with patch("ddpui.websockets.dashboard_chat_consumer.DashboardChatTurn.objects", turn_manager): + consumer.websocket_receive( + { + "text": json.dumps( + { + "action": "cancel_message", + "session_id": "session-123", + "turn_id": "not-an-int", + } + ) + } + ) + + payload = latest_payload(consumer) + assert payload["status"] == "error" + assert payload["message"] == "turn_id must be an integer" diff --git a/ddpui/websockets/dashboard_chat_consumer.py b/ddpui/websockets/dashboard_chat_consumer.py index 6f2ca5f0c..36ae67723 100644 --- a/ddpui/websockets/dashboard_chat_consumer.py +++ b/ddpui/websockets/dashboard_chat_consumer.py @@ -1,17 +1,27 @@ import json from urllib.parse import parse_qs +from asgiref.sync import async_to_sync from django.utils import timezone +from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage from ddpui.core.dashboard_chat.sessions.session_service import ( DashboardChatSessionError, + DASHBOARD_CHAT_SESSION_GROUP_PREFIX, create_dashboard_chat_user_message_with_status, - execute_dashboard_chat_turn, get_or_create_dashboard_chat_session, - serialize_dashboard_chat_message, + 
publish_dashboard_chat_assistant_message, + publish_dashboard_chat_cancelled, + publish_dashboard_chat_progress, + start_dashboard_chat_turn_background, + update_dashboard_chat_turn, ) from ddpui.models.dashboard import Dashboard -from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession +from ddpui.models.dashboard_chat import ( + DashboardChatSession, + DashboardChatTurn, + DashboardChatTurnStatus, +) from ddpui.models.org_preferences import OrgPreferences from ddpui.models.role_based_access import RolePermission from ddpui.utils.custom_logger import CustomLogger @@ -26,6 +36,7 @@ class DashboardChatConsumer(BaseConsumer): """Authenticated websocket for dashboard-level chat.""" def connect(self): + self.active_session_group = None query_string = parse_qs(self.scope["query_string"].decode()) token = query_string.get("token", [None])[0] orgslug = query_string.get("orgslug", [None])[0] @@ -50,10 +61,37 @@ def websocket_receive(self, message): self._send_error("Invalid websocket payload") return - if payload.get("action") != "send_message": + action = payload.get("action") + if action == "send_message": + self._handle_send_message(payload) + return + if action == "cancel_message": + self._handle_cancel_message(payload) + return + if action != "send_message": self._send_error("Unsupported websocket action") return + def disconnect(self, code): + if self.active_session_group is not None: + async_to_sync(self.channel_layer.group_discard)( + self.active_session_group, + self.channel_name, + ) + super().disconnect(code) + + def dashboard_chat_event(self, event): + self.respond( + WebsocketResponse( + status=event["status"], + message=event["message"], + data=event["data"], + ) + ) + + def _handle_send_message(self, payload: dict) -> None: + """Persist one user message, start one background turn, and stream progress via channels.""" + raw_message = str(payload.get("message") or "").strip() if not raw_message: self._send_error("Message is required") 
@@ -75,59 +113,134 @@ def websocket_receive(self, message): self._send_error(str(error)) return - user_message = create_dashboard_chat_user_message_with_status( + self._subscribe_to_session(session) + + user_message_result = create_dashboard_chat_user_message_with_status( session=session, content=raw_message, client_message_id=payload.get("client_message_id"), - ).message + ) + turn, turn_created = DashboardChatTurn.objects.get_or_create( + session=session, + user_message=user_message_result.message, + defaults={"status": DashboardChatTurnStatus.QUEUED}, + ) + turn = DashboardChatTurn.objects.select_related("assistant_message").get(id=turn.id) + if not user_message_result.created and not turn_created: + self._handle_existing_turn(session, turn) + return + publish_dashboard_chat_progress( + session=session, + turn=turn, + label="Understanding question", + stage=DashboardChatProgressStage.UNDERSTANDING_QUESTION, + message_id=user_message_result.message.id, + ) + update_dashboard_chat_turn( + turn.id, + progress_label="Understanding question", + ) + start_dashboard_chat_turn_background(turn.id) - self._send_progress(session, user_message) + def _handle_cancel_message(self, payload: dict) -> None: + """Mark one queued/running dashboard-chat turn as cancelled or cancel-requested.""" + session_id = payload.get("session_id") + if not session_id: + self._send_error("session_id is required to cancel a message") + return try: - assistant_message = execute_dashboard_chat_turn( - str(session.session_id), user_message.id + session = get_or_create_dashboard_chat_session( + orguser=self.orguser, + dashboard=self.dashboard, + session_id=session_id, ) - except Exception: - logger.exception("dashboard chat turn failed for session=%s", session.session_id) - self._send_error("Something went wrong while generating the response") + except DashboardChatSessionError as error: + self._send_error(str(error)) return - self._send_assistant_message(session, assistant_message) - - # 
------------------------------------------------------------------------- - # Response helpers - # ------------------------------------------------------------------------- + turn_id = payload.get("turn_id") + turn_query = DashboardChatTurn.objects.filter(session=session) + if turn_id is not None: + try: + turn_query = turn_query.filter(id=int(turn_id)) + except (TypeError, ValueError): + self._send_error("turn_id must be an integer") + return + turn = turn_query.order_by("-created_at").first() + if turn is None: + self._send_error("No active chat turn found for this session") + return - def _send_progress(self, session: DashboardChatSession, user_message: DashboardChatMessage): - self.respond( - WebsocketResponse( - status=WebsocketResponseStatus.SUCCESS, - message="", - data={ - "event_type": "progress", - "session_id": str(session.session_id), - "message_id": str(user_message.id), - "dashboard_id": self.dashboard.id, - "occurred_at": timezone.now().isoformat(), - }, + if turn.status == DashboardChatTurnStatus.QUEUED: + cancelled_turn = update_dashboard_chat_turn( + turn.id, + status=DashboardChatTurnStatus.CANCELLED, + progress_label="Generation stopped", + cancel_requested_at=timezone.now(), + completed_at=timezone.now(), ) + publish_dashboard_chat_cancelled(session=session, turn=cancelled_turn) + return + + if turn.status != DashboardChatTurnStatus.RUNNING: + self._send_error("This chat turn can no longer be cancelled") + return + + updated_turn = update_dashboard_chat_turn( + turn.id, + status=DashboardChatTurnStatus.CANCEL_REQUESTED, + progress_label="Stopping...", + cancel_requested_at=timezone.now(), + ) + publish_dashboard_chat_progress( + session=session, + turn=updated_turn, + label="Stopping...", + stage=DashboardChatProgressStage.CANCELLING, ) - def _send_assistant_message(self, session: DashboardChatSession, message: DashboardChatMessage): - self.respond( - WebsocketResponse( - status=WebsocketResponseStatus.SUCCESS, - message="", - data={ - 
"event_type": "assistant_message", - "session_id": str(session.session_id), - "message_id": str(message.id), - "dashboard_id": self.dashboard.id, - "occurred_at": timezone.now().isoformat(), - **serialize_dashboard_chat_message(message), - }, + def _handle_existing_turn( + self, + session: DashboardChatSession, + turn: DashboardChatTurn, + ) -> None: + """Reuse the existing turn state for duplicate client-message retries.""" + + if ( + turn.status == DashboardChatTurnStatus.COMPLETED + and turn.assistant_message is not None + ): + publish_dashboard_chat_assistant_message( + session=session, + turn=turn, + message=turn.assistant_message, ) - ) + return + + if turn.status in { + DashboardChatTurnStatus.QUEUED, + DashboardChatTurnStatus.RUNNING, + DashboardChatTurnStatus.CANCEL_REQUESTED, + }: + publish_dashboard_chat_progress( + session=session, + turn=turn, + label=turn.progress_label or "Understanding question", + stage=None, + message_id=turn.user_message_id, + ) + return + + if turn.status == DashboardChatTurnStatus.CANCELLED: + publish_dashboard_chat_cancelled(session=session, turn=turn) + return + + self._send_error("This chat turn can no longer be retried") + + # ------------------------------------------------------------------------- + # Response helpers + # ------------------------------------------------------------------------- def _send_error(self, message: str): self.respond( @@ -166,3 +279,15 @@ def _has_permission(self, permission_slug: str) -> bool: role=self.orguser.new_role, permission__slug=permission_slug, ).exists() + + def _subscribe_to_session(self, session: DashboardChatSession) -> None: + session_group = f"{DASHBOARD_CHAT_SESSION_GROUP_PREFIX}{session.session_id}" + if self.active_session_group == session_group: + return + if self.active_session_group is not None: + async_to_sync(self.channel_layer.group_discard)( + self.active_session_group, + self.channel_name, + ) + async_to_sync(self.channel_layer.group_add)(session_group, 
self.channel_name) + self.active_session_group = session_group From b73d1020b053f6ae753bc13aabfaa84cdcb12eae Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Mon, 6 Apr 2026 10:51:49 +0530 Subject: [PATCH 39/49] refactor(ai-chat): use explicit dashboard chat contracts --- .../agents/final_answer_formatting.py | 2 +- .../agents/llm_client_interface.py | 6 ++-- .../agents/openai_llm_client.py | 6 ++-- .../core/dashboard_chat/contracts/__init__.py | 20 ------------ .../orchestration/conversation_context.py | 2 +- .../orchestration/intent_routing.py | 2 +- .../llm_tools/implementations/sql_parsing.py | 2 +- .../implementations/sql_validation.py | 12 ++++--- .../llm_tools/runtime/turn_context.py | 10 ++++-- .../orchestration/nodes/compose_response.py | 8 ++--- .../orchestration/nodes/finalize.py | 8 ++--- .../orchestration/nodes/handle_irrelevant.py | 3 +- .../nodes/handle_needs_clarification.py | 4 +-- .../orchestration/nodes/handle_small_talk.py | 3 +- .../orchestration/response_composer.py | 4 +-- .../tool_loop_message_builder.py | 4 ++- .../sessions/session_service.py | 8 ++--- .../dashboard_chat/warehouse/sql_guard.py | 2 +- .../warehouse/warehouse_access_tools.py | 2 +- .../test_langgraph_checkpointing.py | 4 +-- .../core/dashboard_chat/test_llm_client.py | 6 ++-- .../dashboard_chat/test_response_payloads.py | 6 ++-- .../tests/core/dashboard_chat/test_runtime.py | 10 +++--- .../dashboard_chat/test_session_service.py | 3 +- .../test_session_service_background_runner.py | 32 +++++++++++++++++++ 25 files changed, 99 insertions(+), 70 deletions(-) delete mode 100644 ddpui/core/dashboard_chat/contracts/__init__.py create mode 100644 ddpui/tests/core/dashboard_chat/test_session_service_background_runner.py diff --git a/ddpui/core/dashboard_chat/agents/final_answer_formatting.py b/ddpui/core/dashboard_chat/agents/final_answer_formatting.py index 88c7859d3..086ee7105 100644 --- a/ddpui/core/dashboard_chat/agents/final_answer_formatting.py +++ 
b/ddpui/core/dashboard_chat/agents/final_answer_formatting.py @@ -3,7 +3,7 @@ import json from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatRetrievedDocument +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument TABLE_SUMMARY_JSON_INSTRUCTIONS = """ For table-like responses, return valid JSON only with this shape: diff --git a/ddpui/core/dashboard_chat/agents/llm_client_interface.py b/ddpui/core/dashboard_chat/agents/llm_client_interface.py index 30a7e3dd0..ce32da79f 100644 --- a/ddpui/core/dashboard_chat/agents/llm_client_interface.py +++ b/ddpui/core/dashboard_chat/agents/llm_client_interface.py @@ -2,12 +2,14 @@ from typing import Any, Protocol -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( DashboardChatConversationContext, +) +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatRetrievedDocument, ) +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey diff --git a/ddpui/core/dashboard_chat/agents/openai_llm_client.py b/ddpui/core/dashboard_chat/agents/openai_llm_client.py index 65c5ab538..c101788c8 100644 --- a/ddpui/core/dashboard_chat/agents/openai_llm_client.py +++ b/ddpui/core/dashboard_chat/agents/openai_llm_client.py @@ -13,13 +13,15 @@ format_table_summary_markdown, ) from ddpui.core.dashboard_chat.agents.prompt_template_store import DashboardChatPromptStore -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( DashboardChatConversationContext, +) +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatRetrievedDocument, ) +from 
ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.models.dashboard_chat import DashboardChatPromptTemplateKey from ddpui.utils.custom_logger import CustomLogger from ddpui.utils.openai_client import get_shared_openai_client diff --git a/ddpui/core/dashboard_chat/contracts/__init__.py b/ddpui/core/dashboard_chat/contracts/__init__.py deleted file mode 100644 index 421bb38f3..000000000 --- a/ddpui/core/dashboard_chat/contracts/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Typed contracts for dashboard chat orchestration.""" - -from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( - DashboardChatConversationContext, - DashboardChatConversationMessage, -) -from ddpui.core.dashboard_chat.contracts.intent_contracts import ( - DashboardChatFollowUpContext, - DashboardChatIntent, - DashboardChatIntentDecision, -) -from ddpui.core.dashboard_chat.contracts.response_contracts import ( - DashboardChatCitation, - DashboardChatResponse, -) -from ddpui.core.dashboard_chat.contracts.retrieval_contracts import ( - DashboardChatRetrievedDocument, - DashboardChatSchemaSnippet, -) -from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult diff --git a/ddpui/core/dashboard_chat/orchestration/conversation_context.py b/ddpui/core/dashboard_chat/orchestration/conversation_context.py index 84158d743..0a1542210 100644 --- a/ddpui/core/dashboard_chat/orchestration/conversation_context.py +++ b/ddpui/core/dashboard_chat/orchestration/conversation_context.py @@ -4,7 +4,7 @@ import re from typing import Any -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, ) diff --git a/ddpui/core/dashboard_chat/orchestration/intent_routing.py b/ddpui/core/dashboard_chat/orchestration/intent_routing.py index acec8d20d..0c7dfb7b1 100644 --- 
a/ddpui/core/dashboard_chat/orchestration/intent_routing.py +++ b/ddpui/core/dashboard_chat/orchestration/intent_routing.py @@ -1,6 +1,6 @@ """Graph intent-routing helpers for dashboard chat orchestration.""" -from ddpui.core.dashboard_chat.contracts import DashboardChatIntentDecision +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntentDecision from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_parsing.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_parsing.py index 560cf88ad..14a39f512 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_parsing.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_parsing.py @@ -5,7 +5,7 @@ from typing import Any from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import normalize_dashboard_chat_table_name -from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatSchemaSnippet from ddpui.core.dashboard_chat.orchestration.conversation_context import extract_dimensions_from_sql from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py index ba5912e2a..9b33f386c 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/implementations/sql_validation.py @@ -4,15 +4,17 @@ from typing import Any from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent +from 
ddpui.core.dashboard_chat.contracts.conversation_contracts import ( + DashboardChatConversationContext, +) +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( + DashboardChatIntent, + DashboardChatIntentDecision, +) from ddpui.core.dashboard_chat.orchestration.conversation_context import ( extract_requested_follow_up_dimension, ) -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatConversationContext, - DashboardChatIntentDecision, -) from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState from ddpui.core.dashboard_chat.orchestration.llm_tools.implementations.sql_parsing import ( extract_text_filter_values, diff --git a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py index 134e401c4..82c1e1517 100644 --- a/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py +++ b/ddpui/core/dashboard_chat/orchestration/llm_tools/runtime/turn_context.py @@ -5,14 +5,18 @@ import re from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatRetrievedDocument +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( + DashboardChatConversationContext, +) +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import ( + DashboardChatRetrievedDocument, + DashboardChatSchemaSnippet, +) from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult from ddpui.core.dashboard_chat.warehouse.warehouse_access_tools import DashboardChatWarehouseTools from ddpui.utils.custom_logger import CustomLogger -from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatSchemaSnippet from ddpui.core.dashboard_chat.orchestration.retrieval_support import get_or_embed_query -from ddpui.core.dashboard_chat.contracts import DashboardChatConversationContext from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import 
DashboardChatAllowlist from ddpui.models.org import Org from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py b/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py index 9eed5996a..53299b364 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/compose_response.py @@ -3,11 +3,9 @@ from typing import Any from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( - DashboardChatIntentDecision, - DashboardChatResponse, - DashboardChatRetrievedDocument, -) +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntentDecision +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py index 1fd8cda07..db08b3ddb 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -3,13 +3,13 @@ from typing import Any from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntentDecision +from ddpui.core.dashboard_chat.contracts.response_contracts import ( DashboardChatCitation, - DashboardChatIntentDecision, DashboardChatResponse, - DashboardChatRetrievedDocument, - DashboardChatSqlValidationResult, ) +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import 
DashboardChatRetrievedDocument +from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState from ddpui.core.dashboard_chat.orchestration.retrieval_support import explore_table_url diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py index d154d0e25..9a9e164f6 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_irrelevant.py @@ -2,7 +2,8 @@ from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntent +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse from ddpui.core.dashboard_chat.orchestration.response_composer import build_usage_summary from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py index 7cf642e4c..6a41aeaee 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_needs_clarification.py @@ -2,11 +2,11 @@ from typing import Any -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatResponse, ) +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, clarification_fallback, diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py 
b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py index 37c814425..90b1150e6 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/handle_small_talk.py @@ -2,7 +2,8 @@ from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntent +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse from ddpui.core.dashboard_chat.orchestration.response_composer import ( build_usage_summary, diff --git a/ddpui/core/dashboard_chat/orchestration/response_composer.py b/ddpui/core/dashboard_chat/orchestration/response_composer.py index aaa6b5dca..a1ff873e3 100644 --- a/ddpui/core/dashboard_chat/orchestration/response_composer.py +++ b/ddpui/core/dashboard_chat/orchestration/response_composer.py @@ -4,11 +4,11 @@ import re from typing import Any -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatRetrievedDocument, ) +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.utils.custom_logger import CustomLogger from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState diff --git a/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py b/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py index 15bbfc745..3bfeaf7b7 100644 --- a/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py +++ b/ddpui/core/dashboard_chat/orchestration/tool_loop_message_builder.py @@ -8,7 +8,9 @@ build_follow_up_context_prompt, detect_sql_modification_type, ) -from ddpui.core.dashboard_chat.contracts import DashboardChatConversationContext +from 
ddpui.core.dashboard_chat.contracts.conversation_contracts import ( + DashboardChatConversationContext, +) from ddpui.core.dashboard_chat.orchestration.state import DashboardChatGraphState diff --git a/ddpui/core/dashboard_chat/sessions/session_service.py b/ddpui/core/dashboard_chat/sessions/session_service.py index 8fe292996..32c5a9701 100644 --- a/ddpui/core/dashboard_chat/sessions/session_service.py +++ b/ddpui/core/dashboard_chat/sessions/session_service.py @@ -338,10 +338,10 @@ def _run_dashboard_chat_turn_in_background(turn_id: int) -> None: logger.warning("dashboard chat turn %s not found", turn_id) return - if turn.status in { - DashboardChatTurnStatus.CANCEL_REQUESTED, - DashboardChatTurnStatus.CANCELLED, - }: + if turn.status == DashboardChatTurnStatus.CANCELLED: + return + + if turn.status == DashboardChatTurnStatus.CANCEL_REQUESTED: cancelled_turn = update_dashboard_chat_turn( turn.id, status=DashboardChatTurnStatus.CANCELLED, diff --git a/ddpui/core/dashboard_chat/warehouse/sql_guard.py b/ddpui/core/dashboard_chat/warehouse/sql_guard.py index cf121a024..ac44aca3a 100644 --- a/ddpui/core/dashboard_chat/warehouse/sql_guard.py +++ b/ddpui/core/dashboard_chat/warehouse/sql_guard.py @@ -5,7 +5,7 @@ import sqlparse from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import DashboardChatSqlValidationResult +from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult FORBIDDEN_SQL_KEYWORDS = { "INTO", diff --git a/ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py b/ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py index 10f529ed3..2132b38c9 100644 --- a/ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py +++ b/ddpui/core/dashboard_chat/warehouse/warehouse_access_tools.py @@ -4,7 +4,7 @@ import re from typing import Any -from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet +from 
ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatSchemaSnippet from ddpui.models.org import Org, OrgWarehouse from ddpui.utils.custom_logger import CustomLogger from ddpui.utils import secretsmanager diff --git a/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py b/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py index d5440b694..338e6b6a6 100644 --- a/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py +++ b/ddpui/tests/core/dashboard_chat/test_langgraph_checkpointing.py @@ -15,12 +15,12 @@ from ddpui.core.dashboard_chat.orchestration.state.payload_codec import ( serialize_sql_validation_result, ) -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatSqlValidationResult, ) +from ddpui.core.dashboard_chat.contracts.sql_contracts import DashboardChatSqlValidationResult from ddpui.models.dashboard import Dashboard from ddpui.models.org import Org from ddpui.models.org_user import OrgUser diff --git a/ddpui/tests/core/dashboard_chat/test_llm_client.py b/ddpui/tests/core/dashboard_chat/test_llm_client.py index 306096d5d..21b93e321 100644 --- a/ddpui/tests/core/dashboard_chat/test_llm_client.py +++ b/ddpui/tests/core/dashboard_chat/test_llm_client.py @@ -4,12 +4,14 @@ import ddpui.core.dashboard_chat.agents.openai_llm_client as llm_client_module from ddpui.core.dashboard_chat.agents.openai_llm_client import OpenAIDashboardChatLlmClient -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( DashboardChatConversationContext, +) +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatRetrievedDocument, ) +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument 
class FakePromptStore: diff --git a/ddpui/tests/core/dashboard_chat/test_response_payloads.py b/ddpui/tests/core/dashboard_chat/test_response_payloads.py index 2be353040..49f1c91c3 100644 --- a/ddpui/tests/core/dashboard_chat/test_response_payloads.py +++ b/ddpui/tests/core/dashboard_chat/test_response_payloads.py @@ -1,10 +1,10 @@ from ddpui.core.dashboard_chat.context.dashboard_table_allowlist import DashboardChatAllowlist -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntent +from ddpui.core.dashboard_chat.contracts.response_contracts import ( DashboardChatCitation, - DashboardChatIntent, DashboardChatResponse, - DashboardChatRetrievedDocument, ) +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.core.dashboard_chat.orchestration.retrieval_support import ( build_citations, explore_table_url, diff --git a/ddpui/tests/core/dashboard_chat/test_runtime.py b/ddpui/tests/core/dashboard_chat/test_runtime.py index fbb4751ca..f9bf61c1f 100644 --- a/ddpui/tests/core/dashboard_chat/test_runtime.py +++ b/ddpui/tests/core/dashboard_chat/test_runtime.py @@ -49,15 +49,17 @@ DashboardChatTurnContext, seed_validated_distinct_values_from_previous_sql, ) -from ddpui.core.dashboard_chat.contracts import ( +from ddpui.core.dashboard_chat.contracts.conversation_contracts import ( DashboardChatConversationContext, DashboardChatConversationMessage, +) +from ddpui.core.dashboard_chat.contracts.intent_contracts import ( DashboardChatFollowUpContext, DashboardChatIntent, DashboardChatIntentDecision, - DashboardChatRetrievedDocument, - DashboardChatResponse, ) +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse +from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatRetrievedDocument from ddpui.core.dashboard_chat.warehouse.sql_guard import DashboardChatSqlGuard from 
ddpui.core.dashboard_chat.vector.vector_documents import DashboardChatSourceType from ddpui.utils.vector.interface import VectorQueryResult as DashboardChatVectorQueryResult @@ -330,7 +332,7 @@ def execute_sql(self, sql): @staticmethod def _schema_snippet(table_name, columns): - from ddpui.core.dashboard_chat.contracts import DashboardChatSchemaSnippet + from ddpui.core.dashboard_chat.contracts.retrieval_contracts import DashboardChatSchemaSnippet return DashboardChatSchemaSnippet(table_name=table_name, columns=columns) diff --git a/ddpui/tests/core/dashboard_chat/test_session_service.py b/ddpui/tests/core/dashboard_chat/test_session_service.py index e279bc3cd..4c04967bc 100644 --- a/ddpui/tests/core/dashboard_chat/test_session_service.py +++ b/ddpui/tests/core/dashboard_chat/test_session_service.py @@ -9,6 +9,8 @@ from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.core.dashboard_chat.contracts.event_contracts import DashboardChatProgressStage +from ddpui.core.dashboard_chat.contracts.intent_contracts import DashboardChatIntent +from ddpui.core.dashboard_chat.contracts.response_contracts import DashboardChatResponse from ddpui.core.dashboard_chat.orchestration.runtime_signals import ( DashboardChatRunCancelled, publish_runtime_progress, @@ -21,7 +23,6 @@ get_or_create_dashboard_chat_session, ) from ddpui.core.dashboard_chat.vector.vector_documents import build_dashboard_chat_collection_name -from ddpui.core.dashboard_chat.contracts import DashboardChatIntent, DashboardChatResponse from ddpui.models.dashboard import Dashboard from ddpui.models.dashboard_chat import DashboardChatMessage, DashboardChatSession from ddpui.models.org import Org, OrgDbt diff --git a/ddpui/tests/core/dashboard_chat/test_session_service_background_runner.py b/ddpui/tests/core/dashboard_chat/test_session_service_background_runner.py new file mode 100644 index 000000000..58914c001 --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_session_service_background_runner.py @@ -0,0 +1,32 
@@ +from unittest.mock import Mock, patch + +from ddpui.core.dashboard_chat.sessions.session_service import _run_dashboard_chat_turn_in_background +from ddpui.models.dashboard_chat import DashboardChatTurnStatus + + +@patch("ddpui.core.dashboard_chat.sessions.session_service.close_old_connections") +@patch("ddpui.core.dashboard_chat.sessions.session_service.publish_dashboard_chat_cancelled") +@patch("ddpui.core.dashboard_chat.sessions.session_service.update_dashboard_chat_turn") +def test_background_runner_skips_duplicate_cancelled_event( + mock_update_turn, + mock_publish_cancelled, + mock_close_old_connections, +): + turn = Mock( + id=23, + status=DashboardChatTurnStatus.CANCELLED, + session=Mock(), + user_message=Mock(), + ) + turn_manager = Mock() + turn_manager.select_related.return_value.filter.return_value.first.return_value = turn + + with patch( + "ddpui.core.dashboard_chat.sessions.session_service.DashboardChatTurn.objects", + turn_manager, + ): + _run_dashboard_chat_turn_in_background(23) + + mock_close_old_connections.assert_called() + mock_update_turn.assert_not_called() + mock_publish_cancelled.assert_not_called() From 49737f6094e413e6bd385d7c2d744b771eae4ddb Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Mon, 6 Apr 2026 12:36:33 +0530 Subject: [PATCH 40/49] feat(ai-chat): add prompts feedback and source polish --- ddpui/api/dashboard_native_api.py | 91 ++++- ddpui/api/user_org_api.py | 1 + .../contracts/event_contracts.py | 1 + .../orchestration/nodes/finalize.py | 24 +- .../orchestration/retrieval_support.py | 86 +++- .../sessions/session_service.py | 2 + .../core/dashboard_chat/suggested_prompts.py | 374 ++++++++++++++++++ .../0161_dashboardchatmessage_feedback.py | 22 ++ ddpui/models/dashboard_chat.py | 13 + ddpui/models/org_user.py | 1 + ddpui/schemas/dashboard_schema.py | 22 +- .../api_tests/test_dashboard_native_api.py | 78 ++++ ddpui/tests/api_tests/test_user_org_api.py | 7 +- .../dashboard_chat/test_response_payloads.py | 114 +++++- 
.../dashboard_chat/test_suggested_prompts.py | 115 ++++++ .../models/test_dashboard_chat_models.py | 18 + 16 files changed, 943 insertions(+), 26 deletions(-) create mode 100644 ddpui/core/dashboard_chat/suggested_prompts.py create mode 100644 ddpui/migrations/0161_dashboardchatmessage_feedback.py create mode 100644 ddpui/tests/core/dashboard_chat/test_suggested_prompts.py diff --git a/ddpui/api/dashboard_native_api.py b/ddpui/api/dashboard_native_api.py index c2702b14d..b499a835b 100644 --- a/ddpui/api/dashboard_native_api.py +++ b/ddpui/api/dashboard_native_api.py @@ -16,7 +16,12 @@ DashboardLock, DashboardFilterType, ) -from ddpui.models.dashboard_chat import DashboardAIContext +from ddpui.models.dashboard_chat import ( + DashboardAIContext, + DashboardChatMessage, + DashboardChatMessageRole, + OrgAIContext, +) from ddpui.models.org_preferences import OrgPreferences from ddpui.models.org_user import OrgUser from ddpui.auth import has_permission @@ -51,8 +56,12 @@ LandingPageResponse, LandingPageResolveResponse, DashboardAIContextResponse, + DashboardChatBootstrapResponse, + DashboardChatMessageFeedbackRequest, + DashboardChatMessageFeedbackResponse, UpdateDashboardAIContextSchema, ) +from ddpui.core.dashboard_chat.suggested_prompts import build_dashboard_suggested_prompts logger = CustomLogger("ddpui") @@ -76,6 +85,21 @@ def _serialize_dashboard_ai_context(dashboard: Dashboard, context: DashboardAICo ) +def _serialize_dashboard_chat_bootstrap(dashboard: Dashboard) -> DashboardChatBootstrapResponse: + dashboard_export = DashboardService.export_dashboard_context_for_dashboard(dashboard, dashboard.org) + org_context, _ = OrgAIContext.objects.get_or_create(org=dashboard.org) + dashboard_context, _ = DashboardAIContext.objects.get_or_create(dashboard=dashboard) + + return DashboardChatBootstrapResponse( + dashboard_id=dashboard.id, + suggested_prompts=build_dashboard_suggested_prompts( + dashboard_export=dashboard_export, + org_context_markdown=org_context.markdown, 
+ dashboard_context_markdown=dashboard_context.markdown, + ), + ) + + def _ensure_dashboard_chat_feature_enabled(org) -> None: """Hide dashboard chat settings endpoints unless the feature flag is enabled.""" if not get_all_feature_flags_for_org(org).get("AI_DASHBOARD_CHAT", False): @@ -157,6 +181,71 @@ def get_dashboard_ai_context(request, dashboard_id: int): return _serialize_dashboard_ai_context(dashboard, context) +@dashboard_native_router.get( + "/{dashboard_id}/chat-bootstrap/", + response=DashboardChatBootstrapResponse, +) +@has_permission(["can_view_dashboards"]) +def get_dashboard_chat_bootstrap(request, dashboard_id: int): + """Return deterministic UI bootstrap data for dashboard chat.""" + orguser: OrgUser = request.orguser + _ensure_dashboard_chat_feature_enabled(orguser.org) + + try: + dashboard = DashboardService.get_dashboard(dashboard_id, orguser.org) + except DashboardNotFoundError as err: + raise HttpError(404, "Dashboard not found") from err + + return _serialize_dashboard_chat_bootstrap(dashboard) + + +@dashboard_native_router.post( + "/{dashboard_id}/chat/messages/{message_id}/feedback/", + response=DashboardChatMessageFeedbackResponse, +) +@has_permission(["can_view_dashboards"]) +@transaction.atomic +def set_dashboard_chat_message_feedback( + request, + dashboard_id: int, + message_id: int, + payload: DashboardChatMessageFeedbackRequest, +): + """Persist one locked thumbs-up/thumbs-down selection for an assistant answer.""" + orguser: OrgUser = request.orguser + _ensure_dashboard_chat_feature_enabled(orguser.org) + + message = ( + DashboardChatMessage.objects.select_related("session") + .filter( + id=message_id, + session__org=orguser.org, + session__orguser=orguser, + session__dashboard_id=dashboard_id, + role=DashboardChatMessageRole.ASSISTANT.value, + ) + .first() + ) + if message is None: + raise HttpError(404, "Assistant message not found") + + if message.feedback is not None: + if message.feedback == payload.feedback: + return 
DashboardChatMessageFeedbackResponse( + message_id=message.id, + feedback=message.feedback, + ) + raise HttpError(409, "Feedback has already been recorded for this message") + + message.feedback = payload.feedback + message.save(update_fields=["feedback"]) + + return DashboardChatMessageFeedbackResponse( + message_id=message.id, + feedback=message.feedback, + ) + + @dashboard_native_router.put( "/{dashboard_id}/ai-context/", response=DashboardAIContextResponse, diff --git a/ddpui/api/user_org_api.py b/ddpui/api/user_org_api.py index aa79d3f67..38777dcf3 100644 --- a/ddpui/api/user_org_api.py +++ b/ddpui/api/user_org_api.py @@ -113,6 +113,7 @@ def get_current_user_v2(request, org_slug: str = None): res.append( OrgUserResponse( email=user.email, + first_name=user.first_name or None, org=curr_orguser.org, active=user.is_active, new_role_slug=curr_orguser.new_role.slug, diff --git a/ddpui/core/dashboard_chat/contracts/event_contracts.py b/ddpui/core/dashboard_chat/contracts/event_contracts.py index 477e6ec8f..a6270e79a 100644 --- a/ddpui/core/dashboard_chat/contracts/event_contracts.py +++ b/ddpui/core/dashboard_chat/contracts/event_contracts.py @@ -63,5 +63,6 @@ class DashboardChatAssistantMessageEvent(BaseModel): content: str created_at: datetime payload: dict + feedback: Literal["thumbs_up", "thumbs_down"] | None = None response_latency_ms: int | None = None timing_breakdown: dict | None = None diff --git a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py index db08b3ddb..a0f54eb61 100644 --- a/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py +++ b/ddpui/core/dashboard_chat/orchestration/nodes/finalize.py @@ -14,6 +14,28 @@ from ddpui.core.dashboard_chat.orchestration.retrieval_support import explore_table_url +def _dedupe_citations( + citations: list[DashboardChatCitation], +) -> list[DashboardChatCitation]: + """Collapse duplicate sources while preferring SQL-execution table 
citations.""" + deduped: dict[tuple[str, str], DashboardChatCitation] = {} + for citation in citations: + key = (citation.source_type, citation.source_identifier) + existing = deduped.get(key) + if existing is None: + deduped[key] = citation + continue + + if ( + citation.source_type == "warehouse_table" + and citation.snippet.startswith("SQL executed against ") + and not existing.snippet.startswith("SQL executed against ") + ): + deduped[key] = citation + + return list(deduped.values()) + + def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: """Attach warehouse citations and metadata to the finished response.""" response = DashboardChatResponse.model_validate(state.get("response") or {}) @@ -66,7 +88,7 @@ def finalize_node(state: DashboardChatGraphState) -> dict[str, Any]: "response": DashboardChatResponse( answer_text=response.answer_text, intent=response.intent, - citations=list(dict.fromkeys(citations)), + citations=_dedupe_citations(citations), warnings=response.warnings, sql=response.sql, sql_results=response.sql_results, diff --git a/ddpui/core/dashboard_chat/orchestration/retrieval_support.py b/ddpui/core/dashboard_chat/orchestration/retrieval_support.py index b0e9b937a..5331666d4 100644 --- a/ddpui/core/dashboard_chat/orchestration/retrieval_support.py +++ b/ddpui/core/dashboard_chat/orchestration/retrieval_support.py @@ -96,34 +96,56 @@ def build_citations( } dashboard_id = dashboard_export.get("dashboard", {}).get("id") citations: list[DashboardChatCitation] = [] + seen_sources: set[tuple[str, str]] = set() for document in retrieved_documents[:6]: - table_name = None if document.source_type in { DashboardChatSourceType.DBT_MANIFEST.value, DashboardChatSourceType.DBT_CATALOG.value, }: - unique_id = unique_id_from_source_identifier(document.source_identifier) - table_name = allowlist.unique_id_to_table.get(unique_id) if unique_id else None + continue + + if ( + document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value + and 
chart_id_from_source_identifier(document.source_identifier) is None + ): + continue + + table_name = None + source_type = document.source_type + source_identifier = document.source_identifier + title = citation_title( + document=document, + dashboard_title=dashboard_title, + chart_lookup=chart_lookup, + table_name=table_name, + ) + snippet = citation_snippet( + document=document, + dashboard_title=dashboard_title, + chart_lookup=chart_lookup, + ) + url = citation_url( + document=document, + dashboard_id=document.dashboard_id or dashboard_id, + table_name=table_name, + ) + + source_key = (source_type, source_identifier) + if source_key in seen_sources: + continue + citations.append( DashboardChatCitation( - source_type=document.source_type, - source_identifier=document.source_identifier, - title=citation_title( - document=document, - dashboard_title=dashboard_title, - chart_lookup=chart_lookup, - table_name=table_name, - ), - snippet=compact_snippet(document.content), - url=citation_url( - document=document, - dashboard_id=document.dashboard_id or dashboard_id, - table_name=table_name, - ), + source_type=source_type, + source_identifier=source_identifier, + title=title, + snippet=snippet, + url=url, dashboard_id=document.dashboard_id, table_name=table_name, ) ) + seen_sources.add(source_key) return citations @@ -136,9 +158,9 @@ def citation_title( ) -> str: """Map a retrieved document into a human-readable citation title.""" if document.source_type == DashboardChatSourceType.ORG_CONTEXT.value: - return "Organization context" + return "Organization context file" if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: - return f"Dashboard context: {dashboard_title}" + return f"Dashboard context file: {dashboard_title}" if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: chart_id = chart_id_from_source_identifier(document.source_identifier) if chart_id is not None and chart_id in chart_lookup: @@ -159,6 +181,28 @@ def 
compact_snippet(content: str, max_length: int = 220) -> str: return normalized[: max_length - 3].rstrip() + "..." +def citation_snippet( + *, + document: DashboardChatRetrievedDocument, + dashboard_title: str, + chart_lookup: dict[int, str], +) -> str: + """Return the user-facing source subtitle for supported citation types.""" + if document.source_type == DashboardChatSourceType.ORG_CONTEXT.value: + return "Studied context about the organization from the organization context file." + + if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: + return f'Studied context about "{dashboard_title}" from the dashboard context file.' + + if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: + chart_id = chart_id_from_source_identifier(document.source_identifier) + chart_title = chart_lookup.get(chart_id) + if chart_title: + return f'Reviewed chart configuration and metadata for "{chart_title}".' + + return compact_snippet(document.content) + + def citation_url( *, document: DashboardChatRetrievedDocument, @@ -171,8 +215,8 @@ def citation_url( if document.source_type == DashboardChatSourceType.DASHBOARD_CONTEXT.value: if dashboard_id is None: - return None - return f"/dashboards/{dashboard_id}" + return "/settings/organization" + return f"/settings/organization?dashboard_id={dashboard_id}" if document.source_type == DashboardChatSourceType.DASHBOARD_EXPORT.value: chart_id = chart_id_from_source_identifier(document.source_identifier) diff --git a/ddpui/core/dashboard_chat/sessions/session_service.py b/ddpui/core/dashboard_chat/sessions/session_service.py index 32c5a9701..6ee67b260 100644 --- a/ddpui/core/dashboard_chat/sessions/session_service.py +++ b/ddpui/core/dashboard_chat/sessions/session_service.py @@ -173,6 +173,7 @@ def serialize_dashboard_chat_message(message: DashboardChatMessage) -> dict: "role": message.role, "content": message.content, "payload": message.payload or {}, + "feedback": message.feedback, 
"response_latency_ms": message.response_latency_ms, "timing_breakdown": message.timing_breakdown or {}, "created_at": message.created_at.isoformat(), @@ -281,6 +282,7 @@ def publish_dashboard_chat_assistant_message( content=message.content, created_at=message.created_at, payload=message.payload or {}, + feedback=message.feedback, response_latency_ms=message.response_latency_ms, timing_breakdown=message.timing_breakdown or {}, ) diff --git a/ddpui/core/dashboard_chat/suggested_prompts.py b/ddpui/core/dashboard_chat/suggested_prompts.py new file mode 100644 index 000000000..bf3a02c7c --- /dev/null +++ b/ddpui/core/dashboard_chat/suggested_prompts.py @@ -0,0 +1,374 @@ +from __future__ import annotations + +import re +from collections.abc import Iterable + + +TIME_TOKENS = ( + "quarter", + "quarterly", + "month", + "monthly", + "year", + "yearly", + "week", + "weekly", + "day", + "daily", + "date", + "time", + "period", +) + +ENTITY_LABELS = { + "district": "districts", + "facilitator": "facilitators", + "school": "schools", + "state": "states", + "program": "programs", + "block": "blocks", + "ward": "wards", + "village": "villages", + "student": "students", + "learner": "learners", + "teacher": "teachers", + "partner": "partners", + "organization": "organizations", + "org": "organizations", +} + +METRIC_PREFIX_PATTERNS = ( + ("count_distinct_", "unique"), + ("count_", "number of"), + ("avg_", "average"), + ("average_", "average"), + ("sum_", ""), + ("total_", "total"), + ("max_", "highest"), + ("min_", "lowest"), +) + + +def _normalize_text(parts: Iterable[str | None]) -> str: + return " ".join(part.strip() for part in parts if part and part.strip()) + + +def _humanize_identifier(value: str | None) -> str: + if not value: + return "" + + normalized_value = re.sub(r"[_\.]+", " ", str(value).strip().lower()) + normalized_value = re.sub(r"\b(label|name|id)\b", "", normalized_value) + normalized_value = re.sub(r"\s+", " ", normalized_value).strip() + return 
normalized_value + + +def _pluralize_label(label: str) -> str: + normalized_label = label.strip().lower() + if not normalized_label: + return "categories" + if normalized_label.endswith("ies") or normalized_label.endswith("s"): + return normalized_label + if normalized_label.endswith("y") and normalized_label[-2:-1] not in {"a", "e", "i", "o", "u"}: + return f"{normalized_label[:-1]}ies" + return f"{normalized_label}s" + + +def _looks_time_like(value: str | None) -> bool: + normalized_value = _humanize_identifier(value) + return any(token in normalized_value for token in TIME_TOKENS) + + +def _search_entity_label(text: str) -> str | None: + normalized_text = text.lower() + for token, label in ENTITY_LABELS.items(): + if token in normalized_text: + return label + return None + + +def _metric_label_from_string(metric_name: str | None) -> str: + normalized_metric_name = _humanize_identifier(metric_name) + if not normalized_metric_name: + return "" + + for prefix, prefix_label in METRIC_PREFIX_PATTERNS: + if normalized_metric_name.startswith(prefix.replace("_", " ")): + suffix = normalized_metric_name.removeprefix(prefix.replace("_", " ")).strip() + if prefix_label == "number of": + return f"{prefix_label} {_pluralize_label(suffix)}" + if prefix_label: + return f"{prefix_label} {suffix}".strip() + return suffix + return normalized_metric_name + + +def _metric_label_from_chart(chart: dict) -> str: + extra_config = chart.get("extra_config") or {} + metrics = extra_config.get("metrics") or [] + for metric in metrics: + if isinstance(metric, dict): + alias = _humanize_identifier(metric.get("alias")) + if alias: + return alias + column = metric.get("column") + aggregation = _humanize_identifier(metric.get("aggregation")) + if column: + column_label = _humanize_identifier(column) + if aggregation in {"avg", "average"}: + return f"average {column_label}" + if aggregation == "count_distinct": + return f"unique {_pluralize_label(column_label)}" + if aggregation == "count": + 
return f"number of {_pluralize_label(column_label)}" + if aggregation: + return f"{aggregation} {column_label}".strip() + return column_label + elif isinstance(metric, str): + metric_label = _metric_label_from_string(metric) + if metric_label: + return metric_label + + aggregate_column = extra_config.get("aggregate_column") or extra_config.get("value_column") + aggregate_function = extra_config.get("aggregate_function") + if aggregate_column: + column_label = _humanize_identifier(aggregate_column) + if aggregate_function == "avg": + return f"average {column_label}" + if aggregate_function == "count_distinct": + return f"unique {_pluralize_label(column_label)}" + if aggregate_function == "count": + return f"number of {_pluralize_label(column_label)}" + if aggregate_function in {"min", "max"}: + return f"{aggregate_function} {column_label}" + return column_label + + chart_title = str(chart.get("title") or "").strip() + if chart_title: + title_prefix = re.split(r"\b(by|over|across|vs)\b", chart_title, maxsplit=1, flags=re.IGNORECASE)[ + 0 + ].strip() + humanized_prefix = _humanize_identifier(title_prefix) + if humanized_prefix: + return humanized_prefix + + return _humanize_identifier(chart.get("table_name")) or "this metric" + + +def _dimension_label_from_chart(chart: dict, fallback_context: str) -> str | None: + extra_config = chart.get("extra_config") or {} + dimension_candidates = [] + + for key in ("dimension_column", "extra_dimension_column", "geographic_column"): + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + dimension_candidates.append(value) + + dimensions = extra_config.get("dimensions") or [] + if isinstance(dimensions, list): + dimension_candidates.extend(value for value in dimensions if isinstance(value, str) and value.strip()) + + for candidate in dimension_candidates: + if _looks_time_like(candidate): + continue + label = _humanize_identifier(candidate) + if label: + return _pluralize_label(label) + + return 
_search_entity_label(fallback_context) + + +def _time_label_from_chart(chart: dict, fallback_context: str) -> str | None: + extra_config = chart.get("extra_config") or {} + dimension_candidates = [] + + for key in ("dimension_column", "extra_dimension_column"): + value = extra_config.get(key) + if isinstance(value, str) and value.strip(): + dimension_candidates.append(value) + + dimensions = extra_config.get("dimensions") or [] + if isinstance(dimensions, list): + dimension_candidates.extend(value for value in dimensions if isinstance(value, str) and value.strip()) + + for candidate in dimension_candidates: + if _looks_time_like(candidate): + return _humanize_identifier(candidate) + + normalized_context = fallback_context.lower() + for token in TIME_TOKENS: + if token in normalized_context: + return token + return None + + +def _uses_plural_verb(metric_label: str) -> bool: + normalized_metric_label = metric_label.strip().lower() + if normalized_metric_label.startswith(("number of ", "average ", "highest ", "lowest ", "total ")): + return False + if normalized_metric_label.endswith("ies"): + return True + return normalized_metric_label.endswith("s") and not normalized_metric_label.endswith("ss") + + +def _build_trend_prompt(chart_prompt_context: dict) -> str: + metric_label = chart_prompt_context["metric_label"] + time_label = chart_prompt_context["time_label"] or "time" + verb = "have" if _uses_plural_verb(metric_label) else "has" + if time_label == "time": + return f"How {verb} {metric_label} changed over time?" + return f"How {verb} {metric_label} changed by {time_label}?" + + +def _build_comparison_prompt(chart_prompt_context: dict) -> str: + return ( + f'Which {chart_prompt_context["dimension_label"]} have the highest ' + f'{chart_prompt_context["metric_label"]}?' 
+ ) + + +def _build_explanation_prompt(chart_prompt_context: dict) -> str: + chart_title = chart_prompt_context["chart_title"] + if chart_prompt_context["chart_type"] == "number": + return f'What does the "{chart_title}" metric represent?' + if chart_prompt_context["time_label"]: + return f'What does the "{chart_title}" chart measure by {chart_prompt_context["time_label"]}?' + if chart_prompt_context["dimension_label"]: + return ( + f'What does the "{chart_title}" chart compare across ' + f'{chart_prompt_context["dimension_label"]}?' + ) + return f'What does the "{chart_title}" chart measure?' + + +def _can_build_trend_prompt(chart_prompt_context: dict) -> bool: + return bool(chart_prompt_context["time_label"] or chart_prompt_context["chart_type"] == "line") + + +def _can_build_comparison_prompt(chart_prompt_context: dict) -> bool: + return bool(chart_prompt_context["dimension_label"] and chart_prompt_context["chart_type"] != "number") + + +def _build_chart_prompt_contexts( + dashboard_export: dict, + org_context_markdown: str, + dashboard_context_markdown: str, +) -> list[dict]: + dashboard = dashboard_export.get("dashboard") or {} + charts = dashboard_export.get("charts") or [] + dashboard_title = str(dashboard.get("title") or "this dashboard").strip() + dashboard_description = str(dashboard.get("description") or "").strip() + shared_context = _normalize_text( + [dashboard_title, dashboard_description, org_context_markdown, dashboard_context_markdown] + ) + + chart_prompt_contexts = [] + for chart in charts: + chart_title = str(chart.get("title") or "").strip() or dashboard_title + chart_description = str(chart.get("description") or "").strip() + chart_context = _normalize_text( + [ + chart_title, + chart_description, + str(chart.get("schema_name") or "").strip(), + str(chart.get("table_name") or "").replace("_", " ").strip(), + shared_context, + ] + ) + chart_prompt_contexts.append( + { + "chart_id": chart.get("id"), + "chart_title": chart_title, + "chart_type": 
str(chart.get("chart_type") or "").strip().lower(), + "metric_label": _metric_label_from_chart(chart), + "dimension_label": _dimension_label_from_chart(chart, chart_context), + "time_label": _time_label_from_chart(chart, chart_context), + } + ) + + return chart_prompt_contexts + + +def _select_prompt( + chart_prompt_contexts: list[dict], + used_chart_ids: set[int | None], + *, + prompt_builder, + predicate, +) -> str | None: + for prefer_unused_chart in (True, False): + for chart_prompt_context in chart_prompt_contexts: + chart_id = chart_prompt_context["chart_id"] + if prefer_unused_chart and chart_id in used_chart_ids: + continue + if not predicate(chart_prompt_context): + continue + prompt = prompt_builder(chart_prompt_context) + if not prompt: + continue + used_chart_ids.add(chart_id) + return prompt + return None + + +def build_dashboard_suggested_prompts( + dashboard_export: dict, + org_context_markdown: str, + dashboard_context_markdown: str, +) -> list[str]: + chart_prompt_contexts = _build_chart_prompt_contexts( + dashboard_export=dashboard_export, + org_context_markdown=org_context_markdown, + dashboard_context_markdown=dashboard_context_markdown, + ) + + suggested_prompts: list[str] = [] + used_chart_ids: set[int | None] = set() + + trend_prompt = _select_prompt( + chart_prompt_contexts, + used_chart_ids, + prompt_builder=_build_trend_prompt, + predicate=_can_build_trend_prompt, + ) + if trend_prompt: + suggested_prompts.append(trend_prompt) + + comparison_prompt = _select_prompt( + chart_prompt_contexts, + used_chart_ids, + prompt_builder=_build_comparison_prompt, + predicate=_can_build_comparison_prompt, + ) + if comparison_prompt and comparison_prompt not in suggested_prompts: + suggested_prompts.append(comparison_prompt) + + explanation_prompt = _select_prompt( + chart_prompt_contexts, + used_chart_ids, + prompt_builder=_build_explanation_prompt, + predicate=lambda chart_prompt_context: True, + ) + if explanation_prompt and explanation_prompt not in 
suggested_prompts: + suggested_prompts.append(explanation_prompt) + + for chart_prompt_context in chart_prompt_contexts: + if len(suggested_prompts) == 3: + break + if chart_prompt_context["chart_id"] in used_chart_ids: + continue + explanation_prompt = _build_explanation_prompt(chart_prompt_context) + if explanation_prompt in suggested_prompts: + continue + used_chart_ids.add(chart_prompt_context["chart_id"]) + suggested_prompts.append(explanation_prompt) + + if suggested_prompts: + return suggested_prompts[:3] + + dashboard_title = str((dashboard_export.get("dashboard") or {}).get("title") or "this dashboard").strip() + return [ + f'What does the "{dashboard_title}" dashboard measure?', + f'Which metrics stand out on the "{dashboard_title}" dashboard?', + f'How are results grouped on the "{dashboard_title}" dashboard?', + ] diff --git a/ddpui/migrations/0161_dashboardchatmessage_feedback.py b/ddpui/migrations/0161_dashboardchatmessage_feedback.py new file mode 100644 index 000000000..ef1d18982 --- /dev/null +++ b/ddpui/migrations/0161_dashboardchatmessage_feedback.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2 on 2026-04-06 06:52 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0160_dashboardchatturn"), + ] + + operations = [ + migrations.AddField( + model_name="dashboardchatmessage", + name="feedback", + field=models.CharField( + blank=True, + choices=[("thumbs_up", "Thumbs Up"), ("thumbs_down", "Thumbs Down")], + max_length=16, + null=True, + ), + ), + ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index 1fc1fe877..6c2cd13ac 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -31,6 +31,13 @@ class DashboardChatTurnStatus(models.TextChoices): FAILED = "failed", "Failed" +class DashboardChatMessageFeedback(models.TextChoices): + """Locked-in user feedback for one assistant answer.""" + + THUMBS_UP = "thumbs_up", "Thumbs Up" + 
THUMBS_DOWN = "thumbs_down", "Thumbs Down" + + class DashboardChatPromptTemplateKey(models.TextChoices): """Runtime-editable prompt templates used by the dashboard chat LLM client.""" @@ -141,6 +148,12 @@ class DashboardChatMessage(models.Model): content = models.TextField(blank=True, default="") client_message_id = models.CharField(max_length=100, null=True, blank=True) payload = models.JSONField(null=True, blank=True) + feedback = models.CharField( + max_length=16, + choices=DashboardChatMessageFeedback.choices, + null=True, + blank=True, + ) response_latency_ms = models.PositiveIntegerField(null=True, blank=True) timing_breakdown = models.JSONField(null=True, blank=True) created_at = models.DateTimeField(default=timezone.now) diff --git a/ddpui/models/org_user.py b/ddpui/models/org_user.py index 5fec79fd2..599b0f20f 100644 --- a/ddpui/models/org_user.py +++ b/ddpui/models/org_user.py @@ -128,6 +128,7 @@ class OrgUserResponse(Schema): """structure for returning an OrgUser in an http response""" email: str + first_name: str | None = None org: Optional[OrgSchema] = None active: bool wtype: str | None diff --git a/ddpui/schemas/dashboard_schema.py b/ddpui/schemas/dashboard_schema.py index 6a33824bd..229ba2278 100644 --- a/ddpui/schemas/dashboard_schema.py +++ b/ddpui/schemas/dashboard_schema.py @@ -4,7 +4,7 @@ """ from datetime import datetime -from typing import Optional, List +from typing import Literal, Optional, List from ninja import Schema @@ -216,3 +216,23 @@ class UpdateDashboardAIContextSchema(Schema): """Request schema for dashboard-level AI context updates.""" dashboard_context_markdown: str + + +class DashboardChatBootstrapResponse(Schema): + """Response schema for chat bootstrap UI state.""" + + dashboard_id: int + suggested_prompts: List[str] + + +class DashboardChatMessageFeedbackRequest(Schema): + """Request schema for locked feedback on one assistant answer.""" + + feedback: Literal["thumbs_up", "thumbs_down"] + + +class 
DashboardChatMessageFeedbackResponse(Schema): + """Response schema for stored feedback on one assistant answer.""" + + message_id: int + feedback: Literal["thumbs_up", "thumbs_down"] diff --git a/ddpui/tests/api_tests/test_dashboard_native_api.py b/ddpui/tests/api_tests/test_dashboard_native_api.py index eb11c0ef5..79dc54a2e 100644 --- a/ddpui/tests/api_tests/test_dashboard_native_api.py +++ b/ddpui/tests/api_tests/test_dashboard_native_api.py @@ -26,6 +26,11 @@ from ddpui.models.org_user import OrgUser from ddpui.models.role_based_access import Role from ddpui.models.dashboard import Dashboard, DashboardFilter +from ddpui.models.dashboard_chat import ( + DashboardChatMessage, + DashboardChatMessageFeedback, + DashboardChatSession, +) from ddpui.models.visualization import Chart from ddpui.auth import ACCOUNT_MANAGER_ROLE from ddpui.api.dashboard_native_api import ( @@ -38,12 +43,14 @@ create_filter, update_filter, delete_filter, + set_dashboard_chat_message_feedback, ) from ddpui.schemas.dashboard_schema import ( DashboardCreate, DashboardUpdate, FilterCreate, FilterUpdate, + DashboardChatMessageFeedbackRequest, ) from ddpui.tests.api_tests.test_user_org_api import seed_db, mock_request @@ -707,3 +714,74 @@ def test_delete_filter_not_found(self, orguser, sample_dashboard, seed_db): def test_seed_data(seed_db): """Test that seed data is loaded correctly""" assert Role.objects.count() == 5 + + +class TestDashboardChatMessageFeedback: + """Tests for locking thumbs feedback onto assistant answers.""" + + @patch("ddpui.api.dashboard_native_api.get_all_feature_flags_for_org") + def test_set_dashboard_chat_message_feedback_success( + self, + mock_feature_flags, + orguser, + sample_dashboard, + seed_db, + ): + mock_feature_flags.return_value = {"AI_DASHBOARD_CHAT": True} + request = mock_request(orguser) + session = DashboardChatSession.objects.create( + org=orguser.org, + orguser=orguser, + dashboard=sample_dashboard, + ) + message = DashboardChatMessage.objects.create( 
+ session=session, + sequence_number=1, + role="assistant", + content="Top facilitators are listed below.", + ) + + response = set_dashboard_chat_message_feedback( + request, + dashboard_id=sample_dashboard.id, + message_id=message.id, + payload=DashboardChatMessageFeedbackRequest(feedback="thumbs_up"), + ) + + message.refresh_from_db() + assert response.message_id == message.id + assert response.feedback == "thumbs_up" + assert message.feedback == DashboardChatMessageFeedback.THUMBS_UP + + @patch("ddpui.api.dashboard_native_api.get_all_feature_flags_for_org") + def test_set_dashboard_chat_message_feedback_is_locked_after_first_selection( + self, + mock_feature_flags, + orguser, + sample_dashboard, + seed_db, + ): + mock_feature_flags.return_value = {"AI_DASHBOARD_CHAT": True} + request = mock_request(orguser) + session = DashboardChatSession.objects.create( + org=orguser.org, + orguser=orguser, + dashboard=sample_dashboard, + ) + message = DashboardChatMessage.objects.create( + session=session, + sequence_number=1, + role="assistant", + content="Top facilitators are listed below.", + feedback=DashboardChatMessageFeedback.THUMBS_UP, + ) + + with pytest.raises(HttpError) as excinfo: + set_dashboard_chat_message_feedback( + request, + dashboard_id=sample_dashboard.id, + message_id=message.id, + payload=DashboardChatMessageFeedbackRequest(feedback="thumbs_down"), + ) + + assert excinfo.value.status_code == 409 diff --git a/ddpui/tests/api_tests/test_user_org_api.py b/ddpui/tests/api_tests/test_user_org_api.py index d17f553a6..590f630aa 100644 --- a/ddpui/tests/api_tests/test_user_org_api.py +++ b/ddpui/tests/api_tests/test_user_org_api.py @@ -180,8 +180,10 @@ def test_can_manage_org_settings_seeded_for_admin_roles(seed_db): assert GUEST_ROLE not in role_slugs -def test_get_current_userv2_has_user(authuser, org_with_workspace, org_without_workspace): +def test_get_current_userv2_has_user(seed_db, authuser, org_with_workspace, org_without_workspace): """tests 
/worksspace/detatch/""" + authuser.first_name = "Pratiksha" + authuser.save() orguser1 = OrgUser.objects.create( user=authuser, org=org_with_workspace, @@ -194,13 +196,16 @@ def test_get_current_userv2_has_user(authuser, org_with_workspace, org_without_w ) request = mock_request(orguser2) + request.permissions = ["can_view_orgusers"] response = get_current_user_v2(request) assert len(response) == 2 assert response[0].email == authuser.email + assert response[0].first_name == "Pratiksha" assert response[0].active == authuser.is_active assert response[1].email == authuser.email + assert response[1].first_name == "Pratiksha" assert response[1].active == authuser.is_active if response[0].org.slug == org_with_workspace.slug: diff --git a/ddpui/tests/core/dashboard_chat/test_response_payloads.py b/ddpui/tests/core/dashboard_chat/test_response_payloads.py index 49f1c91c3..1a3c7c4ad 100644 --- a/ddpui/tests/core/dashboard_chat/test_response_payloads.py +++ b/ddpui/tests/core/dashboard_chat/test_response_payloads.py @@ -9,6 +9,7 @@ build_citations, explore_table_url, ) +from ddpui.core.dashboard_chat.orchestration.nodes.finalize import finalize_node from ddpui.core.dashboard_chat.vector.vector_documents import DashboardChatSourceType @@ -57,6 +58,13 @@ def test_build_citations_adds_frontend_urls(): content="Model content", dashboard_id=6, ), + DashboardChatRetrievedDocument( + document_id="doc-3", + source_type=DashboardChatSourceType.DASHBOARD_EXPORT.value, + source_identifier="dashboard:6", + content="Dashboard export content", + dashboard_id=6, + ), ] dashboard_export = { "dashboard": {"id": 6, "title": "Impact Overview"}, @@ -70,4 +78,108 @@ def test_build_citations_adds_frontend_urls(): ) assert citations[0].url == "/charts/7" - assert citations[1].url == "/explore?schema_name=analytics&table_name=sessions" + assert citations[0].title == "Chart: Sessions by District" + assert citations[0].snippet == 'Reviewed chart configuration and metadata for "Sessions by 
District".' + assert len(citations) == 1 + + +def test_build_citations_ignores_dbt_docs_for_user_facing_sources(): + allowlist = DashboardChatAllowlist( + allowed_tables={"analytics.sessions"}, + unique_id_to_table={"model.dalgo.sessions": "analytics.sessions"}, + ) + retrieved_documents = [ + DashboardChatRetrievedDocument( + document_id="doc-1", + source_type=DashboardChatSourceType.DBT_MANIFEST.value, + source_identifier="manifest:model.dalgo.sessions", + content="Manifest content", + dashboard_id=6, + ), + DashboardChatRetrievedDocument( + document_id="doc-2", + source_type=DashboardChatSourceType.DBT_CATALOG.value, + source_identifier="catalog:model.dalgo.sessions", + content="Catalog content", + dashboard_id=6, + ), + ] + + citations = build_citations( + retrieved_documents=retrieved_documents, + dashboard_export={"dashboard": {"id": 6, "title": "Impact Overview"}, "charts": []}, + allowlist=allowlist, + ) + + assert citations == [] + + +def test_build_citations_formats_context_file_cards_for_users(): + citations = build_citations( + retrieved_documents=[ + DashboardChatRetrievedDocument( + document_id="doc-1", + source_type=DashboardChatSourceType.ORG_CONTEXT.value, + source_identifier="org:1", + content="Organization markdown content", + dashboard_id=6, + ), + DashboardChatRetrievedDocument( + document_id="doc-2", + source_type=DashboardChatSourceType.DASHBOARD_CONTEXT.value, + source_identifier="dashboard_context:6", + content="Dashboard markdown content", + dashboard_id=6, + ), + ], + dashboard_export={"dashboard": {"id": 6, "title": "Impact Overview"}, "charts": []}, + allowlist=DashboardChatAllowlist(allowed_tables=set(), unique_id_to_table={}), + ) + + assert citations[0].title == "Organization context file" + assert citations[0].snippet == ( + "Studied context about the organization from the organization context file." 
+ ) + assert citations[0].url == "/settings/organization" + assert citations[1].title == "Dashboard context file: Impact Overview" + assert citations[1].snippet == 'Studied context about "Impact Overview" from the dashboard context file.' + assert citations[1].url == "/settings/organization?dashboard_id=6" + + +def test_finalize_prefers_sql_table_citation_over_retrieval_table_citation(): + state = { + "dashboard_id": 6, + "retrieved_documents": [], + "allowlist_payload": {"allowed_tables": ["analytics.sessions"], "unique_id_to_table": {}}, + "intent_decision": { + "intent": "query_with_sql", + "confidence": 0.95, + "reason": "Needs SQL", + "missing_info": [], + "follow_up_context": {"is_follow_up": False, "follow_up_type": None}, + }, + "sql_validation": { + "is_valid": True, + "sanitized_sql": "select * from analytics.sessions", + "errors": [], + "tables": ["analytics.sessions"], + }, + "response": DashboardChatResponse( + answer_text="Answer", + intent=DashboardChatIntent.QUERY_WITH_SQL, + citations=[ + DashboardChatCitation( + source_type="warehouse_table", + source_identifier="analytics.sessions", + title="Warehouse table: analytics.sessions", + snippet="Schema and model context referenced analytics.sessions.", + url="/explore?schema_name=analytics&table_name=sessions", + ) + ], + ).to_dict(), + } + + finalized = DashboardChatResponse.model_validate(finalize_node(state)["response"]) + + assert len(finalized.citations) == 1 + assert finalized.citations[0].snippet == "SQL executed against analytics.sessions." 
diff --git a/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py b/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py new file mode 100644 index 000000000..daa11802f --- /dev/null +++ b/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py @@ -0,0 +1,115 @@ +from ddpui.core.dashboard_chat.suggested_prompts import build_dashboard_suggested_prompts + + +def test_build_dashboard_suggested_prompts_returns_three_grounded_questions(): + prompts = build_dashboard_suggested_prompts( + dashboard_export={ + "dashboard": { + "title": "Facilitator Effectiveness Studio", + "description": "Facilitator performance and district literacy efficiency by quarter", + }, + "charts": [ + { + "id": 7, + "title": "Facilitator Outcomes", + "description": "Quarterly facilitator effectiveness across learner outcomes", + "chart_type": "line", + "schema_name": "analytics", + "table_name": "facilitator_effectiveness_quarterly", + "extra_config": { + "dimension_column": "quarter_label", + "extra_dimension_column": "facilitator_name", + "metrics": [ + { + "column": "improved_literacy_students", + "aggregation": "sum", + "alias": "outcomes", + } + ], + }, + }, + { + "id": 3, + "title": "District Literacy Efficiency", + "description": "Improved literacy students per spend by district", + "chart_type": "bar", + "schema_name": "analytics", + "table_name": "district_funding_efficiency_quarterly", + "extra_config": { + "dimension_column": "district_name", + "metrics": [ + { + "column": "literacy_efficiency", + "aggregation": "avg", + "alias": "literacy efficiency", + } + ], + }, + }, + { + "id": 9, + "title": "Total Facilitators", + "description": "Count of facilitators on the dashboard", + "chart_type": "number", + "schema_name": "analytics", + "table_name": "facilitator_effectiveness_quarterly", + "extra_config": { + "aggregate_column": "facilitator_name", + "aggregate_function": "count_distinct", + }, + }, + ], + }, + org_context_markdown="Facilitators and districts are the main 
operating units.", + dashboard_context_markdown="Use this dashboard to compare outcomes over time.", + ) + + assert prompts == [ + "How have outcomes changed by quarter?", + "Which districts have the highest literacy efficiency?", + 'What does the "Total Facilitators" metric represent?', + ] + + +def test_build_dashboard_suggested_prompts_backfills_with_explanations_when_only_number_charts_exist(): + prompts = build_dashboard_suggested_prompts( + dashboard_export={ + "dashboard": { + "title": "Impact Snapshot", + "description": "Headline metrics for the current program cycle", + }, + "charts": [ + { + "id": 1, + "title": "Total Learners Reached", + "description": "Unique learners supported", + "chart_type": "number", + "schema_name": "analytics", + "table_name": "learner_rollup", + "extra_config": { + "aggregate_column": "learner_id", + "aggregate_function": "count_distinct", + }, + }, + { + "id": 2, + "title": "Average Attendance Rate", + "description": "Average attendance across all learners", + "chart_type": "number", + "schema_name": "analytics", + "table_name": "learner_rollup", + "extra_config": { + "aggregate_column": "attendance_rate", + "aggregate_function": "avg", + }, + }, + ], + }, + org_context_markdown="Learners are the key population.", + dashboard_context_markdown="Use this dashboard for quick snapshot metrics.", + ) + + assert prompts == [ + 'What does the "Total Learners Reached" metric represent?', + 'What does the "Average Attendance Rate" metric represent?', + ] diff --git a/ddpui/tests/models/test_dashboard_chat_models.py b/ddpui/tests/models/test_dashboard_chat_models.py index b0f5d282a..c42c6e7ad 100644 --- a/ddpui/tests/models/test_dashboard_chat_models.py +++ b/ddpui/tests/models/test_dashboard_chat_models.py @@ -18,6 +18,7 @@ from ddpui.models.dashboard_chat import ( DashboardAIContext, DashboardChatMessage, + DashboardChatMessageFeedback, DashboardChatSession, OrgAIContext, ) @@ -206,3 +207,20 @@ def 
test_dashboard_chat_message_payload(org, orguser, dashboard): assert message.payload["citations"][0]["source_type"] == "dashboard_export" assert message.payload["sql"].startswith("SELECT") + + +def test_dashboard_chat_message_feedback_persists_on_assistant_messages(org, orguser, dashboard): + session = DashboardChatSession.objects.create( + org=org, + orguser=orguser, + dashboard=dashboard, + ) + message = DashboardChatMessage.objects.create( + session=session, + sequence_number=3, + role="assistant", + content="Here is the answer.", + feedback=DashboardChatMessageFeedback.THUMBS_UP, + ) + + assert message.feedback == DashboardChatMessageFeedback.THUMBS_UP From 2a209b710f1628a91c4186e5b1195995fe961381 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Mon, 6 Apr 2026 13:13:51 +0530 Subject: [PATCH 41/49] refactor(ai-chat): simplify suggested prompt generation --- ddpui/api/dashboard_native_api.py | 5 - .../core/dashboard_chat/suggested_prompts.py | 206 ++++++------------ .../dashboard_chat/test_suggested_prompts.py | 44 +++- 3 files changed, 105 insertions(+), 150 deletions(-) diff --git a/ddpui/api/dashboard_native_api.py b/ddpui/api/dashboard_native_api.py index b499a835b..1b97d90b7 100644 --- a/ddpui/api/dashboard_native_api.py +++ b/ddpui/api/dashboard_native_api.py @@ -20,7 +20,6 @@ DashboardAIContext, DashboardChatMessage, DashboardChatMessageRole, - OrgAIContext, ) from ddpui.models.org_preferences import OrgPreferences from ddpui.models.org_user import OrgUser @@ -87,15 +86,11 @@ def _serialize_dashboard_ai_context(dashboard: Dashboard, context: DashboardAICo def _serialize_dashboard_chat_bootstrap(dashboard: Dashboard) -> DashboardChatBootstrapResponse: dashboard_export = DashboardService.export_dashboard_context_for_dashboard(dashboard, dashboard.org) - org_context, _ = OrgAIContext.objects.get_or_create(org=dashboard.org) - dashboard_context, _ = DashboardAIContext.objects.get_or_create(dashboard=dashboard) return DashboardChatBootstrapResponse( 
dashboard_id=dashboard.id, suggested_prompts=build_dashboard_suggested_prompts( dashboard_export=dashboard_export, - org_context_markdown=org_context.markdown, - dashboard_context_markdown=dashboard_context.markdown, ), ) diff --git a/ddpui/core/dashboard_chat/suggested_prompts.py b/ddpui/core/dashboard_chat/suggested_prompts.py index bf3a02c7c..0871fc906 100644 --- a/ddpui/core/dashboard_chat/suggested_prompts.py +++ b/ddpui/core/dashboard_chat/suggested_prompts.py @@ -1,7 +1,6 @@ from __future__ import annotations import re -from collections.abc import Iterable TIME_TOKENS = ( @@ -20,39 +19,18 @@ "period", ) -ENTITY_LABELS = { - "district": "districts", - "facilitator": "facilitators", - "school": "schools", - "state": "states", - "program": "programs", - "block": "blocks", - "ward": "wards", - "village": "villages", - "student": "students", - "learner": "learners", - "teacher": "teachers", - "partner": "partners", - "organization": "organizations", - "org": "organizations", -} - METRIC_PREFIX_PATTERNS = ( ("count_distinct_", "unique"), ("count_", "number of"), ("avg_", "average"), ("average_", "average"), - ("sum_", ""), + ("sum_", "total"), ("total_", "total"), ("max_", "highest"), ("min_", "lowest"), ) -def _normalize_text(parts: Iterable[str | None]) -> str: - return " ".join(part.strip() for part in parts if part and part.strip()) - - def _humanize_identifier(value: str | None) -> str: if not value: return "" @@ -79,14 +57,6 @@ def _looks_time_like(value: str | None) -> bool: return any(token in normalized_value for token in TIME_TOKENS) -def _search_entity_label(text: str) -> str | None: - normalized_text = text.lower() - for token, label in ENTITY_LABELS.items(): - if token in normalized_text: - return label - return None - - def _metric_label_from_string(metric_name: str | None) -> str: normalized_metric_name = _humanize_identifier(metric_name) if not normalized_metric_name: @@ -103,6 +73,32 @@ def _metric_label_from_string(metric_name: str | None) -> 
str: return normalized_metric_name +def _metric_label_from_column( + column_name: str | None, + aggregation_name: str | None, +) -> str: + column_label = _humanize_identifier(column_name) + aggregation_label = _humanize_identifier(aggregation_name) + + if not column_label: + return "" + if not aggregation_label: + return column_label + if aggregation_label in {"avg", "average"}: + return f"average {column_label}" + if aggregation_label == "sum": + return f"total {column_label}" + if aggregation_label == "count_distinct": + return f"unique {_pluralize_label(column_label)}" + if aggregation_label == "count": + return f"number of {_pluralize_label(column_label)}" + if aggregation_label == "min": + return f"lowest {column_label}" + if aggregation_label == "max": + return f"highest {column_label}" + return f"{aggregation_label} of {column_label}" + + def _metric_label_from_chart(chart: dict) -> str: extra_config = chart.get("extra_config") or {} metrics = extra_config.get("metrics") or [] @@ -112,18 +108,8 @@ def _metric_label_from_chart(chart: dict) -> str: if alias: return alias column = metric.get("column") - aggregation = _humanize_identifier(metric.get("aggregation")) if column: - column_label = _humanize_identifier(column) - if aggregation in {"avg", "average"}: - return f"average {column_label}" - if aggregation == "count_distinct": - return f"unique {_pluralize_label(column_label)}" - if aggregation == "count": - return f"number of {_pluralize_label(column_label)}" - if aggregation: - return f"{aggregation} {column_label}".strip() - return column_label + return _metric_label_from_column(column, metric.get("aggregation")) elif isinstance(metric, str): metric_label = _metric_label_from_string(metric) if metric_label: @@ -132,16 +118,9 @@ def _metric_label_from_chart(chart: dict) -> str: aggregate_column = extra_config.get("aggregate_column") or extra_config.get("value_column") aggregate_function = extra_config.get("aggregate_function") if aggregate_column: - 
column_label = _humanize_identifier(aggregate_column) - if aggregate_function == "avg": - return f"average {column_label}" - if aggregate_function == "count_distinct": - return f"unique {_pluralize_label(column_label)}" - if aggregate_function == "count": - return f"number of {_pluralize_label(column_label)}" - if aggregate_function in {"min", "max"}: - return f"{aggregate_function} {column_label}" - return column_label + metric_label = _metric_label_from_column(aggregate_column, aggregate_function) + if metric_label: + return metric_label chart_title = str(chart.get("title") or "").strip() if chart_title: @@ -155,9 +134,9 @@ def _metric_label_from_chart(chart: dict) -> str: return _humanize_identifier(chart.get("table_name")) or "this metric" -def _dimension_label_from_chart(chart: dict, fallback_context: str) -> str | None: +def _dimension_candidates_from_chart(chart: dict) -> list[str]: extra_config = chart.get("extra_config") or {} - dimension_candidates = [] + dimension_candidates: list[str] = [] for key in ("dimension_column", "extra_dimension_column", "geographic_column"): value = extra_config.get(key) @@ -166,64 +145,43 @@ def _dimension_label_from_chart(chart: dict, fallback_context: str) -> str | Non dimensions = extra_config.get("dimensions") or [] if isinstance(dimensions, list): - dimension_candidates.extend(value for value in dimensions if isinstance(value, str) and value.strip()) + dimension_candidates.extend( + value for value in dimensions if isinstance(value, str) and value.strip() + ) + + return dimension_candidates - for candidate in dimension_candidates: + +def _dimension_label_from_chart(chart: dict) -> str | None: + for candidate in _dimension_candidates_from_chart(chart): if _looks_time_like(candidate): continue label = _humanize_identifier(candidate) if label: return _pluralize_label(label) - return _search_entity_label(fallback_context) - - -def _time_label_from_chart(chart: dict, fallback_context: str) -> str | None: - extra_config = 
chart.get("extra_config") or {} - dimension_candidates = [] + return None - for key in ("dimension_column", "extra_dimension_column"): - value = extra_config.get(key) - if isinstance(value, str) and value.strip(): - dimension_candidates.append(value) - dimensions = extra_config.get("dimensions") or [] - if isinstance(dimensions, list): - dimension_candidates.extend(value for value in dimensions if isinstance(value, str) and value.strip()) - - for candidate in dimension_candidates: +def _time_label_from_chart(chart: dict) -> str | None: + for candidate in _dimension_candidates_from_chart(chart): if _looks_time_like(candidate): return _humanize_identifier(candidate) - - normalized_context = fallback_context.lower() - for token in TIME_TOKENS: - if token in normalized_context: - return token return None -def _uses_plural_verb(metric_label: str) -> bool: - normalized_metric_label = metric_label.strip().lower() - if normalized_metric_label.startswith(("number of ", "average ", "highest ", "lowest ", "total ")): - return False - if normalized_metric_label.endswith("ies"): - return True - return normalized_metric_label.endswith("s") and not normalized_metric_label.endswith("ss") - - def _build_trend_prompt(chart_prompt_context: dict) -> str: metric_label = chart_prompt_context["metric_label"] time_label = chart_prompt_context["time_label"] or "time" - verb = "have" if _uses_plural_verb(metric_label) else "has" if time_label == "time": - return f"How {verb} {metric_label} changed over time?" - return f"How {verb} {metric_label} changed by {time_label}?" + return f"How did {metric_label} change over time?" + return f"How did {metric_label} change by {time_label}?" def _build_comparison_prompt(chart_prompt_context: dict) -> str: return ( - f'Which {chart_prompt_context["dimension_label"]} have the highest ' - f'{chart_prompt_context["metric_label"]}?' + f'How does {chart_prompt_context["metric_label"]} compare across ' + f'{chart_prompt_context["dimension_label"]}?' 
) @@ -242,7 +200,7 @@ def _build_explanation_prompt(chart_prompt_context: dict) -> str: def _can_build_trend_prompt(chart_prompt_context: dict) -> bool: - return bool(chart_prompt_context["time_label"] or chart_prompt_context["chart_type"] == "line") + return bool(chart_prompt_context["time_label"]) def _can_build_comparison_prompt(chart_prompt_context: dict) -> bool: @@ -251,38 +209,22 @@ def _can_build_comparison_prompt(chart_prompt_context: dict) -> bool: def _build_chart_prompt_contexts( dashboard_export: dict, - org_context_markdown: str, - dashboard_context_markdown: str, ) -> list[dict]: dashboard = dashboard_export.get("dashboard") or {} charts = dashboard_export.get("charts") or [] dashboard_title = str(dashboard.get("title") or "this dashboard").strip() - dashboard_description = str(dashboard.get("description") or "").strip() - shared_context = _normalize_text( - [dashboard_title, dashboard_description, org_context_markdown, dashboard_context_markdown] - ) chart_prompt_contexts = [] - for chart in charts: + for chart in reversed(charts): chart_title = str(chart.get("title") or "").strip() or dashboard_title - chart_description = str(chart.get("description") or "").strip() - chart_context = _normalize_text( - [ - chart_title, - chart_description, - str(chart.get("schema_name") or "").strip(), - str(chart.get("table_name") or "").replace("_", " ").strip(), - shared_context, - ] - ) chart_prompt_contexts.append( { "chart_id": chart.get("id"), "chart_title": chart_title, "chart_type": str(chart.get("chart_type") or "").strip().lower(), "metric_label": _metric_label_from_chart(chart), - "dimension_label": _dimension_label_from_chart(chart, chart_context), - "time_label": _time_label_from_chart(chart, chart_context), + "dimension_label": _dimension_label_from_chart(chart), + "time_label": _time_label_from_chart(chart), } ) @@ -296,30 +238,25 @@ def _select_prompt( prompt_builder, predicate, ) -> str | None: - for prefer_unused_chart in (True, False): - for 
chart_prompt_context in chart_prompt_contexts: - chart_id = chart_prompt_context["chart_id"] - if prefer_unused_chart and chart_id in used_chart_ids: - continue - if not predicate(chart_prompt_context): - continue - prompt = prompt_builder(chart_prompt_context) - if not prompt: - continue - used_chart_ids.add(chart_id) - return prompt + for chart_prompt_context in chart_prompt_contexts: + chart_id = chart_prompt_context["chart_id"] + if chart_id in used_chart_ids: + continue + if not predicate(chart_prompt_context): + continue + prompt = prompt_builder(chart_prompt_context) + if not prompt: + continue + used_chart_ids.add(chart_id) + return prompt return None def build_dashboard_suggested_prompts( dashboard_export: dict, - org_context_markdown: str, - dashboard_context_markdown: str, ) -> list[str]: chart_prompt_contexts = _build_chart_prompt_contexts( dashboard_export=dashboard_export, - org_context_markdown=org_context_markdown, - dashboard_context_markdown=dashboard_context_markdown, ) suggested_prompts: list[str] = [] @@ -347,7 +284,7 @@ def build_dashboard_suggested_prompts( chart_prompt_contexts, used_chart_ids, prompt_builder=_build_explanation_prompt, - predicate=lambda chart_prompt_context: True, + predicate=lambda chart_prompt_context: bool(chart_prompt_context["chart_title"]), ) if explanation_prompt and explanation_prompt not in suggested_prompts: suggested_prompts.append(explanation_prompt) @@ -355,20 +292,13 @@ def build_dashboard_suggested_prompts( for chart_prompt_context in chart_prompt_contexts: if len(suggested_prompts) == 3: break - if chart_prompt_context["chart_id"] in used_chart_ids: + chart_id = chart_prompt_context["chart_id"] + if chart_id in used_chart_ids: continue explanation_prompt = _build_explanation_prompt(chart_prompt_context) if explanation_prompt in suggested_prompts: continue - used_chart_ids.add(chart_prompt_context["chart_id"]) + used_chart_ids.add(chart_id) suggested_prompts.append(explanation_prompt) - if suggested_prompts: 
- return suggested_prompts[:3] - - dashboard_title = str((dashboard_export.get("dashboard") or {}).get("title") or "this dashboard").strip() - return [ - f'What does the "{dashboard_title}" dashboard measure?', - f'Which metrics stand out on the "{dashboard_title}" dashboard?', - f'How are results grouped on the "{dashboard_title}" dashboard?', - ] + return suggested_prompts[:3] diff --git a/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py b/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py index daa11802f..ab6ad1d81 100644 --- a/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py +++ b/ddpui/tests/core/dashboard_chat/test_suggested_prompts.py @@ -60,13 +60,11 @@ def test_build_dashboard_suggested_prompts_returns_three_grounded_questions(): }, ], }, - org_context_markdown="Facilitators and districts are the main operating units.", - dashboard_context_markdown="Use this dashboard to compare outcomes over time.", ) assert prompts == [ - "How have outcomes changed by quarter?", - "Which districts have the highest literacy efficiency?", + "How did outcomes change by quarter?", + "How does literacy efficiency compare across districts?", 'What does the "Total Facilitators" metric represent?', ] @@ -105,11 +103,43 @@ def test_build_dashboard_suggested_prompts_backfills_with_explanations_when_only }, ], }, - org_context_markdown="Learners are the key population.", - dashboard_context_markdown="Use this dashboard for quick snapshot metrics.", ) assert prompts == [ - 'What does the "Total Learners Reached" metric represent?', 'What does the "Average Attendance Rate" metric represent?', + 'What does the "Total Learners Reached" metric represent?', + ] + + +def test_build_dashboard_suggested_prompts_humanizes_aggregation_labels(): + prompts = build_dashboard_suggested_prompts( + dashboard_export={ + "dashboard": { + "title": "Funding Overview", + "description": "Grant funding by quarter", + }, + "charts": [ + { + "id": 4, + "title": "Grant Funding by 
Quarter", + "description": "Quarterly grant funding", + "chart_type": "line", + "schema_name": "analytics", + "table_name": "grant_funding_quarterly", + "extra_config": { + "dimension_column": "quarter_label", + "metrics": [ + { + "column": "grant_funding_usd", + "aggregation": "sum", + } + ], + }, + }, + ], + }, + ) + + assert prompts == [ + "How did total grant funding usd change by quarter?", ] From 7b6e4411c9c04b32a75fa51429cea1dde373e488 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Tue, 7 Apr 2026 08:36:16 +0530 Subject: [PATCH 42/49] fix: remove dashboard chat first-name greeting --- ddpui/api/user_org_api.py | 1 - ddpui/models/org_user.py | 1 - ddpui/tests/api_tests/test_user_org_api.py | 4 ---- 3 files changed, 6 deletions(-) diff --git a/ddpui/api/user_org_api.py b/ddpui/api/user_org_api.py index 38777dcf3..aa79d3f67 100644 --- a/ddpui/api/user_org_api.py +++ b/ddpui/api/user_org_api.py @@ -113,7 +113,6 @@ def get_current_user_v2(request, org_slug: str = None): res.append( OrgUserResponse( email=user.email, - first_name=user.first_name or None, org=curr_orguser.org, active=user.is_active, new_role_slug=curr_orguser.new_role.slug, diff --git a/ddpui/models/org_user.py b/ddpui/models/org_user.py index 599b0f20f..5fec79fd2 100644 --- a/ddpui/models/org_user.py +++ b/ddpui/models/org_user.py @@ -128,7 +128,6 @@ class OrgUserResponse(Schema): """structure for returning an OrgUser in an http response""" email: str - first_name: str | None = None org: Optional[OrgSchema] = None active: bool wtype: str | None diff --git a/ddpui/tests/api_tests/test_user_org_api.py b/ddpui/tests/api_tests/test_user_org_api.py index 590f630aa..c5decc450 100644 --- a/ddpui/tests/api_tests/test_user_org_api.py +++ b/ddpui/tests/api_tests/test_user_org_api.py @@ -182,8 +182,6 @@ def test_can_manage_org_settings_seeded_for_admin_roles(seed_db): def test_get_current_userv2_has_user(seed_db, authuser, org_with_workspace, org_without_workspace): """tests /worksspace/detatch/""" - 
authuser.first_name = "Pratiksha" - authuser.save() orguser1 = OrgUser.objects.create( user=authuser, org=org_with_workspace, @@ -202,10 +200,8 @@ def test_get_current_userv2_has_user(seed_db, authuser, org_with_workspace, org_ assert len(response) == 2 assert response[0].email == authuser.email - assert response[0].first_name == "Pratiksha" assert response[0].active == authuser.is_active assert response[1].email == authuser.email - assert response[1].first_name == "Pratiksha" assert response[1].active == authuser.is_active if response[0].org.slug == org_with_workspace.slug: From 8875ada673f1285aa90ff7061f8efd7f061c3c6d Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Tue, 7 Apr 2026 08:40:37 +0530 Subject: [PATCH 43/49] refactor: simplify dashboard chat freshness contract --- ddpui/api/dashboard_native_api.py | 2 +- ddpui/api/org_preferences_api.py | 6 ++---- ddpui/schemas/dashboard_schema.py | 2 +- ddpui/schemas/org_preferences_schema.py | 6 ++---- ddpui/tests/api_tests/test_dashboard_chat_settings_api.py | 5 ++--- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/ddpui/api/dashboard_native_api.py b/ddpui/api/dashboard_native_api.py index 1b97d90b7..c51d87bdd 100644 --- a/ddpui/api/dashboard_native_api.py +++ b/ddpui/api/dashboard_native_api.py @@ -80,7 +80,7 @@ def _serialize_dashboard_ai_context(dashboard: Dashboard, context: DashboardAICo dashboard_context_markdown=context.markdown, dashboard_context_updated_by=context.updated_by.user.email if context.updated_by else None, dashboard_context_updated_at=context.updated_at, - vector_last_ingested_at=org_dbt.vector_last_ingested_at if org_dbt else None, + ai_context_refreshed_at=org_dbt.vector_last_ingested_at if org_dbt else None, ) diff --git a/ddpui/api/org_preferences_api.py b/ddpui/api/org_preferences_api.py index 8c46d546d..a050d39de 100644 --- a/ddpui/api/org_preferences_api.py +++ b/ddpui/api/org_preferences_api.py @@ -53,8 +53,7 @@ def _serialize_ai_dashboard_chat_settings(org, 
org_preferences, org_context): else None, org_context_updated_at=org_context.updated_at, dbt_configured=org_dbt is not None, - docs_generated_at=org_dbt.docs_generated_at if org_dbt else None, - vector_last_ingested_at=org_dbt.vector_last_ingested_at if org_dbt else None, + ai_context_refreshed_at=org_dbt.vector_last_ingested_at if org_dbt else None, ) @@ -78,8 +77,7 @@ def _serialize_ai_dashboard_chat_status(org, org_preferences): and vector_last_ingested_at is not None ), dbt_configured=dbt_configured, - docs_generated_at=org_dbt.docs_generated_at if org_dbt else None, - vector_last_ingested_at=vector_last_ingested_at, + ai_context_refreshed_at=vector_last_ingested_at, ) diff --git a/ddpui/schemas/dashboard_schema.py b/ddpui/schemas/dashboard_schema.py index 229ba2278..7395427dc 100644 --- a/ddpui/schemas/dashboard_schema.py +++ b/ddpui/schemas/dashboard_schema.py @@ -209,7 +209,7 @@ class DashboardAIContextResponse(Schema): dashboard_context_markdown: str dashboard_context_updated_by: Optional[str] dashboard_context_updated_at: Optional[datetime] - vector_last_ingested_at: Optional[datetime] + ai_context_refreshed_at: Optional[datetime] class UpdateDashboardAIContextSchema(Schema): diff --git a/ddpui/schemas/org_preferences_schema.py b/ddpui/schemas/org_preferences_schema.py index 5d6374223..316110353 100644 --- a/ddpui/schemas/org_preferences_schema.py +++ b/ddpui/schemas/org_preferences_schema.py @@ -52,8 +52,7 @@ class OrgAIDashboardChatSettingsResponse(Schema): org_context_updated_by: Optional[str] org_context_updated_at: Optional[datetime] dbt_configured: bool - docs_generated_at: Optional[datetime] - vector_last_ingested_at: Optional[datetime] + ai_context_refreshed_at: Optional[datetime] class UpdateOrgAIDashboardChatSchema(Schema): @@ -70,8 +69,7 @@ class OrgAIDashboardChatStatusResponse(Schema): ai_data_sharing_enabled: bool chat_available: bool dbt_configured: bool - docs_generated_at: Optional[datetime] - vector_last_ingested_at: Optional[datetime] + 
ai_context_refreshed_at: Optional[datetime] class CreateOrgSupersetDetailsSchema(Schema): diff --git a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py index 1cefa61f8..89f730c37 100644 --- a/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py +++ b/ddpui/tests/api_tests/test_dashboard_chat_settings_api.py @@ -201,8 +201,7 @@ def test_get_ai_dashboard_chat_status_reports_chat_available(orguser, seed_db): assert response["res"]["ai_data_sharing_enabled"] is True assert response["res"]["dbt_configured"] is True assert response["res"]["chat_available"] is True - assert response["res"]["docs_generated_at"] == generated_at - assert response["res"]["vector_last_ingested_at"] == ingested_at + assert response["res"]["ai_context_refreshed_at"] == ingested_at def test_get_ai_dashboard_chat_settings_requires_permission(guest_orguser, seed_db): @@ -225,7 +224,7 @@ def test_get_dashboard_ai_context_returns_direct_payload(orguser, dashboard, see assert response.dashboard_title == dashboard.title assert response.dashboard_context_markdown == "" assert response.dashboard_context_updated_by is None - assert response.vector_last_ingested_at is None + assert response.ai_context_refreshed_at is None def test_update_dashboard_ai_context_persists_context(orguser, dashboard, seed_db): From f1e442f160143138401c5d3575a3bf55d92dd0a3 Mon Sep 17 00:00:00 2001 From: Ishankoradia Date: Tue, 7 Apr 2026 21:38:01 +0530 Subject: [PATCH 44/49] squashed migrations, added a seeder for initial prompts --- .../commands/seed_dashboard_chat_prompts.py | 30 +++ .../0152_orgdbt_docs_generated_at_and_more.py | 101 -------- ...ashboardchatmessage_created_at_and_more.py | 31 --- .../0154_dashboardchatprompttemplate.py | 240 ------------------ ...boardchatsession_vector_collection_name.py | 17 -- ...chat_orgdbt_and_org_preferences_changes.py | 46 ++++ ...prompttemplate_final_answer_composition.py | 66 ----- 
...oard_chat_ai_contexts_org_and_dashboard.py | 82 ++++++ ...hatmessage_response_latency_ms_and_more.py | 22 -- .../0159_dashboard_chat_config_tables.py | 221 ++++++++++++++++ ...hatprompttemplate_intent_follow_up_refs.py | 157 ------------ ddpui/migrations/0160_dashboardchatturn.py | 116 --------- .../0161_dashboardchatmessage_feedback.py | 22 -- ddpui/models/dashboard_chat.py | 8 + .../dashboard-chat-chromadb-sizing.md | 80 ++++++ 15 files changed, 467 insertions(+), 772 deletions(-) create mode 100644 ddpui/management/commands/seed_dashboard_chat_prompts.py delete mode 100644 ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py delete mode 100644 ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py delete mode 100644 ddpui/migrations/0154_dashboardchatprompttemplate.py delete mode 100644 ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py create mode 100644 ddpui/migrations/0157_dashboard_chat_orgdbt_and_org_preferences_changes.py delete mode 100644 ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py create mode 100644 ddpui/migrations/0158_dashboard_chat_ai_contexts_org_and_dashboard.py delete mode 100644 ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py create mode 100644 ddpui/migrations/0159_dashboard_chat_config_tables.py delete mode 100644 ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py delete mode 100644 ddpui/migrations/0160_dashboardchatturn.py delete mode 100644 ddpui/migrations/0161_dashboardchatmessage_feedback.py create mode 100644 docs/docs/features/dashboard-chat-chromadb-sizing.md diff --git a/ddpui/management/commands/seed_dashboard_chat_prompts.py b/ddpui/management/commands/seed_dashboard_chat_prompts.py new file mode 100644 index 000000000..bcd1692ea --- /dev/null +++ b/ddpui/management/commands/seed_dashboard_chat_prompts.py @@ -0,0 +1,30 @@ +from django.core.management.base import BaseCommand + +from 
ddpui.core.dashboard_chat.agents.prompt_template_store import DEFAULT_DASHBOARD_CHAT_PROMPTS +from ddpui.models.dashboard_chat import DashboardChatPromptTemplate + + +class Command(BaseCommand): + """Seeds DashboardChatPromptTemplate with the default prompts from prompt_template_store.py.""" + + help = "Seed dashboard chat prompt templates with default prompts" + + def handle(self, *args, **options): + created_count = 0 + updated_count = 0 + + for key, prompt in DEFAULT_DASHBOARD_CHAT_PROMPTS.items(): + _, created = DashboardChatPromptTemplate.objects.update_or_create( + key=key, + defaults={"prompt": prompt}, + ) + if created: + created_count += 1 + self.stdout.write(f" created: {key}") + else: + updated_count += 1 + self.stdout.write(f" updated: {key}") + + self.stdout.write( + self.style.SUCCESS(f"Done. {created_count} created, {updated_count} updated.") + ) diff --git a/ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py b/ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py deleted file mode 100644 index fa153eaa6..000000000 --- a/ddpui/migrations/0152_orgdbt_docs_generated_at_and_more.py +++ /dev/null @@ -1,101 +0,0 @@ -# Generated by Django 4.2 on 2026-03-19 20:58 - -from django.db import migrations, models -import django.db.models.deletion -import django.utils.timezone -import uuid - - -class Migration(migrations.Migration): - - dependencies = [ - ('ddpui', '0151_alter_org_queue_config'), - ] - - operations = [ - migrations.AddField( - model_name='orgdbt', - name='docs_generated_at', - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AddField( - model_name='orgdbt', - name='vector_last_ingested_at', - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AddField( - model_name='orgpreferences', - name='ai_data_sharing_consented_at', - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AddField( - model_name='orgpreferences', - name='ai_data_sharing_consented_by', - 
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='ai_data_sharing_consents', to='ddpui.orguser'), - ), - migrations.AddField( - model_name='orgpreferences', - name='ai_data_sharing_enabled', - field=models.BooleanField(default=False), - ), - migrations.CreateModel( - name='OrgAIContext', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('markdown', models.TextField(blank=True, default='')), - ('updated_at', models.DateTimeField(blank=True, null=True)), - ('org', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='ai_context', to='ddpui.org')), - ('updated_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='org_ai_context_updates', to='ddpui.orguser')), - ], - ), - migrations.CreateModel( - name='DashboardChatSession', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('created_at', models.DateTimeField(auto_created=True, default=django.utils.timezone.now)), - ('session_id', models.UUIDField(default=uuid.uuid4, editable=False, unique=True)), - ('updated_at', models.DateTimeField(auto_now=True)), - ('dashboard', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='ddpui.dashboard')), - ('org', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='ddpui.org')), - ('orguser', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='ddpui.orguser')), - ], - options={ - 'db_table': 'dashboard_chat_session', - 'ordering': ['-updated_at'], - }, - ), - migrations.CreateModel( - name='DashboardChatMessage', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('created_at', models.DateTimeField(auto_created=True, default=django.utils.timezone.now)), - ('sequence_number', 
models.PositiveIntegerField()), - ('role', models.CharField(choices=[('user', 'USER'), ('assistant', 'ASSISTANT')], max_length=20)), - ('content', models.TextField(blank=True, default='')), - ('client_message_id', models.CharField(blank=True, max_length=100, null=True)), - ('payload', models.JSONField(blank=True, null=True)), - ('session', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='messages', to='ddpui.dashboardchatsession')), - ], - options={ - 'ordering': ['sequence_number'], - }, - ), - migrations.CreateModel( - name='DashboardAIContext', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('markdown', models.TextField(blank=True, default='')), - ('updated_at', models.DateTimeField(blank=True, null=True)), - ('dashboard', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='ai_context', to='ddpui.dashboard')), - ('updated_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='dashboard_ai_context_updates', to='ddpui.orguser')), - ], - ), - migrations.AddIndex( - model_name='dashboardchatsession', - index=models.Index(fields=['org', 'dashboard', 'created_at'], name='dchat_sess_org_dash_idx'), - ), - migrations.AddConstraint( - model_name='dashboardchatmessage', - constraint=models.UniqueConstraint(fields=('session', 'sequence_number'), name='dchat_message_session_seq_unique'), - ), - ] diff --git a/ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py b/ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py deleted file mode 100644 index fa536a031..000000000 --- a/ddpui/migrations/0153_alter_dashboardchatmessage_created_at_and_more.py +++ /dev/null @@ -1,31 +0,0 @@ -# Generated by Django 4.2 on 2026-03-20 05:49 - -from django.db import migrations, models -import django.utils.timezone - - -class Migration(migrations.Migration): - dependencies = [ - 
("ddpui", "0152_orgdbt_docs_generated_at_and_more"), - ] - - operations = [ - migrations.AlterField( - model_name="dashboardchatmessage", - name="created_at", - field=models.DateTimeField(default=django.utils.timezone.now), - ), - migrations.AlterField( - model_name="dashboardchatsession", - name="created_at", - field=models.DateTimeField(default=django.utils.timezone.now), - ), - migrations.AddConstraint( - model_name="dashboardchatmessage", - constraint=models.UniqueConstraint( - condition=models.Q(client_message_id__isnull=False), - fields=("session", "client_message_id"), - name="dchat_message_session_client_msg_unique", - ), - ), - ] diff --git a/ddpui/migrations/0154_dashboardchatprompttemplate.py b/ddpui/migrations/0154_dashboardchatprompttemplate.py deleted file mode 100644 index 65f996bd2..000000000 --- a/ddpui/migrations/0154_dashboardchatprompttemplate.py +++ /dev/null @@ -1,240 +0,0 @@ -# Generated by Django 4.2 on 2026-03-21 10:44 - -from django.db import migrations, models -import django.utils.timezone - - -def seed_dashboard_chat_prompt_templates(apps, schema_editor): - DashboardChatPromptTemplate = apps.get_model("ddpui", "DashboardChatPromptTemplate") - - defaults = { - "intent_classification": """# Enhanced Intent Classification System Prompt - -You are an intent classification agent for a "Chat with Dashboards" system. Your job is to classify user queries about the CURRENT dashboard, its charts, its datasets, the dbt models that power it, and the organization/dashboard context attached to it. Questions about other dashboards, similar dashboards, or dashboards beyond the current one are **irrelevant**. - -## Intent Categories - -1. **query_with_sql** - Needs data analysis (numbers, trends, rankings, breakdowns, comparisons) -2. **query_without_sql** - Can be answered from metadata (definitions, calculation logic, chart explanations) -3. **follow_up_sql** - Follow-up query that modifies previous SQL query (add dimension, filter, timeframe) -4. 
**follow_up_context** - Follow-up requesting more explanation about previous results -5. **needs_clarification** - Question is too vague or ambiguous -6. **small_talk** - Greetings, jokes, non-business conversation -7. **irrelevant** - Questions outside the current dashboard's scope, including requests about other dashboards - -## Classification Guidelines - -**query_with_sql** examples: -- "How many students are in the EcoChamps program?" -- "Show me session completion trends over time" -- "Top 10 schools by assessment performance" -- "Compare reading comprehension by city" -- "What's the monthly breakdown of planned vs conducted sessions?" - -**query_without_sql** examples: -- "What does 'planned_session' mean?" -- "How is reading comprehension calculated?" -- "Which dataset powers the student count chart?" -- "What metrics are available in this dashboard?" -- "Explain what this chart shows" -- "What is the mission and vision of Bhumi?" -- "Summarize the Bhumi programs described in the context file" - -**follow_up_sql** examples (requires previous SQL context): -- "Now split by chapter" (add dimension) -- "Filter to CGI donors only" (add filter) -- "Same but for last quarter" (modify timeframe) -- "Show weekly instead" (change aggregation) - -**follow_up_context** examples (requires previous context): -- "Explain that metric" -- "How is that calculated?" -- "What does that mean?" -- "Tell me more about that" - -**needs_clarification** examples: -- "Is performance improving?" (missing: which metric, time period) -- "Show me the data" (missing: which data, program) -- "What's the biggest issue?" (missing: context, metric) - -## Follow-up Detection - -When conversation history is available, classify as follow-up **only if the new query depends on the previous turn**. Use all three tests: -1. Explicit reference to prior output ("that", "same", "those results", "the previous query"). -2. 
Modification language applied to prior query ("now split by", "filter that", "same but", "add chapter", "remove donor"). -3. Explanations about prior output ("explain that", "what does that mean"). - -If the question can stand alone and be answered without previous context, treat it as a new `query_with_sql` or `query_without_sql`, **not** follow_up_sql/follow_up_context. - -If so, classify as follow_up_sql or follow_up_context based on whether SQL modification is needed. - -## Current-Dashboard Boundary - -- Treat requests about "other dashboards", "related dashboards", "similar dashboards", or "which dashboard should I look at" as **irrelevant**. -- Treat requests that compare this dashboard to some other dashboard as **irrelevant** unless the question can be answered entirely from the current dashboard's own data and context. -- The assistant is scoped to one dashboard only. - -## Output Format - -Respond with valid JSON only: - -For new queries: -```json -{ - "intent": "query_with_sql", - "confidence": 0.9, - "reason": "User is asking for specific numbers requiring data analysis", - "force_tool_usage": true, - "follow_up_context": { - "is_follow_up": false, - "follow_up_type": null, - "reusable_elements": {}, - "modification_instruction": null - } -} -``` - -For follow-up queries: -```json -{ - "intent": "follow_up_sql", - "confidence": 0.95, - "reason": "User wants to modify previous query by adding dimension", - "force_tool_usage": true, - "follow_up_context": { - "is_follow_up": true, - "follow_up_type": "add_dimension", - "reusable_elements": { - "previous_sql": "from conversation context", - "previous_tables": ["staging.eco_student25_26_stg"], - "add_instruction": "group by chapter" - }, - "modification_instruction": "split by chapter" - } -} -``` - -## Tool Usage Rules - -Set `force_tool_usage: true` for: -- All query_with_sql intents -- All follow_up_sql intents -- query_without_sql when specific chart/dataset lookup needed - -Set `force_tool_usage: 
false` for: -- small_talk, needs_clarification, irrelevant -- query_without_sql for general explanation questions - -## Context Awareness - -Use conversation history to: -- Detect follow-up patterns -- Understand context references ("that metric", "same query") -- Determine if SQL modification or explanation is needed -- Extract reusable elements (tables, metrics, filters) from previous queries - -Classify the following user query:""", - "new_query_system": """You are a data analysis assistant with access to tools. Your job is to help users understand program data and answer their questions accurately. - -IMPORTANT RULES: -1. For data questions: ALWAYS start by searching for relevant charts using retrieve_docs -2. Use chart metadata to identify which datasets/tables to query - charts are your roadmap to data -3. For definition questions: You may use tools to get context or answer from human context -4. Never guess table names, column names, or data values -5. Always call get_distinct_values before using WHERE clauses on text columns -6. Only write SELECT queries, never INSERT/UPDATE/DELETE -7. CRITICAL: When list_tables_by_keyword returns tables, you MUST use the EXACT table names returned - never modify schema or table names -8. NEVER assume tables exist in specific schemas - always discover them using list_tables_by_keyword first -9. When counting entities (students, people, sites, states, programs, cases, etc.), avoid COUNT(*). Prefer COUNT(DISTINCT ) using the most specific ID/name field available (e.g., student_id, roll_no, state_name). If unsure which field uniquely identifies the entity, inspect schema first, and fetch distinct values for candidate ID columns before writing SQL. -9. When you propose SQL, immediately call run_sql_query to execute it. Do not ask for confirmation. -10. Call get_distinct_values only for columns you plan to filter in the current query. -11. Limit get_schema_snippets to the tables you intend to query (avoid extra tables). -12. 
If a requested geographic/location field is missing, choose the most specific available location dimension (e.g., city → chapter → school) and answer using that, explicitly noting the substitution in the response. -13. When someone asks for "changes" in metrics, look for increases and decreases by comparing values across time periods (baseline vs midline vs endline) or comparing current vs previous periods. -14. Only use the EXACT schema-qualified table names returned by the tools. Do not rewrite schemas or table names. -15. IMPORTANT: Only tables relevant to the current dashboard are accessible. If a table is not found, it may not be relevant to this dashboard. Use charts from the current dashboard to guide your analysis. -16. Do not suggest other dashboards. If the question asks about dashboards beyond the current one, stay within the current dashboard context and answer only with data available here. - -Available tools: -- retrieve_docs: Find relevant charts, datasets, context, or dbt models -- search_dbt_models: Search for dbt models by keyword -- get_dbt_model_info: Get detailed info about a specific dbt model -- get_schema_snippets: Get column names and types for tables -- get_distinct_values: Get actual values in a column (required before WHERE clauses) -- check_table_row_count: Check if a table has data before querying -- run_sql_query: Execute a read-only SQL query - -Tool usage flow for data questions: -1. FIRST: Call retrieve_docs to find relevant CHARTS that match the question -2. If charts found: Use the dataset/table names from chart metadata to guide your queries -3. If no relevant chart datasets found: ALWAYS call list_tables_by_keyword with the main entity (e.g. "students", "fellowship", "baseline") -4. Call get_schema_snippets ONLY for the exact table names returned by list_tables_by_keyword -5. Use the EXACT table names from step 3/4 in your SQL queries - do not change schema or table names -6. 
If filtering: Call get_distinct_values for filter columns -7. ALWAYS call run_sql_query with validated SQL - NEVER give up without trying""", - "follow_up_system": """You are handling a follow-up query that modifies a previous question. - -FOLLOW-UP RULES: -1. Reuse context from the previous query when possible (tables, metrics, base SQL) -2. For SQL modifications: modify the previous SQL rather than starting from scratch -3. For new filters: ALWAYS call get_distinct_values first -4. For new dimensions: ensure the column exists in the schema -5. When you generate SQL, execute it by calling run_sql_query immediately; do not ask for confirmation. -6. Only fetch distinct values for columns you will filter, and limit schema lookups to tables you plan to query. -7. Stay within the current dashboard only. Do not suggest or switch to other dashboards.""", - "small_talk_capabilities": ( - "You are a helpful assistant for questions about the current dashboard. " - "Briefly explain what you can do: retrieve dashboard/chart/dbt context, " - "run safe read-only SQL for counts/trends/breakdowns, and clarify metrics from this dashboard. " - "Keep answers concise, friendly, and non-technical when possible." 
- ), - } - - for key, prompt in defaults.items(): - DashboardChatPromptTemplate.objects.update_or_create( - key=key, - defaults={"prompt": prompt}, - ) - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0153_alter_dashboardchatmessage_created_at_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="DashboardChatPromptTemplate", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "key", - models.CharField( - choices=[ - ("intent_classification", "Intent Classification"), - ("new_query_system", "New Query System"), - ("follow_up_system", "Follow-up System"), - ("small_talk_capabilities", "Small Talk Capabilities"), - ], - max_length=64, - unique=True, - ), - ), - ("prompt", models.TextField()), - ("created_at", models.DateTimeField(default=django.utils.timezone.now)), - ("updated_at", models.DateTimeField(auto_now=True)), - ], - options={ - "ordering": ["key"], - }, - ), - migrations.RunPython( - seed_dashboard_chat_prompt_templates, - migrations.RunPython.noop, - ), - ] diff --git a/ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py b/ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py deleted file mode 100644 index 6095fed39..000000000 --- a/ddpui/migrations/0156_dashboardchatsession_vector_collection_name.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 4.2 on 2026-03-23 08:42 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0154_dashboardchatprompttemplate"), - ] - - operations = [ - migrations.AddField( - model_name="dashboardchatsession", - name="vector_collection_name", - field=models.CharField(blank=True, max_length=255, null=True), - ), - ] diff --git a/ddpui/migrations/0157_dashboard_chat_orgdbt_and_org_preferences_changes.py b/ddpui/migrations/0157_dashboard_chat_orgdbt_and_org_preferences_changes.py new file mode 100644 
index 000000000..7a84899bd --- /dev/null +++ b/ddpui/migrations/0157_dashboard_chat_orgdbt_and_org_preferences_changes.py @@ -0,0 +1,46 @@ +# Generated by Django 4.2 on 2026-04-07 15:45 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone +import uuid + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0156_add_comment_snapshot_index"), + ] + + operations = [ + migrations.AddField( + model_name="orgdbt", + name="docs_generated_at", + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AddField( + model_name="orgdbt", + name="vector_last_ingested_at", + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AddField( + model_name="orgpreferences", + name="ai_data_sharing_consented_at", + field=models.DateTimeField(blank=True, null=True), + ), + migrations.AddField( + model_name="orgpreferences", + name="ai_data_sharing_consented_by", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="ai_data_sharing_consents", + to="ddpui.orguser", + ), + ), + migrations.AddField( + model_name="orgpreferences", + name="ai_data_sharing_enabled", + field=models.BooleanField(default=False), + ), + ] diff --git a/ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py b/ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py deleted file mode 100644 index c7c63bb0c..000000000 --- a/ddpui/migrations/0157_dashboardchatprompttemplate_final_answer_composition.py +++ /dev/null @@ -1,66 +0,0 @@ -# Generated by Django 4.2 on 2026-03-24 13:15 - -from django.db import migrations, models - -FINAL_ANSWER_COMPOSITION_PROMPT = """You are the final answer writer for Chat with Dashboards. 
- -You will receive a JSON payload containing: -- the user query -- the routed intent -- a draft tool-loop answer, if any -- retrieved context snippets -- SQL used, if any -- SQL result rows or summaries, if any -- a response format hint -- warnings - -Write the final user-facing answer in markdown. - -CRITICAL RULES: -1. Never output raw JSON objects or raw tool payloads. -2. Never dump SQL result rows verbatim. -3. If `response_format` is `text_with_table` or `table`, write a short narrative summary only. The UI will render the structured table separately. -4. If `response_format` is `text`, answer fully in markdown using headings or bullets when helpful. -5. If the question is explanatory or contextual, answer directly from the provided context and draft answer. Do not append unrelated row data. -6. If no matching rows were found, say so plainly. -7. Use concise, analyst-quality language. Prefer clear interpretation over exhaustive repetition. -8. If the provided result values look like rates or percentages, describe them naturally as percentages when appropriate. -9. Mention important caveats only when they materially affect the answer. 
- -Return markdown only, with no code fences unless the user explicitly asked for code or SQL.""" - - -def seed_final_answer_composition_prompt(apps, schema_editor): - DashboardChatPromptTemplate = apps.get_model("ddpui", "DashboardChatPromptTemplate") - DashboardChatPromptTemplate.objects.update_or_create( - key="final_answer_composition", - defaults={"prompt": FINAL_ANSWER_COMPOSITION_PROMPT}, - ) - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0156_dashboardchatsession_vector_collection_name"), - ] - - operations = [ - migrations.AlterField( - model_name="dashboardchatprompttemplate", - name="key", - field=models.CharField( - choices=[ - ("intent_classification", "Intent Classification"), - ("new_query_system", "New Query System"), - ("follow_up_system", "Follow-up System"), - ("final_answer_composition", "Final Answer Composition"), - ("small_talk_capabilities", "Small Talk Capabilities"), - ], - max_length=64, - unique=True, - ), - ), - migrations.RunPython( - seed_final_answer_composition_prompt, - migrations.RunPython.noop, - ), - ] diff --git a/ddpui/migrations/0158_dashboard_chat_ai_contexts_org_and_dashboard.py b/ddpui/migrations/0158_dashboard_chat_ai_contexts_org_and_dashboard.py new file mode 100644 index 000000000..54e58d1ac --- /dev/null +++ b/ddpui/migrations/0158_dashboard_chat_ai_contexts_org_and_dashboard.py @@ -0,0 +1,82 @@ +# Generated by Django 4.2 on 2026-04-07 15:45 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0157_dashboard_chat_orgdbt_and_org_preferences_changes"), + ] + + operations = [ + migrations.CreateModel( + name="OrgAIContext", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("markdown", models.TextField(blank=True, default="")), + ("updated_at", models.DateTimeField(blank=True, 
null=True)), + ( + "org", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="ai_context", + to="ddpui.org", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="org_ai_context_updates", + to="ddpui.orguser", + ), + ), + ], + options={ + "db_table": "org_ai_context", + }, + ), + migrations.CreateModel( + name="DashboardAIContext", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("markdown", models.TextField(blank=True, default="")), + ("updated_at", models.DateTimeField(blank=True, null=True)), + ( + "dashboard", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="ai_context", + to="ddpui.dashboard", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="dashboard_ai_context_updates", + to="ddpui.orguser", + ), + ), + ], + options={ + "db_table": "dashboard_ai_context", + }, + ), + ] diff --git a/ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py b/ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py deleted file mode 100644 index 41e42dd09..000000000 --- a/ddpui/migrations/0158_dashboardchatmessage_response_latency_ms_and_more.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 4.2 on 2026-03-26 05:38 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0157_dashboardchatprompttemplate_final_answer_composition"), - ] - - operations = [ - migrations.AddField( - model_name="dashboardchatmessage", - name="response_latency_ms", - field=models.PositiveIntegerField(blank=True, null=True), - ), - migrations.AddField( - model_name="dashboardchatmessage", - name="timing_breakdown", - field=models.JSONField(blank=True, null=True), - 
), - ] diff --git a/ddpui/migrations/0159_dashboard_chat_config_tables.py b/ddpui/migrations/0159_dashboard_chat_config_tables.py new file mode 100644 index 000000000..1d68c3ed9 --- /dev/null +++ b/ddpui/migrations/0159_dashboard_chat_config_tables.py @@ -0,0 +1,221 @@ +# Generated by Django 4.2 on 2026-04-07 15:45 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone +import uuid + + +class Migration(migrations.Migration): + dependencies = [ + ("ddpui", "0158_dashboard_chat_ai_contexts_org_and_dashboard"), + ] + + operations = [ + migrations.CreateModel( + name="DashboardChatMessage", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("sequence_number", models.PositiveIntegerField()), + ( + "role", + models.CharField( + choices=[("user", "USER"), ("assistant", "ASSISTANT")], max_length=20 + ), + ), + ("content", models.TextField(blank=True, default="")), + ("client_message_id", models.CharField(blank=True, max_length=100, null=True)), + ("payload", models.JSONField(blank=True, null=True)), + ( + "feedback", + models.CharField( + blank=True, + choices=[("thumbs_up", "Thumbs Up"), ("thumbs_down", "Thumbs Down")], + max_length=16, + null=True, + ), + ), + ("response_latency_ms", models.PositiveIntegerField(blank=True, null=True)), + ("timing_breakdown", models.JSONField(blank=True, null=True)), + ("created_at", models.DateTimeField(default=django.utils.timezone.now)), + ], + options={ + "db_table": "dashboard_chat_message", + "ordering": ["sequence_number"], + }, + ), + migrations.CreateModel( + name="DashboardChatPromptTemplate", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "key", + models.CharField( + choices=[ + ("intent_classification", "Intent Classification"), + ("new_query_system", "New Query System"), + ("follow_up_system", "Follow-up 
System"), + ("final_answer_composition", "Final Answer Composition"), + ("small_talk_capabilities", "Small Talk Capabilities"), + ], + max_length=64, + unique=True, + ), + ), + ("prompt", models.TextField()), + ("created_at", models.DateTimeField(default=django.utils.timezone.now)), + ("updated_at", models.DateTimeField(auto_now=True)), + ], + options={ + "db_table": "dashboard_chat_prompt_template", + "ordering": ["key"], + }, + ), + migrations.CreateModel( + name="DashboardChatSession", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("session_id", models.UUIDField(default=uuid.uuid4, editable=False, unique=True)), + ("vector_collection_name", models.CharField(blank=True, max_length=255, null=True)), + ("created_at", models.DateTimeField(default=django.utils.timezone.now)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "dashboard", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="ddpui.dashboard", + ), + ), + ( + "org", + models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="ddpui.org"), + ), + ( + "orguser", + models.ForeignKey( + null=True, on_delete=django.db.models.deletion.SET_NULL, to="ddpui.orguser" + ), + ), + ], + options={ + "db_table": "dashboard_chat_session", + "ordering": ["-updated_at"], + }, + ), + migrations.CreateModel( + name="DashboardChatTurn", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "status", + models.CharField( + choices=[ + ("queued", "Queued"), + ("running", "Running"), + ("cancel_requested", "Cancel Requested"), + ("cancelled", "Cancelled"), + ("completed", "Completed"), + ("failed", "Failed"), + ], + default="queued", + max_length=32, + ), + ), + ("progress_label", models.CharField(blank=True, default="", max_length=255)), + ("error_message", models.TextField(blank=True, default="")), + 
("cancel_requested_at", models.DateTimeField(blank=True, null=True)), + ("started_at", models.DateTimeField(blank=True, null=True)), + ("completed_at", models.DateTimeField(blank=True, null=True)), + ("created_at", models.DateTimeField(default=django.utils.timezone.now)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "assistant_message", + models.OneToOneField( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="assistant_turn", + to="ddpui.dashboardchatmessage", + ), + ), + ( + "session", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="turns", + to="ddpui.dashboardchatsession", + ), + ), + ( + "user_message", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + related_name="turn", + to="ddpui.dashboardchatmessage", + ), + ), + ], + options={ + "db_table": "dashboard_chat_turn", + "ordering": ["created_at"], + }, + ), + migrations.AddField( + model_name="dashboardchatmessage", + name="session", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="messages", + to="ddpui.dashboardchatsession", + ), + ), + migrations.AddIndex( + model_name="dashboardchatturn", + index=models.Index(fields=["session", "status"], name="dchat_turn_session_status_idx"), + ), + migrations.AddIndex( + model_name="dashboardchatturn", + index=models.Index(fields=["created_at"], name="dchat_turn_created_idx"), + ), + migrations.AddIndex( + model_name="dashboardchatsession", + index=models.Index( + fields=["org", "dashboard", "created_at"], name="dchat_sess_org_dash_idx" + ), + ), + migrations.AddConstraint( + model_name="dashboardchatmessage", + constraint=models.UniqueConstraint( + fields=("session", "sequence_number"), name="dchat_message_session_seq_unique" + ), + ), + migrations.AddConstraint( + model_name="dashboardchatmessage", + constraint=models.UniqueConstraint( + condition=models.Q(("client_message_id__isnull", False)), + 
fields=("session", "client_message_id"), + name="dchat_message_session_client_msg_unique", + ), + ), + ] diff --git a/ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py b/ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py deleted file mode 100644 index f94b69542..000000000 --- a/ddpui/migrations/0159_dashboardchatprompttemplate_intent_follow_up_refs.py +++ /dev/null @@ -1,157 +0,0 @@ -# Generated by Django 4.2 on 2026-03-27 19:00 - -from django.db import migrations - - -INTENT_CLASSIFICATION_PROMPT = """# Enhanced Intent Classification System Prompt - -You are an intent classification agent for a "Chat with Dashboards" system. Your job is to classify user queries about the CURRENT dashboard, its charts, its datasets, the dbt models that power it, and the organization/dashboard context attached to it. Questions about other dashboards, similar dashboards, or dashboards beyond the current one are **irrelevant**. - -## Intent Categories - -1. **query_with_sql** - Needs data analysis (numbers, trends, rankings, breakdowns, comparisons) -2. **query_without_sql** - Can be answered from metadata (definitions, calculation logic, chart explanations) -3. **follow_up_sql** - Follow-up query that modifies previous SQL query (add dimension, filter, timeframe) -4. **follow_up_context** - Follow-up requesting more explanation about previous results -5. **needs_clarification** - Question is too vague or ambiguous -6. **small_talk** - Greetings, jokes, non-business conversation -7. **irrelevant** - Questions outside the current dashboard's scope, including requests about other dashboards - -## Classification Guidelines - -**query_with_sql** examples: -- "How many students are in the EcoChamps program?" -- "Show me session completion trends over time" -- "Top 10 schools by assessment performance" -- "Compare reading comprehension by city" -- "What's the monthly breakdown of planned vs conducted sessions?" 
- -**query_without_sql** examples: -- "What does 'planned_session' mean?" -- "How is reading comprehension calculated?" -- "Which dataset powers the student count chart?" -- "What metrics are available in this dashboard?" -- "Explain what this chart shows" -- "What is the mission and vision of Bhumi?" -- "Summarize the Bhumi programs described in the context file" - -**follow_up_sql** examples (requires previous SQL context): -- "Now split by chapter" (add dimension) -- "Filter to CGI donors only" (add filter) -- "Same but for last quarter" (modify timeframe) -- "Show weekly instead" (change aggregation) -- "Which districts are these facilitators from?" (use the facilitators returned in the previous result) -- "Which programs are those students in?" (expand the previously returned entity set with a new dimension) -- "Which states are they from?" (resolve the pronoun from the immediately previous result set) - -**follow_up_context** examples (requires previous context): -- "Explain that metric" -- "How is that calculated?" -- "What does that mean?" -- "Tell me more about that" - -**needs_clarification** examples: -- "Is performance improving?" (missing: which metric, time period) -- "Show me the data" (missing: which data, program) -- "What's the biggest issue?" (missing: context, metric) - -## Follow-up Detection - -When conversation history is available, classify as follow-up **only if the new query depends on the previous turn**. Use all three tests: -1. Explicit reference to prior output ("that", "same", "those results", "the previous query"). -2. Modification language applied to prior query ("now split by", "filter that", "same but", "add chapter", "remove donor"). -3. Explanations about prior output ("explain that", "what does that mean"). - -If the question can stand alone and be answered without previous context, treat it as a new `query_with_sql` or `query_without_sql`, **not** follow_up_sql/follow_up_context. 
- -If so, classify as follow_up_sql or follow_up_context based on whether SQL modification is needed. - -## Current-Dashboard Boundary - -- Treat requests about "other dashboards", "related dashboards", "similar dashboards", or "which dashboard should I look at" as **irrelevant**. -- Treat requests that compare this dashboard to some other dashboard as **irrelevant** unless the question can be answered entirely from the current dashboard's own data and context. -- The assistant is scoped to one dashboard only. - -## Output Format - -Respond with valid JSON only: - -For new queries: -```json -{ - "intent": "query_with_sql", - "confidence": 0.9, - "reason": "User is asking for specific numbers requiring data analysis", - "force_tool_usage": true, - "follow_up_context": { - "is_follow_up": false, - "follow_up_type": null, - "reusable_elements": {}, - "modification_instruction": null - } -} -``` - -For follow-up queries: -```json -{ - "intent": "follow_up_sql", - "confidence": 0.95, - "reason": "User wants to modify previous query by adding dimension", - "force_tool_usage": true, - "follow_up_context": { - "is_follow_up": true, - "follow_up_type": "add_dimension", - "reusable_elements": { - "previous_sql": "from conversation context", - "previous_tables": ["staging.eco_student25_26_stg"], - "add_instruction": "group by chapter" - }, - "modification_instruction": "split by chapter" - } -} -``` - -## Tool Usage Rules - -Set `force_tool_usage: true` for: -- All query_with_sql intents -- All follow_up_sql intents -- query_without_sql when specific chart/dataset lookup needed - -Set `force_tool_usage: false` for: -- small_talk, needs_clarification, irrelevant -- query_without_sql for general explanation questions - -## Context Awareness - -Use conversation history to: -- Detect follow-up patterns -- Understand context references ("that metric", "same query") -- Resolve referential follow-ups that point to the immediately previous result set - ("these facilitators", "those 
students", "they", "them", "that result") -- Determine if SQL modification or explanation is needed -- Extract reusable elements (tables, metrics, filters) from previous queries - -Classify the following user query:""" - - -def update_intent_classification_prompt(apps, schema_editor): - DashboardChatPromptTemplate = apps.get_model("ddpui", "DashboardChatPromptTemplate") - DashboardChatPromptTemplate.objects.update_or_create( - key="intent_classification", - defaults={"prompt": INTENT_CLASSIFICATION_PROMPT}, - ) - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0158_dashboardchatmessage_response_latency_ms_and_more"), - ] - - operations = [ - migrations.RunPython( - update_intent_classification_prompt, - migrations.RunPython.noop, - ), - ] diff --git a/ddpui/migrations/0160_dashboardchatturn.py b/ddpui/migrations/0160_dashboardchatturn.py deleted file mode 100644 index 9ff84e224..000000000 --- a/ddpui/migrations/0160_dashboardchatturn.py +++ /dev/null @@ -1,116 +0,0 @@ -# Generated by Django 4.2 on 2026-04-06 15:30 - -import django.db.models.deletion -from django.db import migrations, models -import django.utils.timezone - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0156_add_comment_snapshot_index"), - ("ddpui", "0159_dashboardchatprompttemplate_intent_follow_up_refs"), - ] - - operations = [ - migrations.CreateModel( - name="DashboardChatTurn", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "status", - models.CharField( - choices=[ - ("queued", "Queued"), - ("running", "Running"), - ("cancel_requested", "Cancel Requested"), - ("cancelled", "Cancelled"), - ("completed", "Completed"), - ("failed", "Failed"), - ], - default="queued", - max_length=32, - ), - ), - ( - "progress_label", - models.CharField(blank=True, default="", max_length=255), - ), - ( - "error_message", - models.TextField(blank=True, default=""), - ), 
- ( - "cancel_requested_at", - models.DateTimeField(blank=True, null=True), - ), - ( - "started_at", - models.DateTimeField(blank=True, null=True), - ), - ( - "completed_at", - models.DateTimeField(blank=True, null=True), - ), - ( - "created_at", - models.DateTimeField(default=django.utils.timezone.now), - ), - ( - "updated_at", - models.DateTimeField(auto_now=True), - ), - ( - "assistant_message", - models.OneToOneField( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="assistant_turn", - to="ddpui.dashboardchatmessage", - ), - ), - ( - "session", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="turns", - to="ddpui.dashboardchatsession", - ), - ), - ( - "user_message", - models.OneToOneField( - on_delete=django.db.models.deletion.CASCADE, - related_name="turn", - to="ddpui.dashboardchatmessage", - ), - ), - ], - options={ - "db_table": "dashboard_chat_turn", - "ordering": ["created_at"], - }, - ), - migrations.AddIndex( - model_name="dashboardchatturn", - index=models.Index( - fields=["session", "status"], - name="dchat_turn_session_status_idx", - ), - ), - migrations.AddIndex( - model_name="dashboardchatturn", - index=models.Index( - fields=["created_at"], - name="dchat_turn_created_idx", - ), - ), - ] diff --git a/ddpui/migrations/0161_dashboardchatmessage_feedback.py b/ddpui/migrations/0161_dashboardchatmessage_feedback.py deleted file mode 100644 index ef1d18982..000000000 --- a/ddpui/migrations/0161_dashboardchatmessage_feedback.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 4.2 on 2026-04-06 06:52 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ddpui", "0160_dashboardchatturn"), - ] - - operations = [ - migrations.AddField( - model_name="dashboardchatmessage", - name="feedback", - field=models.CharField( - blank=True, - choices=[("thumbs_up", "Thumbs Up"), ("thumbs_down", "Thumbs Down")], - max_length=16, - 
null=True, - ), - ), - ] diff --git a/ddpui/models/dashboard_chat.py b/ddpui/models/dashboard_chat.py index 6c2cd13ac..97bbeb599 100644 --- a/ddpui/models/dashboard_chat.py +++ b/ddpui/models/dashboard_chat.py @@ -76,6 +76,7 @@ class DashboardChatPromptTemplate(models.Model): updated_at = models.DateTimeField(auto_now=True) class Meta: + db_table = "dashboard_chat_prompt_template" ordering = ["key"] @@ -93,6 +94,9 @@ class OrgAIContext(models.Model): ) updated_at = models.DateTimeField(null=True, blank=True) + class Meta: + db_table = "org_ai_context" + class DashboardAIContext(models.Model): """Dashboard-level markdown context used by dashboard chat.""" @@ -112,6 +116,9 @@ class DashboardAIContext(models.Model): ) updated_at = models.DateTimeField(null=True, blank=True) + class Meta: + db_table = "dashboard_ai_context" + class DashboardChatSession(models.Model): """Groups dashboard chat messages under one org/dashboard conversation.""" @@ -159,6 +166,7 @@ class DashboardChatMessage(models.Model): created_at = models.DateTimeField(default=timezone.now) class Meta: + db_table = "dashboard_chat_message" ordering = ["sequence_number"] constraints = [ models.UniqueConstraint( diff --git a/docs/docs/features/dashboard-chat-chromadb-sizing.md b/docs/docs/features/dashboard-chat-chromadb-sizing.md new file mode 100644 index 000000000..8cc863eb8 --- /dev/null +++ b/docs/docs/features/dashboard-chat-chromadb-sizing.md @@ -0,0 +1,80 @@ +# ChromaDB Sizing — Dashboard Chat Feature + +## Context + +- Embedding model: `text-embedding-3-small` — **1536 dimensions** +- One ChromaDB collection per org per version (multi-tenant) +- Collection name format: `org_{id}__{timestamp}` — new collection created on every dbt docs rebuild +- Active sessions pin their collection — up to **2 collections per org** can be live simultaneously (old pinned by active sessions + new from latest rebuild) +- Old collections are garbage collected after **24 hours** of session inactivity +- Concurrent 
users: **~4 per org** +- Target scale: **50 orgs** +- ChromaDB runs as a remote HTTP server (`AI_DASHBOARD_CHAT_CHROMA_HOST`) + +--- + +## Memory Calculation (50 orgs) + +``` +total vectors = 50 orgs × 300 docs × 2 collections = 30,000 vectors +raw payload = 30,000 × 1,536 dims × 4 bytes = ~185 MB +with +30% overhead = ~240 MB +ChromaDB process base = ~300 MB +OS reservation = ~1 GB +───────────────────────────────────────────────────────────────── +Total = ~1.5 GB +``` + +--- + +## CPU Calculation (50 orgs, peak) + +``` +4 users/org × 10 active orgs × 3 queries/turn = 120 concurrent queries +``` + +Chroma queries parallelise up to vCPU count. Beyond that, queries queue. + +--- + +## Machine Recommendation + +### Recommended: `t3.medium` (2 vCPU, 4 GB RAM, ~$31/month) + +- 4 GB RAM comfortably covers 1.5 GB with headroom +- 2 vCPUs means queries queue at peak but requests do not fail — just slower during bursts +- **Schedule dbt rebuilds off-hours** — rebuild is bursty (all dbt models ingested at once) and will saturate 2 vCPUs while it runs + +### If query latency at peak matters: `t3.xlarge` (4 vCPU, 16 GB RAM, ~$122/month) + +- 4 vCPUs eliminates most queuing at 120 concurrent queries +- ~$90/month more than t3.medium + +--- + +## Why a Separate Machine + +1. **Platform isolation** — memory spikes during vector rebuild must not OOM the Django process serving all orgs +2. **Independent scaling** — memory grows linearly with org count; scale ChromaDB independently +3. **Already designed for it** — `AI_DASHBOARD_CHAT_CHROMA_HOST` and `AI_DASHBOARD_CHAT_CHROMA_PORT` env vars are in place + +--- + +## LRU Cache — Required + +Without LRU, all accessed collections stay in memory permanently. Must be configured: + +```bash +CHROMA_SEGMENT_CACHE_POLICY=LRU +CHROMA_MEMORY_LIMIT_BYTES=3000000000 # 3 GB +``` + +> **Warning:** LRU enforcement is unreliable in Chroma v0.5.x (GitHub issue #1323). Verify your version and monitor memory in production. 
+ +--- + +## Sources + +- [ChromaDB Single-Node Performance Benchmarks](https://docs.trychroma.com/guides/deploy/performance) +- [ChromaDB Resource Requirements — Cookbook](https://cookbook.chromadb.dev/core/resources/) +- [ChromaDB Memory Management (LRU)](https://cookbook.chromadb.dev/strategies/memory-management/) From b7175358261d934150ab6bfe37431a3ce4eebb9e Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 8 Apr 2026 09:46:11 +0530 Subject: [PATCH 45/49] build: upgrade chromadb to 1.5.6 --- ddpui/utils/vector/backends/chroma.py | 11 +- pyproject.toml | 2 +- uv.lock | 325 ++++++++++++++++---------- 3 files changed, 203 insertions(+), 135 deletions(-) diff --git a/ddpui/utils/vector/backends/chroma.py b/ddpui/utils/vector/backends/chroma.py index f9a7e79ba..31602a53f 100644 --- a/ddpui/utils/vector/backends/chroma.py +++ b/ddpui/utils/vector/backends/chroma.py @@ -4,7 +4,7 @@ from typing import Any, Union from chromadb import ClientAPI, Collection, HttpClient -from chromadb.errors import InvalidCollectionException, NotFoundError +from chromadb.errors import NotFoundError from ddpui.utils.vector.interface import VectorStore, VectorQueryResult, VectorStoredDocument @@ -39,12 +39,9 @@ def create_collection( def load_collection(self, name: str) -> Union[Collection, None]: try: return self.client.get_collection(name=name) - # In chromadb==0.6.3, get_collection() can raise: - # - InvalidCollectionException when the named collection does not exist - # - NotFoundError when the HTTP/sysdb layer reports a missing resource - # while resolving that collection lookup - # For our store interface, both cases mean "this collection is absent". - except (InvalidCollectionException, NotFoundError): + # In recent Chroma releases, a missing collection resolves as NotFoundError. + # For our store interface, that means "this collection is absent". 
+ except NotFoundError: return None def delete_collection(self, name: str) -> bool: diff --git a/pyproject.toml b/pyproject.toml index eefaeea1d..3ca8e105b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ "channels==4.1.0", "channels-redis==4.2.0", "charset-normalizer==3.1.0", - "chromadb==0.6.3", + "chromadb==1.5.6", "click==8.1.3", "click-didyoumean==0.3.0", "click-plugins==1.1.1", diff --git a/uv.lock b/uv.lock index e3730807c..644767a9d 100644 --- a/uv.lock +++ b/uv.lock @@ -99,15 +99,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/a3/e7b3b9d34239bae066df135060e225929d639731050c920fdc740d6b7897/amqp-5.1.1-py3-none-any.whl", hash = "sha256:6f0956d2c23d8fa6e7691934d8c3930eadb44972cbbd1a7ae3a520f735d43359", size = 50810, upload-time = "2022-04-17T06:39:09.3Z" }, ] -[[package]] -name = "annotated-doc" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -590,44 +581,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/81/14b3b8f01ddaddad6cdec97f2f599aa2fa466bd5ee9af99b08b7713ccd29/charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d", size = 46166, upload-time = "2023-03-06T09:49:36.848Z" }, ] -[[package]] -name = "chroma-hnswlib" -version = "0.7.6" -source = 
{ registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/73/09/10d57569e399ce9cbc5eee2134996581c957f63a9addfa6ca657daf006b8/chroma_hnswlib-0.7.6.tar.gz", hash = "sha256:4dce282543039681160259d29fcde6151cc9106c6461e0485f57cdccd83059b7", size = 32256, upload-time = "2024-07-22T20:19:29.259Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/74/b9dde05ea8685d2f8c4681b517e61c7887e974f6272bb24ebc8f2105875b/chroma_hnswlib-0.7.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f35192fbbeadc8c0633f0a69c3d3e9f1a4eab3a46b65458bbcbcabdd9e895c36", size = 195821, upload-time = "2024-07-22T20:18:26.163Z" }, - { url = "https://files.pythonhosted.org/packages/fd/58/101bfa6bc41bc6cc55fbb5103c75462a7bf882e1704256eb4934df85b6a8/chroma_hnswlib-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f007b608c96362b8f0c8b6b2ac94f67f83fcbabd857c378ae82007ec92f4d82", size = 183854, upload-time = "2024-07-22T20:18:27.6Z" }, - { url = "https://files.pythonhosted.org/packages/17/ff/95d49bb5ce134f10d6aa08d5f3bec624eaff945f0b17d8c3fce888b9a54a/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:456fd88fa0d14e6b385358515aef69fc89b3c2191706fd9aee62087b62aad09c", size = 2358774, upload-time = "2024-07-22T20:18:29.161Z" }, - { url = "https://files.pythonhosted.org/packages/3a/6d/27826180a54df80dbba8a4f338b022ba21c0c8af96fd08ff8510626dee8f/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dfaae825499c2beaa3b75a12d7ec713b64226df72a5c4097203e3ed532680da", size = 2392739, upload-time = "2024-07-22T20:18:30.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/63/ee3e8b7a8f931918755faacf783093b61f32f59042769d9db615999c3de0/chroma_hnswlib-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:2487201982241fb1581be26524145092c95902cb09fc2646ccfbc407de3328ec", size = 150955, upload-time = "2024-07-22T20:18:32.268Z" }, - { url = "https://files.pythonhosted.org/packages/f5/af/d15fdfed2a204c0f9467ad35084fbac894c755820b203e62f5dcba2d41f1/chroma_hnswlib-0.7.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81181d54a2b1e4727369486a631f977ffc53c5533d26e3d366dda243fb0998ca", size = 196911, upload-time = "2024-07-22T20:18:33.46Z" }, - { url = "https://files.pythonhosted.org/packages/0d/19/aa6f2139f1ff7ad23a690ebf2a511b2594ab359915d7979f76f3213e46c4/chroma_hnswlib-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b4ab4e11f1083dd0a11ee4f0e0b183ca9f0f2ed63ededba1935b13ce2b3606f", size = 185000, upload-time = "2024-07-22T20:18:36.16Z" }, - { url = "https://files.pythonhosted.org/packages/79/b1/1b269c750e985ec7d40b9bbe7d66d0a890e420525187786718e7f6b07913/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53db45cd9173d95b4b0bdccb4dbff4c54a42b51420599c32267f3abbeb795170", size = 2377289, upload-time = "2024-07-22T20:18:37.761Z" }, - { url = "https://files.pythonhosted.org/packages/c7/2d/d5663e134436e5933bc63516a20b5edc08b4c1b1588b9680908a5f1afd04/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c093f07a010b499c00a15bc9376036ee4800d335360570b14f7fe92badcdcf9", size = 2411755, upload-time = "2024-07-22T20:18:39.949Z" }, - { url = "https://files.pythonhosted.org/packages/3e/79/1bce519cf186112d6d5ce2985392a89528c6e1e9332d680bf752694a4cdf/chroma_hnswlib-0.7.6-cp311-cp311-win_amd64.whl", hash = "sha256:0540b0ac96e47d0aa39e88ea4714358ae05d64bbe6bf33c52f316c664190a6a3", size = 151888, upload-time = "2024-07-22T20:18:45.003Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/ac/782b8d72de1c57b64fdf5cb94711540db99a92768d93d973174c62d45eb8/chroma_hnswlib-0.7.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e87e9b616c281bfbe748d01705817c71211613c3b063021f7ed5e47173556cb7", size = 197804, upload-time = "2024-07-22T20:18:46.442Z" }, - { url = "https://files.pythonhosted.org/packages/32/4e/fd9ce0764228e9a98f6ff46af05e92804090b5557035968c5b4198bc7af9/chroma_hnswlib-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec5ca25bc7b66d2ecbf14502b5729cde25f70945d22f2aaf523c2d747ea68912", size = 185421, upload-time = "2024-07-22T20:18:47.72Z" }, - { url = "https://files.pythonhosted.org/packages/d9/3d/b59a8dedebd82545d873235ef2d06f95be244dfece7ee4a1a6044f080b18/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305ae491de9d5f3c51e8bd52d84fdf2545a4a2bc7af49765cda286b7bb30b1d4", size = 2389672, upload-time = "2024-07-22T20:18:49.583Z" }, - { url = "https://files.pythonhosted.org/packages/74/1e/80a033ea4466338824974a34f418e7b034a7748bf906f56466f5caa434b0/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:822ede968d25a2c88823ca078a58f92c9b5c4142e38c7c8b4c48178894a0a3c5", size = 2436986, upload-time = "2024-07-22T20:18:51.872Z" }, -] - [[package]] name = "chromadb" -version = "0.6.3" +version = "1.5.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "bcrypt" }, { name = "build" }, - { name = "chroma-hnswlib" }, - { name = "fastapi" }, { name = "grpcio" }, { name = "httpx" }, { name = "importlib-resources" }, + { name = "jsonschema" }, { name = "kubernetes" }, { name = "mmh3" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -635,12 +599,12 @@ dependencies = [ { name = "onnxruntime" }, { name = "opentelemetry-api" }, { name = "opentelemetry-exporter-otlp-proto-grpc" }, - { name = 
"opentelemetry-instrumentation-fastapi" }, { name = "opentelemetry-sdk" }, { name = "orjson" }, { name = "overrides" }, - { name = "posthog" }, + { name = "pybase64" }, { name = "pydantic" }, + { name = "pydantic-settings" }, { name = "pypika" }, { name = "pyyaml" }, { name = "rich" }, @@ -651,9 +615,13 @@ dependencies = [ { name = "typing-extensions" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/39/cd/f0f2de3f466ff514fb6b58271c14f6d22198402bb5b71b8d890231265946/chromadb-0.6.3.tar.gz", hash = "sha256:c8f34c0b704b9108b04491480a36d42e894a960429f87c6516027b5481d59ed3", size = 29297929, upload-time = "2025-01-14T22:20:40.184Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/52/73280325b5cabb4a9fb5c37a3b57144f6cc8c5863748444c52f47ae736f3/chromadb-1.5.6.tar.gz", hash = "sha256:fff5ea5c93d3ec2058619db652715fdc521bbe5c7bac7cc26647bcb937f75c4c", size = 2475230, upload-time = "2026-04-07T03:00:23.959Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/8e/5c186c77bf749b6fe0528385e507e463f1667543328d76fd00a49e1a4e6a/chromadb-0.6.3-py3-none-any.whl", hash = "sha256:4851258489a3612b558488d98d09ae0fe0a28d5cad6bd1ba64b96fdc419dc0e5", size = 611129, upload-time = "2025-01-14T22:20:33.784Z" }, + { url = "https://files.pythonhosted.org/packages/79/bf/66dcbe7f387ed4a39c89c2816496f74d7c7007b742df1b06fea8f48a478e/chromadb-1.5.6-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:1a6469c57e0bf8f4c3ac918ae6f2e598af505648135ed23d716cf5993d8660b7", size = 21647992, upload-time = "2026-04-07T03:00:21.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/e8/66cc0c8cbc65257002223eb2f050a703128b40e5d997c03df6400b8f63fa/chromadb-1.5.6-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:8027d891d5ff0f99c19403f5133cbddd31a19b0537a7d2e974a85ed5af461be2", size = 20821587, upload-time = "2026-04-07T03:00:18.408Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/68/d80940279ed39c6a9b280811434f89932df7a49dde7b4b46358fd46cd91d/chromadb-1.5.6-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f849351f3ebe17ee4da218e98f5611975c4b8c2265ee3c98b15df8a0bab6519", size = 21827142, upload-time = "2026-04-07T03:00:11.411Z" }, + { url = "https://files.pythonhosted.org/packages/7b/61/0705612adbc8435dbb60bfdc613c9bdec3cafcc59b2a5b161fe633cf4238/chromadb-1.5.6-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c00b1d60e665243cfbc6bfa44e6deb855faa654f80e79f2c5ff02456f245ed5", size = 22447796, upload-time = "2026-04-07T03:00:14.683Z" }, + { url = "https://files.pythonhosted.org/packages/3d/60/5cff40b4e04ae6ad39c2d16371f13cc63029d16ef9967e3c233d6b385974/chromadb-1.5.6-cp39-abi3-win_amd64.whl", hash = "sha256:4bff2dd1a9e07178df149f955ea239ee801596eb1eb628a65a88a67fc4c8dda1", size = 22490932, upload-time = "2026-04-07T03:00:26Z" }, ] [[package]] @@ -1401,7 +1369,7 @@ requires-dist = [ { name = "channels", specifier = "==4.1.0" }, { name = "channels-redis", specifier = "==4.2.0" }, { name = "charset-normalizer", specifier = "==3.1.0" }, - { name = "chromadb", specifier = "==0.6.3" }, + { name = "chromadb", specifier = "==1.5.6" }, { name = "click", specifier = "==8.1.3" }, { name = "click-didyoumean", specifier = "==0.3.0" }, { name = "click-plugins", specifier = "==1.1.1" }, @@ -1883,22 +1851,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/8a/7e22279c61f3caee0f99776d8b2dfd412480d0998dad6e31552837e9550b/Faker-17.6.0-py3-none-any.whl", hash = "sha256:5aaa16fa9cfde7d117eef70b6b293a705021e57158f3fa6b44ed1b70202d2065", size = 1698455, upload-time = "2023-03-03T16:59:14.857Z" }, ] -[[package]] -name = "fastapi" -version = "0.135.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "annotated-doc" }, - { name = "pydantic" }, - { name = "starlette" }, - { name = "typing-extensions" }, - { name = 
"typing-inspection" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f7/e6/7adb4c5fa231e82c35b8f5741a9f2d055f520c29af5546fd70d3e8e1cd2e/fastapi-0.135.3.tar.gz", hash = "sha256:bd6d7caf1a2bdd8d676843cdcd2287729572a1ef524fc4d65c17ae002a1be654", size = 396524, upload-time = "2026-04-01T16:23:58.188Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/a4/5caa2de7f917a04ada20018eccf60d6cc6145b0199d55ca3711b0fc08312/fastapi-0.135.3-py3-none-any.whl", hash = "sha256:9b0f590c813acd13d0ab43dd8494138eb58e484bfac405db1f3187cfc5810d98", size = 117734, upload-time = "2026-04-01T16:23:59.328Z" }, -] - [[package]] name = "filelock" version = "3.16.1" @@ -3717,52 +3669,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/80/32217460c2c64c0568cea38410124ff680a9b65f6732867bbf857c4d8626/opentelemetry_exporter_otlp_proto_grpc-1.27.0-py3-none-any.whl", hash = "sha256:56b5bbd5d61aab05e300d9d62a6b3c134827bbd28d0b12f2649c2da368006c9e", size = 18541, upload-time = "2024-08-28T21:35:06.493Z" }, ] -[[package]] -name = "opentelemetry-instrumentation" -version = "0.48b0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "setuptools" }, - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/04/0e/d9394839af5d55c8feb3b22cd11138b953b49739b20678ca96289e30f904/opentelemetry_instrumentation-0.48b0.tar.gz", hash = "sha256:94929685d906380743a71c3970f76b5f07476eea1834abd5dd9d17abfe23cc35", size = 24724, upload-time = "2024-08-28T21:27:42.82Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/7f/405c41d4f359121376c9d5117dcf68149b8122d3f6c718996d037bd4d800/opentelemetry_instrumentation-0.48b0-py3-none-any.whl", hash = "sha256:a69750dc4ba6a5c3eb67986a337185a25b739966d80479befe37b546fc870b44", size = 29449, upload-time = "2024-08-28T21:26:31.288Z" }, -] - -[[package]] -name = "opentelemetry-instrumentation-asgi" -version = "0.48b0" -source = { registry 
= "https://pypi.org/simple" } -dependencies = [ - { name = "asgiref" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-instrumentation" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "opentelemetry-util-http" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/44/ac/fd3d40bab3234ec3f5c052a815100676baaae1832fa1067935f11e5c59c6/opentelemetry_instrumentation_asgi-0.48b0.tar.gz", hash = "sha256:04c32174b23c7fa72ddfe192dad874954968a6a924608079af9952964ecdf785", size = 23435, upload-time = "2024-08-28T21:27:47.276Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/74/a0e0d38622856597dd8e630f2bd793760485eb165708e11b8be1696bbb5a/opentelemetry_instrumentation_asgi-0.48b0-py3-none-any.whl", hash = "sha256:ddb1b5fc800ae66e85a4e2eca4d9ecd66367a8c7b556169d9e7b57e10676e44d", size = 15958, upload-time = "2024-08-28T21:26:38.139Z" }, -] - -[[package]] -name = "opentelemetry-instrumentation-fastapi" -version = "0.48b0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-instrumentation" }, - { name = "opentelemetry-instrumentation-asgi" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "opentelemetry-util-http" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/58/20/43477da5850ef2cd3792715d442aecd051e885e0603b6ee5783b2104ba8f/opentelemetry_instrumentation_fastapi-0.48b0.tar.gz", hash = "sha256:21a72563ea412c0b535815aeed75fc580240f1f02ebc72381cfab672648637a2", size = 18497, upload-time = "2024-08-28T21:28:01.14Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/50/745ab075a3041b7a5f29a579d2c28eaad54f64b4589d8f9fd364c62cf0f3/opentelemetry_instrumentation_fastapi-0.48b0-py3-none-any.whl", hash = "sha256:afeb820a59e139d3e5d96619600f11ce0187658b8ae9e3480857dd790bc024f2", size = 11777, upload-time = "2024-08-28T21:26:57.457Z" }, -] - [[package]] name = "opentelemetry-proto" version = "1.27.0" @@ -3802,15 
+3708,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/7a/4f0063dbb0b6c971568291a8bc19a4ca70d3c185db2d956230dd67429dfc/opentelemetry_semantic_conventions-0.48b0-py3-none-any.whl", hash = "sha256:a0de9f45c413a8669788a38569c7e0a11ce6ce97861a628cca785deecdc32a1f", size = 149685, upload-time = "2024-08-28T21:35:25.983Z" }, ] -[[package]] -name = "opentelemetry-util-http" -version = "0.48b0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/d7/185c494754340e0a3928fd39fde2616ee78f2c9d66253affaad62d5b7935/opentelemetry_util_http-0.48b0.tar.gz", hash = "sha256:60312015153580cc20f322e5cdc3d3ecad80a71743235bdb77716e742814623c", size = 7863, upload-time = "2024-08-28T21:28:27.266Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/2e/36097c0a4d0115b8c7e377c90bab7783ac183bc5cb4071308f8959454311/opentelemetry_util_http-0.48b0-py3-none-any.whl", hash = "sha256:76f598af93aab50328d2a69c786beaedc8b6a7770f7a818cc307eb353debfffb", size = 6946, upload-time = "2024-08-28T21:27:37.975Z" }, -] - [[package]] name = "ordered-set" version = "4.1.0" @@ -4476,6 +4373,179 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, ] +[[package]] +name = "pybase64" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/b8/4ed5c7ad5ec15b08d35cc79ace6145d5c1ae426e46435f4987379439dfea/pybase64-1.4.3.tar.gz", hash = "sha256:c2ed274c9e0ba9c8f9c4083cfe265e66dd679126cd9c2027965d807352f3f053", size = 137272, upload-time = "2025-12-06T13:27:04.013Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/39/47/16d7af6fae7803f4c691856bc0d8d433ccf30e106432e2ef7707ee19a38a/pybase64-1.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f63aa7f29139b8a05ce5f97cdb7fad63d29071e5bdc8a638a343311fe996112a", size = 38241, upload-time = "2025-12-06T13:22:27.396Z" }, + { url = "https://files.pythonhosted.org/packages/4d/3e/268beb8d2240ab55396af4d1b45d2494935982212549b92a5f5b57079bd3/pybase64-1.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f5943ec1ae87a8b4fe310905bb57205ea4330c75e2c628433a7d9dd52295b588", size = 31672, upload-time = "2025-12-06T13:22:28.854Z" }, + { url = "https://files.pythonhosted.org/packages/80/14/4365fa33222edcc46b6db4973f9e22bda82adfb6ab2a01afff591f1e41c8/pybase64-1.4.3-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:5f2b8aef86f35cd5894c13681faf433a1fffc5b2e76544dcb5416a514a1a8347", size = 65978, upload-time = "2025-12-06T13:22:30.191Z" }, + { url = "https://files.pythonhosted.org/packages/1c/22/e89739d8bc9b96c68ead44b4eec42fe555683d9997e4ba65216d384920fc/pybase64-1.4.3-cp310-cp310-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6ec7e53dd09b0a8116ccf5c3265c7c7fce13c980747525be76902aef36a514a", size = 68903, upload-time = "2025-12-06T13:22:31.29Z" }, + { url = "https://files.pythonhosted.org/packages/77/e1/7e59a19f8999cdefe9eb0d56bfd701dd38263b0f6fb4a4d29fce165a1b36/pybase64-1.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7528604cd69c538e1dbaafded46e9e4915a2adcd6f2a60fcef6390d87ca922ea", size = 57516, upload-time = "2025-12-06T13:22:32.395Z" }, + { url = "https://files.pythonhosted.org/packages/42/ad/f47dc7e6fe32022b176868b88b671a32dab389718c8ca905cab79280aaaf/pybase64-1.4.3-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:4ec645f32b50593879031e09158f8681a1db9f5df0f72af86b3969a1c5d1fa2b", size = 54533, upload-time = 
"2025-12-06T13:22:33.457Z" }, + { url = "https://files.pythonhosted.org/packages/7c/9a/7ab312b5a324833953b00e47b23eb4f83d45bd5c5c854b4b4e51b2a0cf5b/pybase64-1.4.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:634a000c5b3485ccc18bb9b244e0124f74b6fbc7f43eade815170237a7b34c64", size = 57187, upload-time = "2025-12-06T13:22:34.566Z" }, + { url = "https://files.pythonhosted.org/packages/2c/84/80acab1fcbaaae103e6b862ef5019192c8f2cd8758433595a202179a0d1d/pybase64-1.4.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:309ea32ad07639a485580af1be0ad447a434deb1924e76adced63ac2319cfe15", size = 57730, upload-time = "2025-12-06T13:22:35.581Z" }, + { url = "https://files.pythonhosted.org/packages/1f/24/84256d472400ea3163d7d69c44bb7e2e1027f0f1d4d20c47629a7dc4578e/pybase64-1.4.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:d10d517566b748d3f25f6ac7162af779360c1c6426ad5f962927ee205990d27c", size = 53036, upload-time = "2025-12-06T13:22:36.621Z" }, + { url = "https://files.pythonhosted.org/packages/a3/0f/33aecbed312ee0431798a73fa25e00dedbffdd91389ee23121fed397c550/pybase64-1.4.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a74cc0f4d835400857cc5c6d27ec854f7949491e07a04e6d66e2137812831f4c", size = 56321, upload-time = "2025-12-06T13:22:37.7Z" }, + { url = "https://files.pythonhosted.org/packages/dc/1c/a341b050746658cbec8cab3c733aeb3ef52ce8f11e60d0d47adbdf729ebf/pybase64-1.4.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1b591d774ac09d5eb73c156a03277cb271438fbd8042bae4109ff3a827cd218c", size = 50114, upload-time = "2025-12-06T13:22:38.752Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d3/f7e6680ae6dc4ddff39112ad66e0fa6b2ec346e73881bafc08498c560bc0/pybase64-1.4.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5eb588d35a04302ef6157d17db62354a787ac6f8b1585dd0b90c33d63a97a550", size = 66570, upload-time = "2025-12-06T13:22:40.221Z" }, + { url = 
"https://files.pythonhosted.org/packages/4c/71/774748eecc7fe23869b7e5df028e3c4c2efa16b506b83ea3fa035ea95dc2/pybase64-1.4.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:df8b122d5be2c96962231cc4831d9c2e1eae6736fb12850cec4356d8b06fe6f8", size = 55700, upload-time = "2025-12-06T13:22:41.289Z" }, + { url = "https://files.pythonhosted.org/packages/b3/91/dd15075bb2fe0086193e1cd4bad80a43652c38d8a572f9218d46ba721802/pybase64-1.4.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:31b7a85c661fc591bbcce82fb8adaebe2941e6a83b08444b0957b77380452a4b", size = 52491, upload-time = "2025-12-06T13:22:42.628Z" }, + { url = "https://files.pythonhosted.org/packages/7b/27/f357d63ea3774c937fc47160e040419ed528827aa3d4306d5ec9826259c0/pybase64-1.4.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e6d7beaae65979fef250e25e66cf81c68a8f81910bcda1a2f43297ab486a7e4e", size = 53957, upload-time = "2025-12-06T13:22:44.615Z" }, + { url = "https://files.pythonhosted.org/packages/b3/c3/243693771701a54e67ff5ccbf4c038344f429613f5643169a7befc51f007/pybase64-1.4.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4a6276bc3a3962d172a2b5aba544d89881c4037ea954517b86b00892c703d007", size = 68422, upload-time = "2025-12-06T13:22:45.641Z" }, + { url = "https://files.pythonhosted.org/packages/75/95/f987081bf6bc1d1eda3012dae1b06ad427732ef9933a632cb8b58f9917f8/pybase64-1.4.3-cp310-cp310-win32.whl", hash = "sha256:4bdd07ef017515204ee6eaab17e1ad05f83c0ccb5af8ae24a0fe6d9cb5bb0b7a", size = 33622, upload-time = "2025-12-06T13:22:47.348Z" }, + { url = "https://files.pythonhosted.org/packages/79/28/c169a769fe90128f16d394aad87b2096dd4bf2f035ae0927108a46b617df/pybase64-1.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:5db0b6bbda15110db2740c61970a8fda3bf9c93c3166a3f57f87c7865ed1125c", size = 35799, upload-time = "2025-12-06T13:22:48.731Z" }, + { url = "https://files.pythonhosted.org/packages/ab/f2/bdbe6af0bd4f3fe5bc70e77ead7f7d523bb9d3ca3ad50ac42b9adbb9ca14/pybase64-1.4.3-cp310-cp310-win_arm64.whl", 
hash = "sha256:f96367dfc82598569aa02b1103ebd419298293e59e1151abda2b41728703284b", size = 31158, upload-time = "2025-12-06T13:22:50.021Z" }, + { url = "https://files.pythonhosted.org/packages/2b/63/21e981e9d3f1f123e0b0ee2130112b1956cad9752309f574862c7ae77c08/pybase64-1.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:70b0d4a4d54e216ce42c2655315378b8903933ecfa32fced453989a92b4317b2", size = 38237, upload-time = "2025-12-06T13:22:52.159Z" }, + { url = "https://files.pythonhosted.org/packages/92/fb/3f448e139516404d2a3963915cc10dc9dde7d3a67de4edba2f827adfef17/pybase64-1.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8127f110cdee7a70e576c5c9c1d4e17e92e76c191869085efbc50419f4ae3c72", size = 31673, upload-time = "2025-12-06T13:22:53.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/bb06a5b9885e7d853ac1e801c4d8abfdb4c8506deee33e53d55aa6690e67/pybase64-1.4.3-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f9ef0388878bc15a084bd9bf73ec1b2b4ee513d11009b1506375e10a7aae5032", size = 68331, upload-time = "2025-12-06T13:22:54.197Z" }, + { url = "https://files.pythonhosted.org/packages/64/15/8d60b9ec5e658185fc2ee3333e01a6e30d717cf677b24f47cbb3a859d13c/pybase64-1.4.3-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95a57cccf106352a72ed8bc8198f6820b16cc7d55aa3867a16dea7011ae7c218", size = 71370, upload-time = "2025-12-06T13:22:55.517Z" }, + { url = "https://files.pythonhosted.org/packages/ac/29/a3e5c1667cc8c38d025a4636855de0fc117fc62e2afeb033a3c6f12c6a22/pybase64-1.4.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cd1c47dfceb9c7bd3de210fb4e65904053ed2d7c9dce6d107f041ff6fbd7e21", size = 59834, upload-time = "2025-12-06T13:22:56.682Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/00/8ffcf9810bd23f3984698be161cf7edba656fd639b818039a7be1d6405d4/pybase64-1.4.3-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:9fe9922698f3e2f72874b26890d53a051c431d942701bb3a37aae94da0b12107", size = 56652, upload-time = "2025-12-06T13:22:57.724Z" }, + { url = "https://files.pythonhosted.org/packages/81/62/379e347797cdea4ab686375945bc77ad8d039c688c0d4d0cfb09d247beb9/pybase64-1.4.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:af5f4bd29c86b59bb4375e0491d16ec8a67548fa99c54763aaedaf0b4b5a6632", size = 59382, upload-time = "2025-12-06T13:22:58.758Z" }, + { url = "https://files.pythonhosted.org/packages/c6/f2/9338ffe2f487086f26a2c8ca175acb3baa86fce0a756ff5670a0822bb877/pybase64-1.4.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c302f6ca7465262908131411226e02100f488f531bb5e64cb901aa3f439bccd9", size = 59990, upload-time = "2025-12-06T13:23:01.007Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a4/85a6142b65b4df8625b337727aa81dc199642de3d09677804141df6ee312/pybase64-1.4.3-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2f3f439fa4d7fde164ebbbb41968db7d66b064450ab6017c6c95cef0afa2b349", size = 54923, upload-time = "2025-12-06T13:23:02.369Z" }, + { url = "https://files.pythonhosted.org/packages/ac/00/e40215d25624012bf5b7416ca37f168cb75f6dd15acdb91ea1f2ea4dc4e7/pybase64-1.4.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a23c6866551043f8b681a5e1e0d59469148b2920a3b4fc42b1275f25ea4217a", size = 58664, upload-time = "2025-12-06T13:23:03.378Z" }, + { url = "https://files.pythonhosted.org/packages/b0/73/d7e19a63e795c13837f2356268d95dc79d1180e756f57ced742a1e52fdeb/pybase64-1.4.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:56e6526f8565642abc5f84338cc131ce298a8ccab696b19bdf76fa6d7dc592ef", size = 52338, upload-time = "2025-12-06T13:23:04.458Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/32/3c746d7a310b69bdd9df77ffc85c41b80bce00a774717596f869b0d4a20e/pybase64-1.4.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6a792a8b9d866ffa413c9687d9b611553203753987a3a582d68cbc51cf23da45", size = 68993, upload-time = "2025-12-06T13:23:05.526Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b3/63cec68f9d6f6e4c0b438d14e5f1ef536a5fe63ce14b70733ac5e31d7ab8/pybase64-1.4.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:62ad29a5026bb22cfcd1ca484ec34b0a5ced56ddba38ceecd9359b2818c9c4f9", size = 58055, upload-time = "2025-12-06T13:23:06.931Z" }, + { url = "https://files.pythonhosted.org/packages/d5/cb/7acf7c3c06f9692093c07f109668725dc37fb9a3df0fa912b50add645195/pybase64-1.4.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:11b9d1d2d32ec358c02214363b8fc3651f6be7dd84d880ecd597a6206a80e121", size = 54430, upload-time = "2025-12-06T13:23:07.936Z" }, + { url = "https://files.pythonhosted.org/packages/33/39/4eb33ff35d173bfff4002e184ce8907f5d0a42d958d61cd9058ef3570179/pybase64-1.4.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0aebaa7f238caa0a0d373616016e2040c6c879ebce3ba7ab3c59029920f13640", size = 56272, upload-time = "2025-12-06T13:23:09.253Z" }, + { url = "https://files.pythonhosted.org/packages/19/97/a76d65c375a254e65b730c6f56bf528feca91305da32eceab8bcc08591e6/pybase64-1.4.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e504682b20c63c2b0c000e5f98a80ea867f8d97642e042a5a39818e44ba4d599", size = 70904, upload-time = "2025-12-06T13:23:10.336Z" }, + { url = "https://files.pythonhosted.org/packages/5e/2c/8338b6d3da3c265002839e92af0a80d6db88385c313c73f103dfb800c857/pybase64-1.4.3-cp311-cp311-win32.whl", hash = "sha256:e9a8b81984e3c6fb1db9e1614341b0a2d98c0033d693d90c726677db1ffa3a4c", size = 33639, upload-time = "2025-12-06T13:23:11.9Z" }, + { url = 
"https://files.pythonhosted.org/packages/39/dc/32efdf2f5927e5449cc341c266a1bbc5fecd5319a8807d9c5405f76e6d02/pybase64-1.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:a90a8fa16a901fabf20de824d7acce07586e6127dc2333f1de05f73b1f848319", size = 35797, upload-time = "2025-12-06T13:23:13.174Z" }, + { url = "https://files.pythonhosted.org/packages/da/59/eda4f9cb0cbce5a45f0cd06131e710674f8123a4d570772c5b9694f88559/pybase64-1.4.3-cp311-cp311-win_arm64.whl", hash = "sha256:61d87de5bc94d143622e94390ec3e11b9c1d4644fe9be3a81068ab0f91056f59", size = 31160, upload-time = "2025-12-06T13:23:15.696Z" }, + { url = "https://files.pythonhosted.org/packages/86/a7/efcaa564f091a2af7f18a83c1c4875b1437db56ba39540451dc85d56f653/pybase64-1.4.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:18d85e5ab8b986bb32d8446aca6258ed80d1bafe3603c437690b352c648f5967", size = 38167, upload-time = "2025-12-06T13:23:16.821Z" }, + { url = "https://files.pythonhosted.org/packages/db/c7/c7ad35adff2d272bf2930132db2b3eea8c44bb1b1f64eb9b2b8e57cde7b4/pybase64-1.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3f5791a3491d116d0deaf4d83268f48792998519698f8751efb191eac84320e9", size = 31673, upload-time = "2025-12-06T13:23:17.835Z" }, + { url = "https://files.pythonhosted.org/packages/43/1b/9a8cab0042b464e9a876d5c65fe5127445a2436da36fda64899b119b1a1b/pybase64-1.4.3-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f0b3f200c3e06316f6bebabd458b4e4bcd4c2ca26af7c0c766614d91968dee27", size = 68210, upload-time = "2025-12-06T13:23:18.813Z" }, + { url = "https://files.pythonhosted.org/packages/62/f7/965b79ff391ad208b50e412b5d3205ccce372a2d27b7218ae86d5295b105/pybase64-1.4.3-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb632edfd132b3eaf90c39c89aa314beec4e946e210099b57d40311f704e11d4", size = 71599, upload-time = "2025-12-06T13:23:20.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/4b/a3b5175130b3810bbb8ccfa1edaadbd3afddb9992d877c8a1e2f274b476e/pybase64-1.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:356ef1d74648ce997f5a777cf8f1aefecc1c0b4fe6201e0ef3ec8a08170e1b54", size = 59922, upload-time = "2025-12-06T13:23:21.487Z" }, + { url = "https://files.pythonhosted.org/packages/da/5d/c38d1572027fc601b62d7a407721688b04b4d065d60ca489912d6893e6cf/pybase64-1.4.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:c48361f90db32bacaa5518419d4eb9066ba558013aaf0c7781620279ecddaeb9", size = 56712, upload-time = "2025-12-06T13:23:22.77Z" }, + { url = "https://files.pythonhosted.org/packages/e7/d4/4e04472fef485caa8f561d904d4d69210a8f8fc1608ea15ebd9012b92655/pybase64-1.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:702bcaa16ae02139d881aeaef5b1c8ffb4a3fae062fe601d1e3835e10310a517", size = 59300, upload-time = "2025-12-06T13:23:24.543Z" }, + { url = "https://files.pythonhosted.org/packages/86/e7/16e29721b86734b881d09b7e23dfd7c8408ad01a4f4c7525f3b1088e25ec/pybase64-1.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:53d0ffe1847b16b647c6413d34d1de08942b7724273dd57e67dcbdb10c574045", size = 60278, upload-time = "2025-12-06T13:23:25.608Z" }, + { url = "https://files.pythonhosted.org/packages/b1/02/18515f211d7c046be32070709a8efeeef8a0203de4fd7521e6b56404731b/pybase64-1.4.3-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:9a1792e8b830a92736dae58f0c386062eb038dfe8004fb03ba33b6083d89cd43", size = 54817, upload-time = "2025-12-06T13:23:26.633Z" }, + { url = "https://files.pythonhosted.org/packages/e7/be/14e29d8e1a481dbff151324c96dd7b5d2688194bb65dc8a00ca0e1ad1e86/pybase64-1.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1d468b1b1ac5ad84875a46eaa458663c3721e8be5f155ade356406848d3701f6", size = 58611, upload-time = "2025-12-06T13:23:27.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/8a/a2588dfe24e1bbd742a554553778ab0d65fdf3d1c9a06d10b77047d142aa/pybase64-1.4.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e97b7bdbd62e71898cd542a6a9e320d9da754ff3ebd02cb802d69087ee94d468", size = 52404, upload-time = "2025-12-06T13:23:28.714Z" }, + { url = "https://files.pythonhosted.org/packages/27/fc/afcda7445bebe0cbc38cafdd7813234cdd4fc5573ff067f1abf317bb0cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b33aeaa780caaa08ffda87fc584d5eab61e3d3bbb5d86ead02161dc0c20d04bc", size = 68817, upload-time = "2025-12-06T13:23:30.079Z" }, + { url = "https://files.pythonhosted.org/packages/d3/3a/87c3201e555ed71f73e961a787241a2438c2bbb2ca8809c29ddf938a3157/pybase64-1.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c0efcf78f11cf866bed49caa7b97552bc4855a892f9cc2372abcd3ed0056f0d", size = 57854, upload-time = "2025-12-06T13:23:31.17Z" }, + { url = "https://files.pythonhosted.org/packages/fd/7d/931c2539b31a7b375e7d595b88401eeb5bd6c5ce1059c9123f9b608aaa14/pybase64-1.4.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:66e3791f2ed725a46593f8bd2761ff37d01e2cdad065b1dceb89066f476e50c6", size = 54333, upload-time = "2025-12-06T13:23:32.422Z" }, + { url = "https://files.pythonhosted.org/packages/de/5e/537601e02cc01f27e9d75f440f1a6095b8df44fc28b1eef2cd739aea8cec/pybase64-1.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:72bb0b6bddadab26e1b069bb78e83092711a111a80a0d6b9edcb08199ad7299b", size = 56492, upload-time = "2025-12-06T13:23:33.515Z" }, + { url = "https://files.pythonhosted.org/packages/96/97/2a2e57acf8f5c9258d22aba52e71f8050e167b29ed2ee1113677c1b600c1/pybase64-1.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5b3365dbcbcdb0a294f0f50af0c0a16b27a232eddeeb0bceeefd844ef30d2a23", size = 70974, upload-time = "2025-12-06T13:23:36.27Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/2e/a9e28941c6dab6f06e6d3f6783d3373044be9b0f9a9d3492c3d8d2260ac0/pybase64-1.4.3-cp312-cp312-win32.whl", hash = "sha256:7bca1ed3a5df53305c629ca94276966272eda33c0d71f862d2d3d043f1e1b91a", size = 33686, upload-time = "2025-12-06T13:23:37.848Z" }, + { url = "https://files.pythonhosted.org/packages/83/e3/507ab649d8c3512c258819c51d25c45d6e29d9ca33992593059e7b646a33/pybase64-1.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:9f2da8f56d9b891b18b4daf463a0640eae45a80af548ce435be86aa6eff3603b", size = 35833, upload-time = "2025-12-06T13:23:38.877Z" }, + { url = "https://files.pythonhosted.org/packages/bc/8a/6eba66cd549a2fc74bb4425fd61b839ba0ab3022d3c401b8a8dc2cc00c7a/pybase64-1.4.3-cp312-cp312-win_arm64.whl", hash = "sha256:0631d8a2d035de03aa9bded029b9513e1fee8ed80b7ddef6b8e9389ffc445da0", size = 31185, upload-time = "2025-12-06T13:23:39.908Z" }, + { url = "https://files.pythonhosted.org/packages/3a/50/b7170cb2c631944388fe2519507fe3835a4054a6a12a43f43781dae82be1/pybase64-1.4.3-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:ea4b785b0607d11950b66ce7c328f452614aefc9c6d3c9c28bae795dc7f072e1", size = 33901, upload-time = "2025-12-06T13:23:40.951Z" }, + { url = "https://files.pythonhosted.org/packages/48/8b/69f50578e49c25e0a26e3ee72c39884ff56363344b79fc3967f5af420ed6/pybase64-1.4.3-cp313-cp313-android_21_x86_64.whl", hash = "sha256:6a10b6330188c3026a8b9c10e6b9b3f2e445779cf16a4c453d51a072241c65a2", size = 40807, upload-time = "2025-12-06T13:23:42.006Z" }, + { url = "https://files.pythonhosted.org/packages/5c/8d/20b68f11adfc4c22230e034b65c71392e3e338b413bf713c8945bd2ccfb3/pybase64-1.4.3-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:27fdff227a0c0e182e0ba37a99109645188978b920dfb20d8b9c17eeee370d0d", size = 30932, upload-time = "2025-12-06T13:23:43.348Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/79/b1b550ac6bff51a4880bf6e089008b2e1ca16f2c98db5e039a08ac3ad157/pybase64-1.4.3-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2a8204f1fdfec5aa4184249b51296c0de95445869920c88123978304aad42df1", size = 31394, upload-time = "2025-12-06T13:23:44.317Z" }, + { url = "https://files.pythonhosted.org/packages/82/70/b5d7c5932bf64ee1ec5da859fbac981930b6a55d432a603986c7f509c838/pybase64-1.4.3-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:874fc2a3777de6baf6aa921a7aa73b3be98295794bea31bd80568a963be30767", size = 38078, upload-time = "2025-12-06T13:23:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/56/fe/e66fe373bce717c6858427670736d54297938dad61c5907517ab4106bd90/pybase64-1.4.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2dc64a94a9d936b8e3449c66afabbaa521d3cc1a563d6bbaaa6ffa4535222e4b", size = 38158, upload-time = "2025-12-06T13:23:46.872Z" }, + { url = "https://files.pythonhosted.org/packages/80/a9/b806ed1dcc7aed2ea3dd4952286319e6f3a8b48615c8118f453948e01999/pybase64-1.4.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e48f86de1c145116ccf369a6e11720ce696c2ec02d285f440dfb57ceaa0a6cb4", size = 31672, upload-time = "2025-12-06T13:23:47.88Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c9/24b3b905cf75e23a9a4deaf203b35ffcb9f473ac0e6d8257f91a05dfce62/pybase64-1.4.3-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:1d45c8fe8fe82b65c36b227bb4a2cf623d9ada16bed602ce2d3e18c35285b72a", size = 68244, upload-time = "2025-12-06T13:23:49.026Z" }, + { url = "https://files.pythonhosted.org/packages/f8/cd/d15b0c3e25e5859fab0416dc5b96d34d6bd2603c1c96a07bb2202b68ab92/pybase64-1.4.3-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ad70c26ba091d8f5167e9d4e1e86a0483a5414805cdb598a813db635bd3be8b8", size = 71620, upload-time = "2025-12-06T13:23:50.081Z" }, + { url 
= "https://files.pythonhosted.org/packages/0d/31/4ca953cc3dcde2b3711d6bfd70a6f4ad2ca95a483c9698076ba605f1520f/pybase64-1.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e98310b7c43145221e7194ac9fa7fffc84763c87bfc5e2f59f9f92363475bdc1", size = 59930, upload-time = "2025-12-06T13:23:51.68Z" }, + { url = "https://files.pythonhosted.org/packages/60/55/e7f7bdcd0fd66e61dda08db158ffda5c89a306bbdaaf5a062fbe4e48f4a1/pybase64-1.4.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:398685a76034e91485a28aeebcb49e64cd663212fd697b2497ac6dfc1df5e671", size = 56425, upload-time = "2025-12-06T13:23:52.732Z" }, + { url = "https://files.pythonhosted.org/packages/cb/65/b592c7f921e51ca1aca3af5b0d201a98666d0a36b930ebb67e7c2ed27395/pybase64-1.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7e46400a6461187ccb52ed75b0045d937529e801a53a9cd770b350509f9e4d50", size = 59327, upload-time = "2025-12-06T13:23:53.856Z" }, + { url = "https://files.pythonhosted.org/packages/23/95/1613d2fb82dbb1548595ad4179f04e9a8451bfa18635efce18b631eabe3f/pybase64-1.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1b62b9f2f291d94f5e0b76ab499790b7dcc78a009d4ceea0b0428770267484b6", size = 60294, upload-time = "2025-12-06T13:23:54.937Z" }, + { url = "https://files.pythonhosted.org/packages/9d/73/40431f37f7d1b3eab4673e7946ff1e8f5d6bd425ec257e834dae8a6fc7b0/pybase64-1.4.3-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:f30ceb5fa4327809dede614be586efcbc55404406d71e1f902a6fdcf322b93b2", size = 54858, upload-time = "2025-12-06T13:23:56.031Z" }, + { url = "https://files.pythonhosted.org/packages/a7/84/f6368bcaf9f743732e002a9858646fd7a54f428490d427dd6847c5cfe89e/pybase64-1.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0d5f18ed53dfa1d4cf8b39ee542fdda8e66d365940e11f1710989b3cf4a2ed66", size = 58629, upload-time = "2025-12-06T13:23:57.12Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/75/359532f9adb49c6b546cafc65c46ed75e2ccc220d514ba81c686fbd83965/pybase64-1.4.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:119d31aa4b58b85a8ebd12b63c07681a138c08dfc2fe5383459d42238665d3eb", size = 52448, upload-time = "2025-12-06T13:23:58.298Z" }, + { url = "https://files.pythonhosted.org/packages/92/6c/ade2ba244c3f33ed920a7ed572ad772eb0b5f14480b72d629d0c9e739a40/pybase64-1.4.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3cf0218b0e2f7988cf7d738a73b6a1d14f3be6ce249d7c0f606e768366df2cce", size = 68841, upload-time = "2025-12-06T13:23:59.886Z" }, + { url = "https://files.pythonhosted.org/packages/a0/51/b345139cd236be382f2d4d4453c21ee6299e14d2f759b668e23080f8663f/pybase64-1.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:12f4ee5e988bc5c0c1106b0d8fc37fb0508f12dab76bac1b098cb500d148da9d", size = 57910, upload-time = "2025-12-06T13:24:00.994Z" }, + { url = "https://files.pythonhosted.org/packages/1a/b8/9f84bdc4f1c4f0052489396403c04be2f9266a66b70c776001eaf0d78c1f/pybase64-1.4.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:937826bc7b6b95b594a45180e81dd4d99bd4dd4814a443170e399163f7ff3fb6", size = 54335, upload-time = "2025-12-06T13:24:02.046Z" }, + { url = "https://files.pythonhosted.org/packages/d0/c7/be63b617d284de46578a366da77ede39c8f8e815ed0d82c7c2acca560fab/pybase64-1.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:88995d1460971ef80b13e3e007afbe4b27c62db0508bc7250a2ab0a0b4b91362", size = 56486, upload-time = "2025-12-06T13:24:03.141Z" }, + { url = "https://files.pythonhosted.org/packages/5e/96/f252c8f9abd6ded3ef1ccd3cdbb8393a33798007f761b23df8de1a2480e6/pybase64-1.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:72326fe163385ed3e1e806dd579d47fde5d8a59e51297a60fc4e6cbc1b4fc4ed", size = 70978, upload-time = "2025-12-06T13:24:04.221Z" }, + { url = 
"https://files.pythonhosted.org/packages/af/51/0f5714af7aeef96e30f968e4371d75ad60558aaed3579d7c6c8f1c43c18a/pybase64-1.4.3-cp313-cp313-win32.whl", hash = "sha256:b1623730c7892cf5ed0d6355e375416be6ef8d53ab9b284f50890443175c0ac3", size = 33684, upload-time = "2025-12-06T13:24:05.29Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ad/0cea830a654eb08563fb8214150ef57546ece1cc421c09035f0e6b0b5ea9/pybase64-1.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:8369887590f1646a5182ca2fb29252509da7ae31d4923dbb55d3e09da8cc4749", size = 35832, upload-time = "2025-12-06T13:24:06.35Z" }, + { url = "https://files.pythonhosted.org/packages/b4/0d/eec2a8214989c751bc7b4cad1860eb2c6abf466e76b77508c0f488c96a37/pybase64-1.4.3-cp313-cp313-win_arm64.whl", hash = "sha256:860b86bca71e5f0237e2ab8b2d9c4c56681f3513b1bf3e2117290c1963488390", size = 31175, upload-time = "2025-12-06T13:24:07.419Z" }, + { url = "https://files.pythonhosted.org/packages/db/c9/e23463c1a2913686803ef76b1a5ae7e6fac868249a66e48253d17ad7232c/pybase64-1.4.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eb51db4a9c93215135dccd1895dca078e8785c357fabd983c9f9a769f08989a9", size = 38497, upload-time = "2025-12-06T13:24:08.873Z" }, + { url = "https://files.pythonhosted.org/packages/71/83/343f446b4b7a7579bf6937d2d013d82f1a63057cf05558e391ab6039d7db/pybase64-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a03ef3f529d85fd46b89971dfb00c634d53598d20ad8908fb7482955c710329d", size = 32076, upload-time = "2025-12-06T13:24:09.975Z" }, + { url = "https://files.pythonhosted.org/packages/46/fc/cb64964c3b29b432f54d1bce5e7691d693e33bbf780555151969ffd95178/pybase64-1.4.3-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:2e745f2ce760c6cf04d8a72198ef892015ddb89f6ceba489e383518ecbdb13ab", size = 72317, upload-time = "2025-12-06T13:24:11.129Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/b7/fab2240da6f4e1ad46f71fa56ec577613cf5df9dce2d5b4cfaa4edd0e365/pybase64-1.4.3-cp313-cp313t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fac217cd9de8581a854b0ac734c50fd1fa4b8d912396c1fc2fce7c230efe3a7", size = 75534, upload-time = "2025-12-06T13:24:12.433Z" }, + { url = "https://files.pythonhosted.org/packages/91/3b/3e2f2b6e68e3d83ddb9fa799f3548fb7449765daec9bbd005a9fbe296d7f/pybase64-1.4.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:da1ee8fa04b283873de2d6e8fa5653e827f55b86bdf1a929c5367aaeb8d26f8a", size = 65399, upload-time = "2025-12-06T13:24:13.928Z" }, + { url = "https://files.pythonhosted.org/packages/6b/08/476ac5914c3b32e0274a2524fc74f01cbf4f4af4513d054e41574eb018f6/pybase64-1.4.3-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:b0bf8e884ee822ca7b1448eeb97fa131628fe0ff42f60cae9962789bd562727f", size = 60487, upload-time = "2025-12-06T13:24:15.177Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b8/618a92915330cc9cba7880299b546a1d9dab1a21fd6c0292ee44a4fe608c/pybase64-1.4.3-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1bf749300382a6fd1f4f255b183146ef58f8e9cb2f44a077b3a9200dfb473a77", size = 63959, upload-time = "2025-12-06T13:24:16.854Z" }, + { url = "https://files.pythonhosted.org/packages/a5/52/af9d8d051652c3051862c442ec3861259c5cdb3fc69774bc701470bd2a59/pybase64-1.4.3-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:153a0e42329b92337664cfc356f2065248e6c9a1bd651bbcd6dcaf15145d3f06", size = 64874, upload-time = "2025-12-06T13:24:18.328Z" }, + { url = "https://files.pythonhosted.org/packages/e4/51/5381a7adf1f381bd184d33203692d3c57cf8ae9f250f380c3fecbdbe554b/pybase64-1.4.3-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:86ee56ac7f2184ca10217ed1c655c1a060273e233e692e9086da29d1ae1768db", size = 58572, upload-time = 
"2025-12-06T13:24:19.417Z" }, + { url = "https://files.pythonhosted.org/packages/e0/f0/578ee4ffce5818017de4fdf544e066c225bc435e73eb4793cde28a689d0b/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0e71a4db76726bf830b47477e7d830a75c01b2e9b01842e787a0836b0ba741e3", size = 63636, upload-time = "2025-12-06T13:24:20.497Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ad/8ae94814bf20159ea06310b742433e53d5820aa564c9fdf65bf2d79f8799/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2ba7799ec88540acd9861b10551d24656ca3c2888ecf4dba2ee0a71544a8923f", size = 56193, upload-time = "2025-12-06T13:24:21.559Z" }, + { url = "https://files.pythonhosted.org/packages/d1/31/6438cfcc3d3f0fa84d229fa125c243d5094e72628e525dfefadf3bcc6761/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2860299e4c74315f5951f0cf3e72ba0f201c3356c8a68f95a3ab4e620baf44e9", size = 72655, upload-time = "2025-12-06T13:24:22.673Z" }, + { url = "https://files.pythonhosted.org/packages/a3/0d/2bbc9e9c3fc12ba8a6e261482f03a544aca524f92eae0b4908c0a10ba481/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:bb06015db9151f0c66c10aae8e3603adab6b6cd7d1f7335a858161d92fc29618", size = 62471, upload-time = "2025-12-06T13:24:23.8Z" }, + { url = "https://files.pythonhosted.org/packages/2c/0b/34d491e7f49c1dbdb322ea8da6adecda7c7cd70b6644557c6e4ca5c6f7c7/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:242512a070817272865d37c8909059f43003b81da31f616bb0c391ceadffe067", size = 58119, upload-time = "2025-12-06T13:24:24.994Z" }, + { url = "https://files.pythonhosted.org/packages/ce/17/c21d0cde2a6c766923ae388fc1f78291e1564b0d38c814b5ea8a0e5e081c/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5d8277554a12d3e3eed6180ebda62786bf9fc8d7bb1ee00244258f4a87ca8d20", size = 60791, upload-time = "2025-12-06T13:24:26.046Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/b2/eaa67038916a48de12b16f4c384bcc1b84b7ec731b23613cb05f27673294/pybase64-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f40b7ddd698fc1e13a4b64fbe405e4e0e1279e8197e37050e24154655f5f7c4e", size = 74701, upload-time = "2025-12-06T13:24:27.466Z" }, + { url = "https://files.pythonhosted.org/packages/42/10/abb7757c330bb869ebb95dab0c57edf5961ffbd6c095c8209cbbf75d117d/pybase64-1.4.3-cp313-cp313t-win32.whl", hash = "sha256:46d75c9387f354c5172582a9eaae153b53a53afeb9c19fcf764ea7038be3bd8b", size = 33965, upload-time = "2025-12-06T13:24:28.548Z" }, + { url = "https://files.pythonhosted.org/packages/63/a0/2d4e5a59188e9e6aed0903d580541aaea72dcbbab7bf50fb8b83b490b6c3/pybase64-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:d7344625591d281bec54e85cbfdab9e970f6219cac1570f2aa140b8c942ccb81", size = 36207, upload-time = "2025-12-06T13:24:29.646Z" }, + { url = "https://files.pythonhosted.org/packages/1f/05/95b902e8f567b4d4b41df768ccc438af618f8d111e54deaf57d2df46bd76/pybase64-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:28a3c60c55138e0028313f2eccd321fec3c4a0be75e57a8d3eb883730b1b0880", size = 31505, upload-time = "2025-12-06T13:24:30.687Z" }, + { url = "https://files.pythonhosted.org/packages/e4/80/4bd3dff423e5a91f667ca41982dc0b79495b90ec0c0f5d59aca513e50f8c/pybase64-1.4.3-cp314-cp314-android_24_arm64_v8a.whl", hash = "sha256:015bb586a1ea1467f69d57427abe587469392215f59db14f1f5c39b52fdafaf5", size = 33835, upload-time = "2025-12-06T13:24:31.767Z" }, + { url = "https://files.pythonhosted.org/packages/45/60/a94d94cc1e3057f602e0b483c9ebdaef40911d84a232647a2fe593ab77bb/pybase64-1.4.3-cp314-cp314-android_24_x86_64.whl", hash = "sha256:d101e3a516f837c3dcc0e5a0b7db09582ebf99ed670865223123fb2e5839c6c0", size = 40673, upload-time = "2025-12-06T13:24:32.82Z" }, + { url = "https://files.pythonhosted.org/packages/e3/71/cf62b261d431857e8e054537a5c3c24caafa331de30daede7b2c6c558501/pybase64-1.4.3-cp314-cp314-ios_13_0_arm64_iphoneos.whl", 
hash = "sha256:8f183ac925a48046abe047360fe3a1b28327afb35309892132fe1915d62fb282", size = 30939, upload-time = "2025-12-06T13:24:34.001Z" }, + { url = "https://files.pythonhosted.org/packages/24/3e/d12f92a3c1f7c6ab5d53c155bff9f1084ba997a37a39a4f781ccba9455f3/pybase64-1.4.3-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30bf3558e24dcce4da5248dcf6d73792adfcf4f504246967e9db155be4c439ad", size = 31401, upload-time = "2025-12-06T13:24:35.11Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3d/9c27440031fea0d05146f8b70a460feb95d8b4e3d9ca8f45c972efb4c3d3/pybase64-1.4.3-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:a674b419de318d2ce54387dd62646731efa32b4b590907800f0bd40675c1771d", size = 38075, upload-time = "2025-12-06T13:24:36.53Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d4/6c0e0cf0efd53c254173fbcd84a3d8fcbf5e0f66622473da425becec32a5/pybase64-1.4.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:720104fd7303d07bac302be0ff8f7f9f126f2f45c1edb4f48fdb0ff267e69fe1", size = 38257, upload-time = "2025-12-06T13:24:38.049Z" }, + { url = "https://files.pythonhosted.org/packages/50/eb/27cb0b610d5cd70f5ad0d66c14ad21c04b8db930f7139818e8fbdc14df4d/pybase64-1.4.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:83f1067f73fa5afbc3efc0565cecc6ed53260eccddef2ebe43a8ce2b99ea0e0a", size = 31685, upload-time = "2025-12-06T13:24:40.327Z" }, + { url = "https://files.pythonhosted.org/packages/db/26/b136a4b65e5c94ff06217f7726478df3f31ab1c777c2c02cf698e748183f/pybase64-1.4.3-cp314-cp314-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:b51204d349a4b208287a8aa5b5422be3baa88abf6cc8ff97ccbda34919bbc857", size = 68460, upload-time = "2025-12-06T13:24:41.735Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/6d/84ce50e7ee1ae79984d689e05a9937b2460d4efa1e5b202b46762fb9036c/pybase64-1.4.3-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:30f2fd53efecbdde4bdca73a872a68dcb0d1bf8a4560c70a3e7746df973e1ef3", size = 71688, upload-time = "2025-12-06T13:24:42.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/57/6743e420416c3ff1b004041c85eb0ebd9c50e9cf05624664bfa1dc8b5625/pybase64-1.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0932b0c5cfa617091fd74f17d24549ce5de3628791998c94ba57be808078eeaf", size = 60040, upload-time = "2025-12-06T13:24:44.37Z" }, + { url = "https://files.pythonhosted.org/packages/3b/68/733324e28068a89119af2921ce548e1c607cc5c17d354690fc51c302e326/pybase64-1.4.3-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:acb61f5ab72bec808eb0d4ce8b87ec9f38d7d750cb89b1371c35eb8052a29f11", size = 56478, upload-time = "2025-12-06T13:24:45.815Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9e/f3f4aa8cfe3357a3cdb0535b78eb032b671519d3ecc08c58c4c6b72b5a91/pybase64-1.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:2bc2d5bc15168f5c04c53bdfe5a1e543b2155f456ed1e16d7edce9ce73842021", size = 59463, upload-time = "2025-12-06T13:24:46.938Z" }, + { url = "https://files.pythonhosted.org/packages/aa/d1/53286038e1f0df1cf58abcf4a4a91b0f74ab44539c2547b6c31001ddd054/pybase64-1.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:8a7bc3cd23880bdca59758bcdd6f4ef0674f2393782763910a7466fab35ccb98", size = 60360, upload-time = "2025-12-06T13:24:48.039Z" }, + { url = "https://files.pythonhosted.org/packages/00/9a/5cc6ce95db2383d27ff4d790b8f8b46704d360d701ab77c4f655bcfaa6a7/pybase64-1.4.3-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ad15acf618880d99792d71e3905b0e2508e6e331b76a1b34212fa0f11e01ad28", size = 54999, upload-time = 
"2025-12-06T13:24:49.547Z" }, + { url = "https://files.pythonhosted.org/packages/64/e7/c3c1d09c3d7ae79e3aa1358c6d912d6b85f29281e47aa94fc0122a415a2f/pybase64-1.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448158d417139cb4851200e5fee62677ae51f56a865d50cda9e0d61bda91b116", size = 58736, upload-time = "2025-12-06T13:24:50.641Z" }, + { url = "https://files.pythonhosted.org/packages/db/d5/0baa08e3d8119b15b588c39f0d39fd10472f0372e3c54ca44649cbefa256/pybase64-1.4.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:9058c49b5a2f3e691b9db21d37eb349e62540f9f5fc4beabf8cbe3c732bead86", size = 52298, upload-time = "2025-12-06T13:24:51.791Z" }, + { url = "https://files.pythonhosted.org/packages/00/87/fc6f11474a1de7e27cd2acbb8d0d7508bda3efa73dfe91c63f968728b2a3/pybase64-1.4.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ce561724f6522907a66303aca27dce252d363fcd85884972d348f4403ba3011a", size = 69049, upload-time = "2025-12-06T13:24:53.253Z" }, + { url = "https://files.pythonhosted.org/packages/69/9d/7fb5566f669ac18b40aa5fc1c438e24df52b843c1bdc5da47d46d4c1c630/pybase64-1.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:63316560a94ac449fe86cb8b9e0a13714c659417e92e26a5cbf085cd0a0c838d", size = 57952, upload-time = "2025-12-06T13:24:54.342Z" }, + { url = "https://files.pythonhosted.org/packages/de/cc/ceb949232dbbd3ec4ee0190d1df4361296beceee9840390a63df8bc31784/pybase64-1.4.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:7ecd796f2ac0be7b73e7e4e232b8c16422014de3295d43e71d2b19fd4a4f5368", size = 54484, upload-time = "2025-12-06T13:24:55.774Z" }, + { url = "https://files.pythonhosted.org/packages/a7/69/659f3c8e6a5d7b753b9c42a4bd9c42892a0f10044e9c7351a4148d413a33/pybase64-1.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d01e102a12fb2e1ed3dc11611c2818448626637857ec3994a9cf4809dfd23477", size = 56542, upload-time = "2025-12-06T13:24:57Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/2c/29c9e6c9c82b72025f9676f9e82eb1fd2339ad038cbcbf8b9e2ac02798fc/pybase64-1.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ebff797a93c2345f22183f454fd8607a34d75eca5a3a4a969c1c75b304cee39d", size = 71045, upload-time = "2025-12-06T13:24:58.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/84/5a3dce8d7a0040a5c0c14f0fe1311cd8db872913fa04438071b26b0dac04/pybase64-1.4.3-cp314-cp314-win32.whl", hash = "sha256:28b2a1bb0828c0595dc1ea3336305cd97ff85b01c00d81cfce4f92a95fb88f56", size = 34200, upload-time = "2025-12-06T13:24:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/57/bc/ce7427c12384adee115b347b287f8f3cf65860b824d74fe2c43e37e81c1f/pybase64-1.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:33338d3888700ff68c3dedfcd49f99bfc3b887570206130926791e26b316b029", size = 36323, upload-time = "2025-12-06T13:25:01.708Z" }, + { url = "https://files.pythonhosted.org/packages/9a/1b/2b8ffbe9a96eef7e3f6a5a7be75995eebfb6faaedc85b6da6b233e50c778/pybase64-1.4.3-cp314-cp314-win_arm64.whl", hash = "sha256:62725669feb5acb186458da2f9353e88ae28ef66bb9c4c8d1568b12a790dfa94", size = 31584, upload-time = "2025-12-06T13:25:02.801Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d8/6824c2e6fb45b8fa4e7d92e3c6805432d5edc7b855e3e8e1eedaaf6efb7c/pybase64-1.4.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:153fe29be038948d9372c3e77ae7d1cab44e4ba7d9aaf6f064dbeea36e45b092", size = 38601, upload-time = "2025-12-06T13:25:04.222Z" }, + { url = "https://files.pythonhosted.org/packages/ea/e5/10d2b3a4ad3a4850be2704a2f70cd9c0cf55725c8885679872d3bc846c67/pybase64-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f7fe3decaa7c4a9e162327ec7bd81ce183d2b16f23c6d53b606649c6e0203e9e", size = 32078, upload-time = "2025-12-06T13:25:05.362Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/04/8b15c34d3c2282f1c1b0850f1113a249401b618a382646a895170bc9b5e7/pybase64-1.4.3-cp314-cp314t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:a5ae04ea114c86eb1da1f6e18d75f19e3b5ae39cb1d8d3cd87c29751a6a22780", size = 72474, upload-time = "2025-12-06T13:25:06.434Z" }, + { url = "https://files.pythonhosted.org/packages/42/00/f34b4d11278f8fdc68bc38f694a91492aa318f7c6f1bd7396197ac0f8b12/pybase64-1.4.3-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1755b3dce3a2a5c7d17ff6d4115e8bee4a1d5aeae74469db02e47c8f477147da", size = 75706, upload-time = "2025-12-06T13:25:07.636Z" }, + { url = "https://files.pythonhosted.org/packages/bb/5d/71747d4ad7fe16df4c4c852bdbdeb1f2cf35677b48d7c34d3011a7a6ad3a/pybase64-1.4.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb852f900e27ffc4ec1896817535a0fa19610ef8875a096b59f21d0aa42ff172", size = 65589, upload-time = "2025-12-06T13:25:08.809Z" }, + { url = "https://files.pythonhosted.org/packages/49/b1/d1e82bd58805bb5a3a662864800bab83a83a36ba56e7e3b1706c708002a5/pybase64-1.4.3-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:9cf21ea8c70c61eddab3421fbfce061fac4f2fb21f7031383005a1efdb13d0b9", size = 60670, upload-time = "2025-12-06T13:25:10.04Z" }, + { url = "https://files.pythonhosted.org/packages/15/67/16c609b7a13d1d9fc87eca12ba2dce5e67f949eeaab61a41bddff843cbb0/pybase64-1.4.3-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:afff11b331fdc27692fc75e85ae083340a35105cea1a3c4552139e2f0e0d174f", size = 64194, upload-time = "2025-12-06T13:25:11.48Z" }, + { url = "https://files.pythonhosted.org/packages/3c/11/37bc724e42960f0106c2d33dc957dcec8f760c91a908cc6c0df7718bc1a8/pybase64-1.4.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = 
"sha256:d9a5143df542c1ce5c1f423874b948c4d689b3f05ec571f8792286197a39ba02", size = 64984, upload-time = "2025-12-06T13:25:12.645Z" }, + { url = "https://files.pythonhosted.org/packages/6e/66/b2b962a6a480dd5dae3029becf03ea1a650d326e39bf1c44ea3db78bb010/pybase64-1.4.3-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:d62e9861019ad63624b4a7914dff155af1cc5d6d79df3be14edcaedb5fdad6f9", size = 58750, upload-time = "2025-12-06T13:25:13.848Z" }, + { url = "https://files.pythonhosted.org/packages/2b/15/9b6d711035e29b18b2e1c03d47f41396d803d06ef15b6c97f45b75f73f04/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:84cfd4d92668ef5766cc42a9c9474b88960ac2b860767e6e7be255c6fddbd34a", size = 63816, upload-time = "2025-12-06T13:25:15.356Z" }, + { url = "https://files.pythonhosted.org/packages/b4/21/e2901381ed0df62e2308380f30d9c4d87d6b74e33a84faed3478d33a7197/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:60fc025437f9a7c2cc45e0c19ed68ed08ba672be2c5575fd9d98bdd8f01dd61f", size = 56348, upload-time = "2025-12-06T13:25:16.559Z" }, + { url = "https://files.pythonhosted.org/packages/c4/16/3d788388a178a0407aa814b976fe61bfa4af6760d9aac566e59da6e4a8b4/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:edc8446196f04b71d3af76c0bd1fe0a45066ac5bffecca88adb9626ee28c266f", size = 72842, upload-time = "2025-12-06T13:25:18.055Z" }, + { url = "https://files.pythonhosted.org/packages/a6/63/c15b1f8bd47ea48a5a2d52a4ec61f037062932ea6434ab916107b58e861e/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e99f6fa6509c037794da57f906ade271f52276c956d00f748e5b118462021d48", size = 62651, upload-time = "2025-12-06T13:25:19.191Z" }, + { url = "https://files.pythonhosted.org/packages/bd/b8/f544a2e37c778d59208966d4ef19742a0be37c12fc8149ff34483c176616/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:d94020ef09f624d841aa9a3a6029df8cf65d60d7a6d5c8687579fa68bd679b65", size = 58295, upload-time = 
"2025-12-06T13:25:20.822Z" }, + { url = "https://files.pythonhosted.org/packages/03/99/1fae8a3b7ac181e36f6e7864a62d42d5b1f4fa7edf408c6711e28fba6b4d/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:f64ce70d89942a23602dee910dec9b48e5edf94351e1b378186b74fcc00d7f66", size = 60960, upload-time = "2025-12-06T13:25:22.099Z" }, + { url = "https://files.pythonhosted.org/packages/9d/9e/cd4c727742345ad8384569a4466f1a1428f4e5cc94d9c2ab2f53d30be3fe/pybase64-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8ea99f56e45c469818b9781903be86ba4153769f007ba0655fa3b46dc332803d", size = 74863, upload-time = "2025-12-06T13:25:23.442Z" }, + { url = "https://files.pythonhosted.org/packages/28/86/a236ecfc5b494e1e922da149689f690abc84248c7c1358f5605b8c9fdd60/pybase64-1.4.3-cp314-cp314t-win32.whl", hash = "sha256:343b1901103cc72362fd1f842524e3bb24978e31aea7ff11e033af7f373f66ab", size = 34513, upload-time = "2025-12-06T13:25:24.592Z" }, + { url = "https://files.pythonhosted.org/packages/56/ce/ca8675f8d1352e245eb012bfc75429ee9cf1f21c3256b98d9a329d44bf0f/pybase64-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:57aff6f7f9dea6705afac9d706432049642de5b01080d3718acc23af87c5af76", size = 36702, upload-time = "2025-12-06T13:25:25.72Z" }, + { url = "https://files.pythonhosted.org/packages/3b/30/4a675864877397179b09b720ee5fcb1cf772cf7bebc831989aff0a5f79c1/pybase64-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:e906aa08d4331e799400829e0f5e4177e76a3281e8a4bc82ba114c6b30e405c9", size = 31904, upload-time = "2025-12-06T13:25:26.826Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7c/545fd4935a0e1ddd7147f557bf8157c73eecec9cffd523382fa7af2557de/pybase64-1.4.3-graalpy311-graalpy242_311_native-macosx_10_9_x86_64.whl", hash = "sha256:d27c1dfdb0c59a5e758e7a98bd78eaca5983c22f4a811a36f4f980d245df4611", size = 38393, upload-time = "2025-12-06T13:26:19.535Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/ca/ae7a96be9ddc96030d4e9dffc43635d4e136b12058b387fd47eb8301b60f/pybase64-1.4.3-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:0f1a0c51d6f159511e3431b73c25db31095ee36c394e26a4349e067c62f434e5", size = 32109, upload-time = "2025-12-06T13:26:20.72Z" }, + { url = "https://files.pythonhosted.org/packages/bf/44/d4b7adc7bf4fd5b52d8d099121760c450a52c390223806b873f0b6a2d551/pybase64-1.4.3-graalpy311-graalpy242_311_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a492518f3078a4e3faaef310697d21df9c6bc71908cebc8c2f6fbfa16d7d6b1f", size = 43227, upload-time = "2025-12-06T13:26:21.845Z" }, + { url = "https://files.pythonhosted.org/packages/08/86/2ba2d8734ef7939debeb52cf9952e457ba7aa226cae5c0e6dd631f9b851f/pybase64-1.4.3-graalpy311-graalpy242_311_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae1a0f47784fd16df90d8acc32011c8d5fcdd9ab392c9ec49543e5f6a9c43a4", size = 35804, upload-time = "2025-12-06T13:26:23.149Z" }, + { url = "https://files.pythonhosted.org/packages/4f/5b/19c725dc3aaa6281f2ce3ea4c1628d154a40dd99657d1381995f8096768b/pybase64-1.4.3-graalpy311-graalpy242_311_native-win_amd64.whl", hash = "sha256:03cea70676ffbd39a1ab7930a2d24c625b416cacc9d401599b1d29415a43ab6a", size = 35880, upload-time = "2025-12-06T13:26:24.663Z" }, + { url = "https://files.pythonhosted.org/packages/17/45/92322aec1b6979e789b5710f73c59f2172bc37c8ce835305434796824b7b/pybase64-1.4.3-graalpy312-graalpy250_312_native-macosx_10_13_x86_64.whl", hash = "sha256:2baaa092f3475f3a9c87ac5198023918ea8b6c125f4c930752ab2cbe3cd1d520", size = 38746, upload-time = "2025-12-06T13:26:25.869Z" }, + { url = "https://files.pythonhosted.org/packages/11/94/f1a07402870388fdfc2ecec0c718111189732f7d0f2d7fe1386e19e8fad0/pybase64-1.4.3-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:cde13c0764b1af07a631729f26df019070dad759981d6975527b7e8ecb465b6c", 
size = 32573, upload-time = "2025-12-06T13:26:27.792Z" }, + { url = "https://files.pythonhosted.org/packages/fa/8f/43c3bb11ca9bacf81cb0b7a71500bb65b2eda6d5fe07433c09b543de97f3/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5c29a582b0ea3936d02bd6fe9bf674ab6059e6e45ab71c78404ab2c913224414", size = 43461, upload-time = "2025-12-06T13:26:28.906Z" }, + { url = "https://files.pythonhosted.org/packages/2d/4c/2a5258329200be57497d3972b5308558c6de42e3749c6cc2aa1cbe34b25a/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b6b664758c804fa919b4f1257aa8cf68e95db76fc331de5f70bfc3a34655afe1", size = 36058, upload-time = "2025-12-06T13:26:30.092Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6d/41faa414cde66ec023b0ca8402a8f11cb61731c3dc27c082909cbbd1f929/pybase64-1.4.3-graalpy312-graalpy250_312_native-win_amd64.whl", hash = "sha256:f7537fa22ae56a0bf51e4b0ffc075926ad91c618e1416330939f7ef366b58e3b", size = 36231, upload-time = "2025-12-06T13:26:31.656Z" }, + { url = "https://files.pythonhosted.org/packages/2a/cf/6e712491bd665ea8633efb0b484121893ea838d8e830e06f39f2aae37e58/pybase64-1.4.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94cf50c36bb2f8618982ee5a978c4beed9db97d35944fa96e8586dd953c7994a", size = 38007, upload-time = "2025-12-06T13:26:32.804Z" }, + { url = "https://files.pythonhosted.org/packages/38/c0/9272cae1c49176337dcdbd97511e2843faae1aaf5a5fb48569093c6cd4ce/pybase64-1.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:01bc3ff5ca1341685c6d2d945b035f442f7b9c3b068a5c6ee8408a41fda5754e", size = 31538, upload-time = "2025-12-06T13:26:34.001Z" }, + { url = "https://files.pythonhosted.org/packages/20/f2/17546f97befe429c73f622bbd869ceebb518c40fdb0dec4c4f98312e80a5/pybase64-1.4.3-pp310-pypy310_pp73-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", 
hash = "sha256:03d0aa3761a99034960496280c02aa063f856a3cc9b33771bc4eab0e4e72b5c2", size = 40682, upload-time = "2025-12-06T13:26:35.168Z" }, + { url = "https://files.pythonhosted.org/packages/92/a0/464b36d5dfb61f3da17858afaeaa876a9342d58e9f17803ce7f28b5de9e8/pybase64-1.4.3-pp310-pypy310_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7ca5b1ce768520acd6440280cdab35235b27ad2faacfcec064bc9c3377066ef1", size = 41306, upload-time = "2025-12-06T13:26:36.351Z" }, + { url = "https://files.pythonhosted.org/packages/07/c9/a748dfc0969a8d960ecf1e82c8a2a16046ffec22f8e7ece582aa3b1c6cf9/pybase64-1.4.3-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3caa1e2ddad1c50553ffaaa1c86b74b3f9fbd505bea9970326ab88fc68c4c184", size = 35452, upload-time = "2025-12-06T13:26:37.772Z" }, + { url = "https://files.pythonhosted.org/packages/95/b7/4d37bd3577d1aa6c732dc099087fe027c48873e223de3784b095e5653f8b/pybase64-1.4.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd47076f736b27a8b0f9b30d93b6bb4f5af01b0dc8971f883ed3b75934f39a99", size = 36125, upload-time = "2025-12-06T13:26:39.78Z" }, + { url = "https://files.pythonhosted.org/packages/b2/76/160dded493c00d3376d4ad0f38a2119c5345de4a6693419ad39c3565959b/pybase64-1.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:277de6e03cc9090fb359365c686a2a3036d23aee6cd20d45d22b8c89d1247f17", size = 37939, upload-time = "2025-12-06T13:26:41.014Z" }, + { url = "https://files.pythonhosted.org/packages/b7/b8/a0f10be8d648d6f8f26e560d6e6955efa7df0ff1e009155717454d76f601/pybase64-1.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ab1dd8b1ed2d1d750260ed58ab40defaa5ba83f76a30e18b9ebd5646f6247ae5", size = 31466, upload-time = "2025-12-06T13:26:42.539Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/22/832a2f9e76cdf39b52e01e40d8feeb6a04cf105494f2c3e3126d0149717f/pybase64-1.4.3-pp311-pypy311_pp73-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:bd4d2293de9fd212e294c136cec85892460b17d24e8c18a6ba18750928037750", size = 40681, upload-time = "2025-12-06T13:26:43.782Z" }, + { url = "https://files.pythonhosted.org/packages/12/d7/6610f34a8972415fab3bb4704c174a1cc477bffbc3c36e526428d0f3957d/pybase64-1.4.3-pp311-pypy311_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af6d0d3a691911cc4c9a625f3ddcd3af720738c21be3d5c72de05629139d393", size = 41294, upload-time = "2025-12-06T13:26:44.936Z" }, + { url = "https://files.pythonhosted.org/packages/64/25/ed24400948a6c974ab1374a233cb7e8af0a5373cea0dd8a944627d17c34a/pybase64-1.4.3-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5cfc8c49a28322d82242088378f8542ce97459866ba73150b062a7073e82629d", size = 35447, upload-time = "2025-12-06T13:26:46.098Z" }, + { url = "https://files.pythonhosted.org/packages/ee/2b/e18ee7c5ee508a82897f021c1981533eca2940b5f072fc6ed0906c03a7a7/pybase64-1.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:debf737e09b8bf832ba86f5ecc3d3dbd0e3021d6cd86ba4abe962d6a5a77adb3", size = 36134, upload-time = "2025-12-06T13:26:47.35Z" }, +] + [[package]] name = "pycparser" version = "2.21" @@ -4587,6 +4657,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, ] +[[package]] +name = "pydantic-settings" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = 
"typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, +] + [[package]] name = "pydeck" version = "0.9.1" @@ -5432,19 +5516,6 @@ version = "2.2.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a3/f8/a6091be6a60ed4df9ac806c89fbc5fe1a3416d0284f3ba70aa09a3419428/starkbank-ecdsa-2.2.0.tar.gz", hash = "sha256:9399c3371b899d4a235b68a1ed7919d202fbf024bd2c863ae8ebdad343c2a63a", size = 14690, upload-time = "2022-10-24T18:36:05.27Z" } -[[package]] -name = "starlette" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, -] - [[package]] name = "streamlit" version = "1.40.0" From c97519c906a548671f876d97854c9fa48272ad2a Mon Sep 
17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 8 Apr 2026 09:48:07 +0530 Subject: [PATCH 46/49] docs: clarify chromadb 1.5.6 missing collection handling --- ddpui/utils/vector/backends/chroma.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ddpui/utils/vector/backends/chroma.py b/ddpui/utils/vector/backends/chroma.py index 31602a53f..420d9f590 100644 --- a/ddpui/utils/vector/backends/chroma.py +++ b/ddpui/utils/vector/backends/chroma.py @@ -39,7 +39,8 @@ def create_collection( def load_collection(self, name: str) -> Union[Collection, None]: try: return self.client.get_collection(name=name) - # In recent Chroma releases, a missing collection resolves as NotFoundError. + # In chromadb==1.5.6, a missing collection resolves as NotFoundError in + # both the local Segment API and the HTTP client error mapper. # For our store interface, that means "this collection is absent". except NotFoundError: return None From 2b06140a135f2eff7cd53365b1254828e92c9852 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 8 Apr 2026 10:35:58 +0530 Subject: [PATCH 47/49] build: upgrade chroma compose image --- docker-compose.chroma.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.chroma.yml b/docker-compose.chroma.yml index b889af2dd..188eec7ec 100644 --- a/docker-compose.chroma.yml +++ b/docker-compose.chroma.yml @@ -2,7 +2,7 @@ name: dalgo-vector-store services: chroma: - image: chromadb/chroma:0.6.3 + image: chromadb/chroma:1.5.6 ports: - "8003:8000" volumes: From 0491689c5b0e0091bd32a0f43bb4aed21d6de501 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 8 Apr 2026 15:04:24 +0530 Subject: [PATCH 48/49] Trim whitespace from login usernames --- ddpui/models/org_user.py | 7 ++- ddpui/tests/api_tests/test_user_org_api.py | 50 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/ddpui/models/org_user.py b/ddpui/models/org_user.py index 5fec79fd2..2e41e8877 100644 --- a/ddpui/models/org_user.py 
+++ b/ddpui/models/org_user.py @@ -9,7 +9,7 @@ from django.contrib.auth.models import User from ninja import Schema -from pydantic import SecretStr, BaseModel +from pydantic import SecretStr, BaseModel, field_validator from ddpui.models.org import Org from ddpui.models.role_based_access import Role @@ -216,6 +216,11 @@ class LoginPayload(BaseModel): username: str password: str + @field_validator("username") + @classmethod + def strip_username_whitespace(cls, value: str) -> str: + return value.strip() + class LogoutPayload(BaseModel): """the payload for the login workflow""" diff --git a/ddpui/tests/api_tests/test_user_org_api.py b/ddpui/tests/api_tests/test_user_org_api.py index c5decc450..88b4f9e9e 100644 --- a/ddpui/tests/api_tests/test_user_org_api.py +++ b/ddpui/tests/api_tests/test_user_org_api.py @@ -15,6 +15,8 @@ from ddpui.api.user_org_api import ( get_current_user_v2, post_organization_user, + post_login, + post_login_v2, get_organization_users, delete_organization_users_v1, put_organization_user_self_v1, @@ -49,6 +51,7 @@ ResetPasswordSchema, VerifyEmailSchema, DeleteOrgUserPayload, + LoginPayload, ) from ddpui.schemas.org_warehouse_schema import OrgWarehouseSchema from ddpui.auth import ( @@ -330,6 +333,53 @@ def test_post_organization_user_success_lowercase_email(orguser): the_authuser.delete() +# ================================================================================ +def test_post_login_strips_username_whitespace(): + request = mock_request() + payload = LoginPayload(username=" tempuseremail ", password="tempuserpassword") + serializer = Mock() + serializer.validated_data = {"access": "access-token", "refresh": "refresh-token"} + + with patch("ddpui.api.user_org_api.CustomTokenObtainSerializer", return_value=serializer) as mock_serializer: + with patch( + "ddpui.api.user_org_api.orguserfunctions.lookup_user", + return_value={"email": "tempuseremail"}, + ) as mock_lookup_user: + response = post_login(request, payload) + + 
mock_serializer.assert_called_once_with( + data={"username": "tempuseremail", "password": "tempuserpassword"} + ) + serializer.is_valid.assert_called_once_with(raise_exception=True) + mock_lookup_user.assert_called_once_with("tempuseremail") + assert response["email"] == "tempuseremail" + assert response["token"] == "access-token" + assert response["refresh_token"] == "refresh-token" + + +def test_post_login_v2_strips_username_whitespace(): + request = mock_request() + payload = LoginPayload(username=" tempuseremail ", password="tempuserpassword") + serializer = Mock() + serializer.validated_data = {"access": "access-token", "refresh": "refresh-token"} + + with patch("ddpui.api.user_org_api.CustomTokenObtainSerializer", return_value=serializer) as mock_serializer: + with patch( + "ddpui.api.user_org_api.orguserfunctions.lookup_user", + return_value={"email": "tempuseremail"}, + ) as mock_lookup_user: + response = post_login_v2(request, payload) + + mock_serializer.assert_called_once_with( + data={"username": "tempuseremail", "password": "tempuserpassword"} + ) + serializer.is_valid.assert_called_once_with(raise_exception=True) + mock_lookup_user.assert_called_once_with("tempuseremail") + assert response.status_code == 200 + assert response.cookies["access_token"].value == "access-token" + assert response.cookies["refresh_token"].value == "refresh-token" + + # ================================================================================ def test_get_organization_users_no_org(orguser): """a failing test, requestor has no associated org""" From 33b941a9301d481fb800d9b37c1d082501646a02 Mon Sep 17 00:00:00 2001 From: Pratiksha Rao Date: Wed, 8 Apr 2026 15:16:28 +0530 Subject: [PATCH 49/49] Move login username trimming into API handlers --- ddpui/api/user_org_api.py | 10 ++++++---- ddpui/models/org_user.py | 7 +------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ddpui/api/user_org_api.py b/ddpui/api/user_org_api.py index aa79d3f67..b4386047d 100644 
--- a/ddpui/api/user_org_api.py +++ b/ddpui/api/user_org_api.py @@ -149,15 +149,16 @@ def post_organization_user(request, payload: OrgUserCreate): # pylint: disable= @user_org_router.post("/login/", auth=None) def post_login(request, payload: LoginPayload): """Uses the username and password in the request to return a JWT auth token""" + username = payload.username.strip() serializer = CustomTokenObtainSerializer( data={ - "username": payload.username, + "username": username, "password": payload.password, } ) serializer.is_valid(raise_exception=True) token_data = serializer.validated_data - retval = orguserfunctions.lookup_user(payload.username) + retval = orguserfunctions.lookup_user(username) retval["token"] = token_data["access"] retval["refresh_token"] = token_data["refresh"] return retval @@ -633,9 +634,10 @@ def get_organization_wren(request): @user_org_router.post("/v2/login/", auth=None) def post_login_v2(request, payload: LoginPayload): """Login endpoint that sets httpOnly cookies instead of returning tokens in response""" + username = payload.username.strip() serializer = CustomTokenObtainSerializer( data={ - "username": payload.username, + "username": username, "password": payload.password, } ) @@ -643,7 +645,7 @@ def post_login_v2(request, payload: LoginPayload): token_data = serializer.validated_data # Get user data (same as v1) - retval = orguserfunctions.lookup_user(payload.username) + retval = orguserfunctions.lookup_user(username) # Create JsonResponse and set cookies response = JsonResponse(retval) diff --git a/ddpui/models/org_user.py b/ddpui/models/org_user.py index 2e41e8877..5fec79fd2 100644 --- a/ddpui/models/org_user.py +++ b/ddpui/models/org_user.py @@ -9,7 +9,7 @@ from django.contrib.auth.models import User from ninja import Schema -from pydantic import SecretStr, BaseModel, field_validator +from pydantic import SecretStr, BaseModel from ddpui.models.org import Org from ddpui.models.role_based_access import Role @@ -216,11 +216,6 @@ class 
LoginPayload(BaseModel): username: str password: str - @field_validator("username") - @classmethod - def strip_username_whitespace(cls, value: str) -> str: - return value.strip() - class LogoutPayload(BaseModel): """the payload for the login workflow"""