diff --git a/examples/crew_pbi_tool.json b/examples/crew_pbi_tool.json new file mode 100644 index 00000000..9447b9d3 --- /dev/null +++ b/examples/crew_pbi_tool.json @@ -0,0 +1,160 @@ +{ + "id": "1a6b7844-c651-4e88-b95d-84fcef04231a", + "name": "pbi_tool", + "agent_ids": [ + "b6e6638e-fbae-465d-b64e-031a7491f6a3" + ], + "task_ids": [ + "82defb03-9686-408b-a81f-e32be0d4ec4d" + ], + "nodes": [ + { + "id": "task-82defb03-9686-408b-a81f-e32be0d4ec4d", + "type": "taskNode", + "position": { + "x": 368, + "y": 68 + }, + "data": { + "label": "Run Job with Custom Parameters", + "role": null, + "goal": null, + "backstory": null, + "tools": [ + "71" + ], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + }, + "agentId": null, + "taskId": "82defb03-9686-408b-a81f-e32be0d4ec4d", + "llm": null, + "function_calling_llm": null, + "max_iter": null, + "max_rpm": null, + "max_execution_time": null, + "verbose": null, + "allow_delegation": null, + "cache": null, + "memory": true, + "embedder_config": null, + "system_template": null, + "prompt_template": null, + "response_template": null, + "allow_code_execution": null, + "code_execution_mode": null, + "max_retry_limit": null, + "use_system_prompt": null, + "respect_context_window": null, + "type": "task", + "description": "Execute job ID 365257288725339 ONE TIME. \n\nExecute the job with those parameters:\n- question: {question}\n- sample_size: 100\n- metadata: 'json'\n- databricks_host: 'https://e2-demo-field-eng.cloud.databricks.com/'\n\nI don't want you to list or get the job; I want you to run it once; you are not allowed to run more than once. Use PowerBITool to execute this query.\n\nIMPORTANT: You will make sure that the action run will only trigger 1 time and not more.", + "expected_output": "A job execution result containing the result data from running job ID 365257288725339 with the custom parameters. 
The output will include any result_data and various other parameters.", + "icon": null, + "advanced_config": null, + "config": { + "cache_response": false, + "cache_ttl": 3600, + "retry_on_fail": true, + "max_retries": 3, + "timeout": null, + "priority": 1, + "error_handling": "default", + "output_file": null, + "output_json": null, + "output_pydantic": null, + "validation_function": null, + "callback_function": null, + "human_input": false, + "markdown": false + }, + "context": [], + "async_execution": false, + "knowledge_sources": null, + "markdown": false + }, + "width": null, + "height": null, + "selected": null, + "positionAbsolute": null, + "dragging": null, + "style": null + }, + { + "id": "agent-b6e6638e-fbae-465d-b64e-031a7491f6a3", + "type": "agentNode", + "position": { + "x": 68, + "y": 73 + }, + "data": { + "label": "PowerBI Job Orchestrator", + "role": "PowerBI Job Manager", + "goal": "Orchestrate and manage PowerBI jobs efficiently.\n\nYou will make sure that the action run will only trigger 1 time and not more.", + "backstory": "Experienced in managing and optimizing Databricks workflows for PowerBI, with expertise in job scheduling and execution.", + "tools": [ + "72" + ], + "tool_configs": null, + "agentId": "b6e6638e-fbae-465d-b64e-031a7491f6a3", + "taskId": null, + "llm": "databricks-llama-4-maverick", + "function_calling_llm": null, + "max_iter": 25, + "max_rpm": 1, + "max_execution_time": 300, + "verbose": false, + "allow_delegation": false, + "cache": true, + "memory": true, + "embedder_config": { + "provider": "databricks", + "config": { + "model": "databricks-gte-large-en" + } + }, + "system_template": null, + "prompt_template": null, + "response_template": null, + "allow_code_execution": false, + "code_execution_mode": "safe", + "max_retry_limit": 3, + "use_system_prompt": true, + "respect_context_window": true, + "type": "agent", + "description": null, + "expected_output": null, + "icon": null, + "advanced_config": null, + "config": null, + "context": [], + "async_execution": false, + "knowledge_sources": [], + "markdown": false + }, + "width": null, + "height": null, + "selected": null, + "positionAbsolute": null, + "dragging": null, + "style": null + } + ], + "edges": [ + { + "source": "agent-b6e6638e-fbae-465d-b64e-031a7491f6a3", + "target": "task-82defb03-9686-408b-a81f-e32be0d4ec4d", + "id": "reactflow__edge-agent-2d21de66-438d-4a70-8be0-f16dd0a8d4aa-task-c2c71f3e-89e8-4786-84dd-fd0e2f063ad1-right-left", + "sourceHandle": null, + "targetHandle": null + } + ], + "created_at": "2025-12-03T16:09:08.320279", + "updated_at": "2025-12-03T16:09:08.320281" +} \ No newline at end of file diff --git a/examples/powerbi_full_pipeline.ipynb b/examples/powerbi_full_pipeline.ipynb new file mode 100644 index 00000000..5076e152 --- /dev/null +++ b/examples/powerbi_full_pipeline.ipynb @@ -0,0 +1,919 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "c9771f72-fb59-4879-962a-eb40f27dbecb", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nPower BI Full Pipeline - Metadata Extraction, DAX Generation, and Execution\\n\\nThis notebook provides an end-to-end Power BI integration in one place:\\n1. Extract metadata from Power BI semantic model\\n2. Generate DAX query from natural language question using LLM\\n3. 
Execute the generated DAX query\\n4. Return all results\\n\\nRequired Parameters (via job_params):\\n- workspace_id: Power BI workspace ID\\n- semantic_model_id: Power BI semantic model/dataset ID\\n- question: Natural language question (e.g., \"What is the total NSR per product?\")\\n- auth_method: \"device_code\" or \"service_principal\" (default: \"device_code\")\\n\\nFor Service Principal auth, also provide:\\n- client_id: Azure AD application client ID\\n- tenant_id: Azure AD tenant ID\\n- client_secret: Service principal secret\\n\\nFor DAX Generation:\\n- databricks_host: Databricks workspace URL (e.g., \"https://example.databricks.com\")\\n- databricks_token: Databricks personal access token for LLM API\\n- model_name: LLM model to use (default: \"databricks-meta-llama-3-1-405b-instruct\")\\n- temperature: LLM temperature (default: 0.1)\\n\\nOptional Parameters:\\n- sample_size: Number of rows to sample per table for type inference (default: 100)\\n- skip_metadata: Skip metadata extraction if metadata is provided directly (default: False)\\n- metadata: Pre-extracted metadata (JSON string) - use with skip_metadata=True\\n'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"\n", + "Power BI Full Pipeline - Metadata Extraction, DAX Generation, and Execution\n", + "\n", + "This notebook provides an end-to-end Power BI integration in one place:\n", + "1. Extract metadata from Power BI semantic model\n", + "2. Generate DAX query from natural language question using LLM\n", + "3. Execute the generated DAX query\n", + "4. Return all results\n", + "\n", + "Required Parameters (via job_params):\n", + "- workspace_id: Power BI workspace ID\n", + "- semantic_model_id: Power BI semantic model/dataset ID\n", + "- question: Natural language question (e.g., \"What is the total NSR per product?\")\n", + "- auth_method: \"device_code\" or \"service_principal\" (default: \"device_code\")\n", + "\n", + "For Service Principal auth, also provide:\n", + "- client_id: Azure AD application client ID\n", + "- tenant_id: Azure AD tenant ID\n", + "- client_secret: Service principal secret\n", + "\n", + "For DAX Generation:\n", + "- databricks_host: Databricks workspace URL (e.g., \"https://example.databricks.com\")\n", + "- databricks_token: Databricks personal access token for LLM API\n", + "- model_name: LLM model to use (default: \"databricks-meta-llama-3-1-405b-instruct\")\n", + "- temperature: LLM temperature (default: 0.1)\n", + "\n", + "Optional Parameters:\n", + "- sample_size: Number of rows to sample per table for type inference (default: 100)\n", + "- skip_metadata: Skip metadata extraction if metadata is provided directly (default: False)\n", + "- metadata: Pre-extracted metadata (JSON string) - use with skip_metadata=True\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "fd471c22-271e-4931-8943-fa5affabeb2a", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install azure-identity requests pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "cd3198d7-81bb-433f-bfd2-f1f2a99629db", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + 
"dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "7dc50d3b-0900-4707-bee5-4438ddc37bd3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import required libraries\n", + "import json\n", + "import os\n", + "import re\n", + "import requests\n", + "import pandas as pd\n", + "from datetime import datetime\n", + "from typing import Dict, List, Any, Optional\n", + "from azure.identity import DeviceCodeCredential, ClientSecretCredential" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "aa356b0e-65a1-4698-8a32-37912d170af3", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Configuration - Get Job Parameters" + } + }, + "outputs": [], + "source": [ + "# Default configuration\n", + "DEFAULT_TENANT_ID = \"9f37a392-f0ae-4280-9796-f1864a10effc\"\n", + "DEFAULT_CLIENT_ID = \"1950a258-227b-4e31-a9cf-717495945fc2\"\n", + "DEFAULT_MODEL_NAME = \"databricks-meta-llama-3-1-405b-instruct\"\n", + "DEFAULT_TEMPERATURE = 0.1\n", + "\n", + "try:\n", + " # Get job parameters\n", + " job_params = json.loads(dbutils.widgets.get(\"job_params\"))\n", + "\n", + " # Extract required parameters\n", + " WORKSPACE_ID = job_params.get(\"workspace_id\")\n", + " SEMANTIC_MODEL_ID = job_params.get(\"semantic_model_id\")\n", + " QUESTION = job_params.get(\"question\")\n", + "\n", + " # Authentication configuration\n", + " AUTH_METHOD = job_params.get(\"auth_method\", \"device_code\")\n", + " TENANT_ID = job_params.get(\"tenant_id\", DEFAULT_TENANT_ID)\n", + " CLIENT_ID = job_params.get(\"client_id\", DEFAULT_CLIENT_ID)\n", + " CLIENT_SECRET = job_params.get(\"client_secret\")\n", + "\n", + " # Databricks API configuration for LLM\n", + " DATABRICKS_HOST = job_params.get(\"databricks_host\")\n", + " DATABRICKS_TOKEN = job_params.get(\"databricks_token\")\n", + " MODEL_NAME = job_params.get(\"model_name\", DEFAULT_MODEL_NAME)\n", + " TEMPERATURE = job_params.get(\"temperature\", DEFAULT_TEMPERATURE)\n", + "\n", + " # Optional parameters\n", + " SAMPLE_SIZE = job_params.get(\"sample_size\", 100)\n", + " SKIP_METADATA = job_params.get(\"skip_metadata\", False)\n", + " METADATA_JSON = job_params.get(\"metadata\")\n", + "\n", + " print(\"=\" * 80)\n", + " print(\"Power BI Full Pipeline - Metadata → DAX Generation → Execution\")\n", + " print(\"=\" * 80)\n", + " print(f\"Execution Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", + " print(f\"Workspace ID: {WORKSPACE_ID}\")\n", + " print(f\"Semantic Model ID: {SEMANTIC_MODEL_ID}\")\n", + " print(f\"Question: {QUESTION}\")\n", + " print(f\"Authentication Method: {AUTH_METHOD}\")\n", + " print(f\"LLM Model: {MODEL_NAME}\")\n", + " print(f\"Temperature: {TEMPERATURE}\")\n", + " print(f\"Skip Metadata Extraction: {SKIP_METADATA}\")\n", + " print(\"=\" * 80)\n", + "\n", + "except Exception as e:\n", + " print(f\"āŒ Error getting parameters: {str(e)}\")\n", + " print(\"\\nRequired parameters in job_params:\")\n", + " print(\"- workspace_id: Power BI workspace ID\")\n", + " print(\"- semantic_model_id: Power BI dataset/semantic model ID\")\n", + " print(\"- question: Natural language question\")\n", + " print(\"- databricks_host: Databricks workspace URL\")\n", + " print(\"- databricks_token: Databricks personal access 
token\")\n", + " print(\"\\nOptional parameters:\")\n", + " print(\"- auth_method: 'device_code' or 'service_principal' (default: 'device_code')\")\n", + " print(\"- model_name: LLM model (default: 'databricks-meta-llama-3-1-405b-instruct')\")\n", + " print(\"- temperature: LLM temperature (default: 0.1)\")\n", + " print(\"- sample_size: Rows to sample (default: 100)\")\n", + " print(\"- skip_metadata: Skip metadata extraction (default: False)\")\n", + " print(\"- metadata: Pre-extracted metadata JSON (use with skip_metadata=True)\")\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "510775f1-4d0c-4e98-993d-36cf78a02fb8", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Authentication Functions" + } + }, + "outputs": [], + "source": [ + "def generate_token_device_code(tenant_id: str, client_id: str) -> str:\n", + " \"\"\"Generate token using device code flow (DCF).\"\"\"\n", + " try:\n", + " credential = DeviceCodeCredential(\n", + " client_id=client_id,\n", + " tenant_id=tenant_id,\n", + " )\n", + "\n", + " print(\"\\nšŸ”„ Initiating Device Code Flow authentication...\")\n", + " print(\"āš ļø Follow the instructions above to authenticate\")\n", + "\n", + " token = credential.get_token(\"https://analysis.windows.net/powerbi/api/.default\")\n", + "\n", + " print(\"āœ… Token generated successfully\")\n", + " return token.token\n", + "\n", + " except Exception as e:\n", + " print(f\"āŒ Token generation failed: {str(e)}\")\n", + " raise\n", + "\n", + "\n", + "def generate_token_service_principal(tenant_id: str, client_id: str, client_secret: str) -> str:\n", + " \"\"\"Generate token using Service Principal.\"\"\"\n", + " try:\n", + " print(\"\\nšŸ”„ Authenticating with Service Principal...\")\n", + "\n", + " credential = ClientSecretCredential(\n", + " tenant_id=tenant_id,\n", + " client_id=client_id,\n", + " client_secret=os.getenv(client_secret)\n", + " )\n", + "\n", + " token = credential.get_token(\"https://analysis.windows.net/powerbi/api/.default\")\n", + "\n", + " print(\"āœ… Token generated successfully\")\n", + " return token.token\n", + "\n", + " except Exception as e:\n", + " print(f\"āŒ Service Principal authentication failed: {str(e)}\")\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "b4074990-9c83-4326-b163-df9786f92e64", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Generate Access Token" + } + }, + "outputs": [], + "source": [ + "try:\n", + " if AUTH_METHOD == \"service_principal\":\n", + " if not CLIENT_SECRET:\n", + " raise ValueError(\"client_secret is required for service_principal authentication\")\n", + "\n", + " access_token = generate_token_service_principal(\n", + " tenant_id=TENANT_ID,\n", + " client_id=CLIENT_ID,\n", + " client_secret=CLIENT_SECRET\n", + " )\n", + " else: # device_code (default)\n", + " access_token = generate_token_device_code(\n", + " tenant_id=TENANT_ID,\n", + " client_id=CLIENT_ID\n", + " )\n", + "\n", + " print(f\"\\nāœ… Authentication successful!\")\n", + "\n", + "except Exception as e:\n", + " print(f\"\\nāŒ Authentication failed: {str(e)}\")\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": 
"500967a8-75c4-4de9-8b8c-97f399276f8f", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Power BI API Functions" + } + }, + "outputs": [], + "source": [ + "def get_dataset_info(token: str, dataset_id: str) -> dict:\n", + " \"\"\"Get basic dataset information.\"\"\"\n", + " url = f\"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}\"\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {token}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + "\n", + " print(f\"šŸ”„ Fetching dataset information...\")\n", + " response = requests.get(url, headers=headers)\n", + "\n", + " if response.status_code == 200:\n", + " print(\"āœ… Dataset information retrieved\")\n", + " return response.json()\n", + " else:\n", + " print(f\"āŒ Failed to get dataset info: {response.text}\")\n", + " return {}\n", + "\n", + "\n", + "def execute_dax_for_metadata(token: str, dataset_id: str, dax_query: str) -> pd.DataFrame:\n", + " \"\"\"Execute a DAX query to retrieve metadata.\"\"\"\n", + " url = f\"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/executeQueries\"\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {token}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + "\n", + " body = {\n", + " \"queries\": [{\"query\": dax_query}],\n", + " \"serializerSettings\": {\"includeNulls\": True}\n", + " }\n", + "\n", + " print(f\" Executing query: {dax_query[:50]}...\")\n", + " response = requests.post(url, headers=headers, json=body, timeout=60)\n", + " print(f\" Response status: {response.status_code}\")\n", + "\n", + " if response.status_code == 200:\n", + " results = response.json().get(\"results\", [])\n", + " if results and results[0].get(\"tables\"):\n", + " rows = results[0][\"tables\"][0].get(\"rows\", [])\n", + " if rows:\n", + " return pd.DataFrame(rows)\n", + " else:\n", + " print(f\" āŒ Query failed: {response.text}\")\n", + "\n", + " return pd.DataFrame()\n", + "\n", + "\n", + "def extract_table_metadata_from_data(token: str, dataset_id: str, table_names: List[str], sample_size: int = 100) -> List[Dict[str, Any]]:\n", + " \"\"\"Extract column metadata by querying actual data from each table.\"\"\"\n", + " print(f\"\\nšŸ”„ Extracting metadata for {len(table_names)} tables...\")\n", + " print(f\" Using sample size: {sample_size} rows per table\\n\")\n", + "\n", + " tables_metadata = []\n", + "\n", + " for table_name in table_names:\n", + " print(f\" šŸ“Š Processing table: {table_name}\")\n", + "\n", + " # Query sample data to get column names and types\n", + " query = f\"EVALUATE TOPN({sample_size}, '{table_name}')\"\n", + " df = execute_dax_for_metadata(token, dataset_id, query)\n", + "\n", + " if df.empty:\n", + " print(f\" āš ļø Could not query table: {table_name} (may be empty or not exist)\")\n", + " continue\n", + "\n", + " # Extract column information from DataFrame\n", + " columns = []\n", + " for col_name in df.columns:\n", + " # Remove the table name prefix and square brackets from column names\n", + " clean_name = col_name.strip(table_name).strip('[').strip(']')\n", + "\n", + " # Infer data type from pandas dtype based on actual data\n", + " dtype = str(df[col_name].dtype)\n", + "\n", + " if 'object' in dtype or 'string' in dtype:\n", + " data_type = 'string'\n", + " elif 'int' in dtype:\n", + " data_type = 'int'\n", + " elif 'float' in dtype or 'decimal' in dtype:\n", + " data_type = 'decimal'\n", + " elif 'datetime' in dtype:\n", + " data_type = 'datetime'\n", + " elif 'bool' in dtype:\n", + " data_type = 'boolean'\n", + " 
else:\n", + " data_type = 'string'\n", + "\n", + " columns.append({\n", + " \"name\": clean_name,\n", + " \"data_type\": data_type\n", + " })\n", + "\n", + " tables_metadata.append({\n", + " \"name\": table_name,\n", + " \"columns\": columns\n", + " })\n", + "\n", + " print(f\" āœ… Found {len(columns)} columns in '{table_name}'\")\n", + "\n", + " print(f\"\\nāœ… Successfully processed {len(tables_metadata)}/{len(table_names)} tables\")\n", + "\n", + " return tables_metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "4d165432-620a-495b-ae1a-9424e4b2ebe2", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "STEP 1: Extract Metadata (if not skipped)" + } + }, + "outputs": [], + "source": [ + "if SKIP_METADATA and METADATA_JSON:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"STEP 1: Using Pre-Extracted Metadata\")\n", + " print(\"=\" * 80)\n", + " metadata = json.loads(METADATA_JSON)\n", + " tables_metadata = metadata.get(\"tables\", [])\n", + " print(f\"āœ… Loaded metadata with {len(tables_metadata)} tables\")\n", + "\n", + " # Create compact metadata\n", + " compact_metadata = {\n", + " \"tables\": [\n", + " {\n", + " \"name\": table[\"name\"],\n", + " \"columns\": [\n", + " {\"name\": col[\"name\"], \"data_type\": col[\"data_type\"]}\n", + " for col in table[\"columns\"]\n", + " ]\n", + " }\n", + " for table in tables_metadata\n", + " ]\n", + " }\n", + "\n", + " dataset_info = {\"name\": \"pre-loaded\"}\n", + " total_columns = sum(len(table[\"columns\"]) for table in tables_metadata)\n", + "\n", + "else:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"STEP 1: Extracting Metadata from Power BI\")\n", + " print(\"=\" * 80)\n", + "\n", + " try:\n", + " # Get dataset info\n", + " dataset_info = get_dataset_info(access_token, SEMANTIC_MODEL_ID)\n", + "\n", + " if dataset_info:\n", + " print(f\"Dataset Name: {dataset_info.get('name', 'N/A')}\")\n", + "\n", + " # Discover tables\n", + " print(\"\\nšŸ”„ Discovering tables in dataset...\")\n", + " tables_df = execute_dax_for_metadata(access_token, SEMANTIC_MODEL_ID, \"EVALUATE INFO.VIEW.TABLES()\")\n", + "\n", + " if tables_df.empty:\n", + " raise ValueError(\"No tables found in dataset\")\n", + "\n", + " # Extract unique table names\n", + " table_names = list(set(tables_df['[Name]']))\n", + " print(f\"Found {len(table_names)} tables: {', '.join(table_names[:5])}\")\n", + "\n", + " # Extract metadata\n", + " tables_metadata = extract_table_metadata_from_data(\n", + " token=access_token,\n", + " dataset_id=SEMANTIC_MODEL_ID,\n", + " table_names=table_names,\n", + " sample_size=SAMPLE_SIZE\n", + " )\n", + "\n", + " # Build metadata structure\n", + " metadata = {\"tables\": tables_metadata}\n", + "\n", + " # Create compact metadata\n", + " compact_metadata = {\n", + " \"tables\": [\n", + " {\n", + " \"name\": table[\"name\"],\n", + " \"columns\": [\n", + " {\"name\": col[\"name\"], \"data_type\": col[\"data_type\"]}\n", + " for col in table[\"columns\"]\n", + " ]\n", + " }\n", + " for table in tables_metadata\n", + " ]\n", + " }\n", + "\n", + " total_columns = sum(len(table[\"columns\"]) for table in tables_metadata)\n", + "\n", + " print(f\"\\nāœ… STEP 1 Complete: Extracted {len(tables_metadata)} tables, {total_columns} columns\")\n", + "\n", + " except Exception as e:\n", + " print(f\"āŒ Error in metadata extraction: {str(e)}\")\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 
0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "a75da11e-cd60-4bf2-b9a5-6add3541cdbd", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Metadata Formatting for LLM" + } + }, + "outputs": [], + "source": [ + "def format_metadata_for_llm(metadata: Dict[str, Any]) -> str:\n", + " \"\"\"Format metadata as a readable string for LLM context.\"\"\"\n", + " tables = metadata.get('tables', [])\n", + " if not tables:\n", + " return \"No metadata available\"\n", + "\n", + " output = \"Power BI Dataset Structure:\\n\\n\"\n", + "\n", + " for table in tables:\n", + " table_name = table.get('name', 'Unknown')\n", + " columns = table.get('columns', [])\n", + "\n", + " output += f\"Table: {table_name}\\n\"\n", + "\n", + " if columns:\n", + " output += \"Columns:\\n\"\n", + " for col in columns:\n", + " col_name = col.get('name', 'Unknown')\n", + " col_type = col.get('data_type', 'Unknown')\n", + " output += f\" - {col_name} ({col_type})\\n\"\n", + "\n", + " output += \"\\n\"\n", + "\n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "1553277d-abd8-4685-a8b3-c58db2e8211e", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "DAX Query Cleaning Utility" + } + }, + "outputs": [], + "source": [ + "def clean_dax_query(dax_query: str) -> str:\n", + " \"\"\"Remove HTML/XML tags and other artifacts from DAX queries.\"\"\"\n", + " # Remove HTML/XML tags like , , etc.\n", + " cleaned = re.sub(r\"<[^>]+>\", \"\", dax_query)\n", + " # Collapse extra whitespace\n", + " cleaned = \" \".join(cleaned.split())\n", + " return cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "1c2d1a10-fec5-45ef-97b5-726b2691d605", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "STEP 2: Generate DAX Query from Question" + } + }, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STEP 2: Generating DAX Query from Natural Language Question\")\n", + "print(\"=\" * 80)\n", + "print(f\"Question: {QUESTION}\")\n", + "\n", + "try:\n", + " # Format metadata for LLM\n", + " metadata_str = format_metadata_for_llm(metadata)\n", + "\n", + " # Build prompt for DAX generation\n", + " prompt = f\"\"\"You are a Power BI DAX expert. Generate a DAX query to answer the following question.\n", + "\n", + "Available dataset structure:\n", + "{metadata_str}\n", + "\n", + "User question: {QUESTION}\n", + "\n", + "IMPORTANT RULES:\n", + "1. Generate only the DAX query without any explanation or markdown\n", + "2. Do NOT use any HTML or XML tags in the query\n", + "3. Do NOT use angle brackets < or > except for DAX operators\n", + "4. Use only valid DAX syntax\n", + "5. Reference only columns and measures that exist in the schema\n", + "6. The query should be executable as-is\n", + "7. Use proper DAX functions like EVALUATE, SUMMARIZE, FILTER, CALCULATE, etc.\n", + "8. 
Start the query with EVALUATE\n", + "\n", + "Example format:\n", + "EVALUATE SUMMARIZE(Sales, Product[Category], \"Total Revenue\", SUM(Sales[Amount]))\n", + "\n", + "Now generate the DAX query for the user's question:\"\"\"\n", + "\n", + " print(f\"\\nšŸ”„ Calling LLM: {MODEL_NAME}\")\n", + " print(f\" Temperature: {TEMPERATURE}\")\n", + "\n", + " # Call Databricks LLM API\n", + " llm_url = f\"{DATABRICKS_HOST}/serving-endpoints/{MODEL_NAME}/invocations\"\n", + " llm_headers = {\n", + " \"Authorization\": f\"Bearer {os.getenv(DATABRICKS_TOKEN)}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + "\n", + " llm_body = {\n", + " \"messages\": [\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ],\n", + " \"temperature\": TEMPERATURE,\n", + " \"max_tokens\": 2000\n", + " }\n", + "\n", + " print(f\" Endpoint: {llm_url}\")\n", + " llm_response = requests.post(llm_url, headers=llm_headers, json=llm_body, timeout=120)\n", + "\n", + " if llm_response.status_code != 200:\n", + " raise Exception(f\"LLM API call failed: {llm_response.text}\")\n", + "\n", + " llm_result = llm_response.json()\n", + "\n", + " # Extract content from response\n", + " if \"choices\" in llm_result and len(llm_result[\"choices\"]) > 0:\n", + " raw_dax = llm_result[\"choices\"][0][\"message\"][\"content\"]\n", + " else:\n", + " raise Exception(f\"Unexpected LLM response format: {llm_result}\")\n", + "\n", + " print(\"āœ… LLM response received\")\n", + "\n", + " # Clean the response\n", + " cleaned_dax = clean_dax_query(raw_dax)\n", + "\n", + " # Remove markdown code blocks if present\n", + " if \"```\" in cleaned_dax:\n", + " parts = cleaned_dax.split(\"```\")\n", + " for part in parts:\n", + " if \"EVALUATE\" in part.upper():\n", + " cleaned_dax = part.strip()\n", + " # Remove language identifier if present\n", + " if cleaned_dax.startswith(\"dax\\n\") or cleaned_dax.startswith(\"DAX\\n\"):\n", + " cleaned_dax = cleaned_dax[4:].strip()\n", + " break\n", + "\n", + " # Ensure query starts with EVALUATE\n", + " if not cleaned_dax.strip().upper().startswith(\"EVALUATE\"):\n", + " lines = cleaned_dax.split(\"\\n\")\n", + " for i, line in enumerate(lines):\n", + " if \"EVALUATE\" in line.upper():\n", + " cleaned_dax = \"\\n\".join(lines[i:])\n", + " break\n", + "\n", + " DAX_QUERY = cleaned_dax.strip()\n", + "\n", + " print(\"\\n\" + \"-\" * 80)\n", + " print(\"Generated DAX Query:\")\n", + " print(\"-\" * 80)\n", + " print(DAX_QUERY)\n", + " print(\"-\" * 80)\n", + "\n", + " print(f\"\\nāœ… STEP 2 Complete: DAX query generated successfully\")\n", + "\n", + "except Exception as e:\n", + " print(f\"āŒ Error in DAX generation: {str(e)}\")\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "16b2b932-dc5a-497c-a135-a971cbf233a2", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "STEP 3: Execute DAX Query" + } + }, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"STEP 3: Executing DAX Query\")\n", + "print(\"=\" * 80)\n", + "\n", + "def execute_dax_query(token: str, dataset_id: str, dax_query: str) -> pd.DataFrame:\n", + " \"\"\"Execute a DAX query against the Power BI dataset using REST API.\"\"\"\n", + " url = f\"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/executeQueries\"\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {token}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + "\n", + " body = 
{\n", + " \"queries\": [\n", + " {\n", + " \"query\": dax_query\n", + " }\n", + " ],\n", + " \"serializerSettings\": {\n", + " \"includeNulls\": True\n", + " }\n", + " }\n", + "\n", + " print(f\"\\nšŸ”„ Executing DAX query...\")\n", + " print(f\" Endpoint: {url}\")\n", + " print(f\" Query preview: {dax_query[:100]}...\")\n", + "\n", + " response = requests.post(url, headers=headers, json=body, timeout=60)\n", + " print(f\" Response status: {response.status_code}\")\n", + "\n", + " if response.status_code == 200:\n", + " results = response.json().get(\"results\", [])\n", + "\n", + " if results and results[0].get(\"tables\"):\n", + " rows = results[0][\"tables\"][0].get(\"rows\", [])\n", + "\n", + " if rows:\n", + " df = pd.DataFrame(rows)\n", + " print(f\"āœ… Query successful: {len(df)} rows returned\")\n", + " print(f\" Columns: {list(df.columns)}\")\n", + " return df\n", + " else:\n", + " print(\"āš ļø Query returned no rows\")\n", + " return pd.DataFrame()\n", + " else:\n", + " print(\"āš ļø No tables in response\")\n", + " return pd.DataFrame()\n", + " else:\n", + " print(f\"āŒ Query failed: {response.text}\")\n", + " return pd.DataFrame()\n", + "\n", + "try:\n", + " df_result = execute_dax_query(access_token, SEMANTIC_MODEL_ID, DAX_QUERY)\n", + "\n", + " if not df_result.empty:\n", + " print(\"\\n\" + \"=\" * 80)\n", + " print(\"Query Results\")\n", + " print(\"=\" * 80)\n", + " print(f\"Total rows: {len(df_result)}\")\n", + " print(f\"Columns: {list(df_result.columns)}\")\n", + " print(\"=\" * 80)\n", + "\n", + " # Display first few rows\n", + " print(\"\\nSample Results (First 10 rows):\")\n", + " print(\"-\" * 80)\n", + " display(df_result.head(10))\n", + "\n", + " if len(df_result) > 10:\n", + " print(f\"\\n... and {len(df_result) - 10} more rows\")\n", + "\n", + " # Convert to Spark DataFrame\n", + " print(\"\\nšŸ”„ Converting results to Spark DataFrame...\")\n", + " spark_df = spark.createDataFrame(df_result)\n", + " print(f\"āœ… Spark DataFrame created successfully\")\n", + "\n", + " print(f\"\\nāœ… STEP 3 Complete: Query executed, {len(df_result)} rows returned\")\n", + "\n", + " else:\n", + " print(\"āš ļø Query returned empty DataFrame\")\n", + " spark_df = None\n", + " print(f\"\\nāš ļø STEP 3 Complete: No results returned\")\n", + "\n", + "except Exception as e:\n", + " print(f\"āŒ Error executing DAX query: {str(e)}\")\n", + " df_result = pd.DataFrame()\n", + " spark_df = None\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "5b65608b-334e-4475-9b47-82cf92507ee6", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Execution Summary" + } + }, + "outputs": [], + "source": [ + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"FULL PIPELINE EXECUTION SUMMARY\")\n", + "print(\"=\" * 80)\n", + "print(f\"āœ… Pipeline Completed Successfully\")\n", + "print(f\" Execution Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", + "print(f\" Workspace ID: {WORKSPACE_ID}\")\n", + "print(f\" Semantic Model ID: {SEMANTIC_MODEL_ID}\")\n", + "print(f\" Dataset Name: {dataset_info.get('name', 'N/A')}\")\n", + "print(\"\")\n", + "print(f\"STEP 1 - Metadata Extraction:\")\n", + "print(f\" Tables: {len(tables_metadata)}\")\n", + "print(f\" Total Columns: {total_columns}\")\n", + "print(\"\")\n", + "print(f\"STEP 2 - DAX Generation:\")\n", + "print(f\" Question: {QUESTION}\")\n", + "print(f\" Model: {MODEL_NAME}\")\n", + "print(f\" Generated 
Query Length: {len(DAX_QUERY)} characters\")\n", + "print(\"\")\n", + "print(f\"STEP 3 - DAX Execution:\")\n", + "print(f\" Authentication: {AUTH_METHOD}\")\n", + "print(f\" Rows Returned: {len(df_result) if not df_result.empty else 0}\")\n", + "print(f\" Columns: {list(df_result.columns) if not df_result.empty else 'N/A'}\")\n", + "print(\"=\" * 80)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "5d663111-5ea7-4b8a-afb2-3bc8e176b428", + "showTitle": true, + "tableResultSettingsMap": {}, + "title": "Return Results as JSON" + } + }, + "outputs": [], + "source": [ + "# Convert DataFrame to JSON for output\n", + "if not df_result.empty:\n", + " # Convert to list of dictionaries\n", + " result_data = df_result.to_dict(orient='records')\n", + "else:\n", + " result_data = []\n", + "\n", + "# Build complete result summary\n", + "result_summary = {\n", + " \"status\": \"success\",\n", + " \"execution_time\": datetime.now().isoformat(),\n", + " \"pipeline_steps\": {\n", + " \"step_1_metadata\": {\n", + " \"tables_count\": len(tables_metadata),\n", + " \"columns_count\": total_columns,\n", + " \"metadata\": metadata,\n", + " \"compact_metadata\": compact_metadata\n", + " },\n", + " \"step_2_dax_generation\": {\n", + " \"question\": QUESTION,\n", + " \"model_name\": MODEL_NAME,\n", + " \"temperature\": TEMPERATURE,\n", + " \"generated_dax\": DAX_QUERY\n", + " },\n", + " \"step_3_execution\": {\n", + " \"workspace_id\": WORKSPACE_ID,\n", + " \"semantic_model_id\": SEMANTIC_MODEL_ID,\n", + " \"auth_method\": AUTH_METHOD,\n", + " \"rows_returned\": len(df_result) if not df_result.empty else 0,\n", + " \"columns\": list(df_result.columns) if not df_result.empty else [],\n", + " \"result_data\": result_data[:1000] # Limit to first 1000 rows for JSON output\n", + " }\n", + " },\n", + " \"dataset_name\": dataset_info.get('name', 'unknown')\n", + "}\n", + "\n", + "# Exit with complete results\n", + "dbutils.notebook.exit(json.dumps(result_summary))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "powerbi_full_pipeline", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/src/backend/migrations/versions/20251006_add_powerbi_config.py b/src/backend/migrations/versions/20251006_add_powerbi_config.py new file mode 100644 index 00000000..0511a3a1 --- /dev/null +++ b/src/backend/migrations/versions/20251006_add_powerbi_config.py @@ -0,0 +1,52 @@ +"""Add PowerBI configuration table + +Revision ID: 20251006_add_powerbi +Revises: 92fd57c71d02 +Create Date: 2025-10-06 00:00:00.000000 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import inspect + + +# revision identifiers, used by Alembic. 
+revision = '20251006_add_powerbi' +down_revision = '92fd57c71d02' +branch_labels = None +depends_on = None + + +def upgrade(): + """Create powerbiconfig table for Power BI integration.""" + + conn = op.get_bind() + inspector = sa.inspect(conn) + + if 'powerbiconfig' not in inspector.get_table_names(): + op.create_table('powerbiconfig', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('tenant_id', sa.String(), nullable=False), + sa.Column('client_id', sa.String(), nullable=False), + sa.Column('encrypted_client_secret', sa.String(), nullable=True), + sa.Column('workspace_id', sa.String(), nullable=True), + sa.Column('semantic_model_id', sa.String(), nullable=True), + sa.Column('encrypted_username', sa.String(), nullable=True), + sa.Column('encrypted_password', sa.String(), nullable=True), + sa.Column('is_active', sa.Boolean(), default=True), + sa.Column('is_enabled', sa.Boolean(), default=True), + sa.Column('group_id', sa.String(length=100), nullable=True), + sa.Column('created_by_email', sa.String(length=255), nullable=True), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True) + ) + op.create_index('ix_powerbiconfig_group_id', 'powerbiconfig', ['group_id'], unique=False) + op.create_index('ix_powerbiconfig_created_by_email', 'powerbiconfig', ['created_by_email'], unique=False) + + +def downgrade(): + """Remove powerbiconfig table.""" + + op.drop_index('ix_powerbiconfig_created_by_email', table_name='powerbiconfig') + op.drop_index('ix_powerbiconfig_group_id', table_name='powerbiconfig') + op.drop_table('powerbiconfig') diff --git a/src/backend/migrations/versions/538129a124e7_add_auth_method_to_powerbi_config.py b/src/backend/migrations/versions/538129a124e7_add_auth_method_to_powerbi_config.py new file mode 100644 index 00000000..887cff87 --- /dev/null +++ b/src/backend/migrations/versions/538129a124e7_add_auth_method_to_powerbi_config.py @@ -0,0 +1,28 @@ +"""add_auth_method_to_powerbi_config + +Revision ID: 538129a124e7 +Revises: 20251006_add_powerbi +Create Date: 2025-10-07 11:40:53.597866 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = '538129a124e7' +down_revision: Union[str, None] = '20251006_add_powerbi' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add auth_method column to powerbiconfig table + op.add_column('powerbiconfig', sa.Column('auth_method', sa.String(), nullable=False, server_default='username_password')) + + +def downgrade() -> None: + # Remove auth_method column from powerbiconfig table + op.drop_column('powerbiconfig', 'auth_method') \ No newline at end of file diff --git a/src/backend/src/api/__init__.py b/src/backend/src/api/__init__.py index 5989abc7..856d1d77 100644 --- a/src/backend/src/api/__init__.py +++ b/src/backend/src/api/__init__.py @@ -6,6 +6,7 @@ from src.api.crews_export_router import router as crews_export_router from src.api.databricks_router import router as databricks_router from src.api.databricks_knowledge_router import router as databricks_knowledge_router +from src.api.powerbi_router import router as powerbi_router from src.api.flows_router import router as flows_router from src.api.healthcheck_router import router as healthcheck_router from src.api.logs_router import router as logs_router @@ -56,6 +57,7 @@ api_router.include_router(crews_export_router) api_router.include_router(databricks_router) api_router.include_router(databricks_knowledge_router) +api_router.include_router(powerbi_router) api_router.include_router(flows_router) api_router.include_router(healthcheck_router) api_router.include_router(logs_router) @@ -105,6 +107,7 @@ "crews_export_router", "databricks_router", "databricks_knowledge_router", + "powerbi_router", "flows_router", "healthcheck_router", "logs_router", diff --git a/src/backend/src/api/powerbi_router.py b/src/backend/src/api/powerbi_router.py new file mode 100644 index 00000000..7a501736 --- /dev/null +++ b/src/backend/src/api/powerbi_router.py @@ -0,0 +1,211 @@ +from typing import Dict, Annotated +import logging + +from fastapi import APIRouter, Depends, HTTPException + +from src.schemas.powerbi_config import ( + PowerBIConfigCreate, + PowerBIConfigResponse, + DAXQueryRequest, + DAXQueryResponse +) +from src.services.powerbi_service import PowerBIService +from src.core.dependencies import SessionDep, GroupContextDep +from src.core.permissions import is_workspace_admin + +router = APIRouter( + prefix="/powerbi", + tags=["powerbi"], + responses={404: {"description": "Not found"}}, +) + +logger = logging.getLogger(__name__) + + +# Dependency to get PowerBIService +def get_powerbi_service( + session: SessionDep, + group_context: GroupContextDep +) -> PowerBIService: + """ + Get a properly initialized PowerBIService instance with group context. + + Args: + session: Database session from dependency injection + group_context: Group context for multi-tenant filtering + + Returns: + Initialized PowerBIService with all dependencies + """ + # Get group_id from context + group_id = group_context.primary_group_id if group_context else None + + # Create service with session and group context + service = PowerBIService(session, group_id=group_id) + + return service + + +# Type alias for cleaner function signatures +PowerBIServiceDep = Annotated[PowerBIService, Depends(get_powerbi_service)] + + +@router.post("/config", response_model=Dict) +async def set_powerbi_config( + request: PowerBIConfigCreate, + group_context: GroupContextDep, + service: PowerBIServiceDep, +): + """ + Set Power BI configuration. 
+ Only workspace admins can set Power BI configuration for their workspace. + + Args: + request: Configuration data + group_context: Group context for multi-tenant operations + service: Power BI service + + Returns: + Success response with configuration + """ + # Check permissions - only workspace admins can set Power BI configuration + if not is_workspace_admin(group_context): + raise HTTPException( + status_code=403, + detail="Only workspace admins can set Power BI configuration" + ) + + try: + # Get user email from group context + created_by_email = group_context.group_email if group_context else None + + # Get group ID + group_id = group_context.primary_group_id if group_context else None + + # Create configuration data + config_data = request.model_dump() + config_data['group_id'] = group_id + config_data['created_by_email'] = created_by_email + + # Create configuration using repository + config = await service.repository.create_config(config_data) + + return { + "message": "Power BI configuration saved successfully", + "config": { + "tenant_id": config.tenant_id, + "client_id": config.client_id, + "workspace_id": config.workspace_id, + "semantic_model_id": config.semantic_model_id, + "is_enabled": config.is_enabled, + "is_active": config.is_active, + } + } + except Exception as e: + logger.error(f"Error setting Power BI configuration: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Error setting Power BI configuration: {str(e)}") + + +@router.get("/config", response_model=PowerBIConfigResponse) +async def get_powerbi_config( + group_context: GroupContextDep, + service: PowerBIServiceDep, +): + """ + Get current Power BI configuration. + + Args: + group_context: Group context for multi-tenant operations + service: Power BI service + + Returns: + Current Power BI configuration + """ + try: + config = await service.repository.get_active_config(group_id=service.group_id) + if not config: + # Return a default empty configuration + return PowerBIConfigResponse( + tenant_id="", + client_id="", + workspace_id=None, + semantic_model_id=None, + enabled=False + ) + + return PowerBIConfigResponse( + tenant_id=config.tenant_id, + client_id=config.client_id, + workspace_id=config.workspace_id, + semantic_model_id=config.semantic_model_id, + enabled=config.is_enabled + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting Power BI configuration: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Error getting Power BI configuration: {str(e)}") + + +@router.post("/query", response_model=DAXQueryResponse) +async def execute_dax_query( + request: DAXQueryRequest, + group_context: GroupContextDep, + service: PowerBIServiceDep, +): + """ + Execute a DAX query against a Power BI semantic model. + + Args: + request: DAX query request with query and optional semantic model ID + group_context: Group context for multi-tenant operations + service: Power BI service + + Returns: + Query execution results + """ + try: + response = await service.execute_dax_query(request) + return response + except HTTPException: + raise + except Exception as e: + logger.error(f"Error executing DAX query: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Error executing DAX query: {str(e)}") + + +@router.get("/status", response_model=Dict) +async def check_powerbi_status( + group_context: GroupContextDep, + service: PowerBIServiceDep, +): + """ + Check Power BI integration status. 
+ + Args: + group_context: Group context for multi-tenant operations + service: Power BI service + + Returns: + Status information about Power BI integration + """ + try: + config = await service.repository.get_active_config(group_id=service.group_id) + + if not config: + return { + "configured": False, + "enabled": False, + "message": "Power BI is not configured. Please configure connection settings." + } + + return { + "configured": True, + "enabled": config.is_enabled, + "workspace_id": config.workspace_id, + "semantic_model_id": config.semantic_model_id, + "message": "Power BI is configured and ready" if config.is_enabled else "Power BI is configured but disabled" + } + except Exception as e: + logger.error(f"Error checking Power BI status: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"Error checking Power BI status: {str(e)}") diff --git a/src/backend/src/core/unit_of_work.py b/src/backend/src/core/unit_of_work.py index 7992912b..7169bde7 100644 --- a/src/backend/src/core/unit_of_work.py +++ b/src/backend/src/core/unit_of_work.py @@ -15,6 +15,7 @@ from src.repositories.task_tracking_repository import TaskTrackingRepository from src.repositories.schema_repository import SchemaRepository from src.repositories.databricks_config_repository import DatabricksConfigRepository +from src.repositories.powerbi_config_repository import PowerBIConfigRepository from src.repositories.mcp_repository import MCPServerRepository, MCPSettingsRepository from src.repositories.engine_config_repository import EngineConfigRepository from src.repositories.memory_backend_repository import MemoryBackendRepository @@ -40,6 +41,7 @@ def __init__(self): self.task_tracking_repository: Optional[TaskTrackingRepository] = None self.schema_repository: Optional[SchemaRepository] = None self.databricks_config_repository: Optional[DatabricksConfigRepository] = None + self.powerbi_config_repository: Optional[PowerBIConfigRepository] = None self.mcp_server_repository: Optional[MCPServerRepository] = None self.mcp_settings_repository: Optional[MCPSettingsRepository] = None self.engine_config_repository: Optional[EngineConfigRepository] = None @@ -65,6 +67,7 @@ async def __aenter__(self): self.task_tracking_repository = TaskTrackingRepository(session) self.schema_repository = SchemaRepository(session) self.databricks_config_repository = DatabricksConfigRepository(session) + self.powerbi_config_repository = PowerBIConfigRepository(session) self.mcp_server_repository = MCPServerRepository(session) self.mcp_settings_repository = MCPSettingsRepository(session) self.engine_config_repository = EngineConfigRepository(session) @@ -110,6 +113,7 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): self.task_tracking_repository = None self.schema_repository = None self.databricks_config_repository = None + self.powerbi_config_repository = None self.mcp_server_repository = None self.mcp_settings_repository = None self.engine_config_repository = None @@ -159,6 +163,7 @@ def __init__(self): self.task_tracking_repository = None self.schema_repository = None self.databricks_config_repository = None + self.powerbi_config_repository = None self.mcp_server_repository = None self.mcp_settings_repository = None self.engine_config_repository = None @@ -180,6 +185,7 @@ def initialize(self): self.task_tracking_repository = TaskTrackingRepository(None) self.schema_repository = SchemaRepository(self._session) self.databricks_config_repository = DatabricksConfigRepository(self._session) + self.powerbi_config_repository = 
PowerBIConfigRepository(self._session) self.mcp_server_repository = MCPServerRepository(self._session) self.mcp_settings_repository = MCPSettingsRepository(self._session) self.engine_config_repository = EngineConfigRepository(self._session) diff --git a/src/backend/src/engines/crewai/helpers/task_helpers.py b/src/backend/src/engines/crewai/helpers/task_helpers.py index cbae0dbf..bf090dd5 100644 --- a/src/backend/src/engines/crewai/helpers/task_helpers.py +++ b/src/backend/src/engines/crewai/helpers/task_helpers.py @@ -309,7 +309,7 @@ async def create_task( tool_override = task_tool_configs.get(tool_name, {}) # Debug logging for tool configs - if tool_name in ["GenieTool", "SerperDevTool", "DatabricksKnowledgeSearchTool"]: + if tool_name in ["GenieTool", "SerperDevTool", "DatabricksKnowledgeSearchTool", "PowerBIAnalysisTool"]: logger.info(f"Task {task_key} - {tool_name} task_tool_configs: {task_tool_configs}") logger.info(f"Task {task_key} - {tool_name} tool_override: {tool_override}") diff --git a/src/backend/src/engines/crewai/logging_config.py b/src/backend/src/engines/crewai/logging_config.py index 407f9a72..55ce120f 100644 --- a/src/backend/src/engines/crewai/logging_config.py +++ b/src/backend/src/engines/crewai/logging_config.py @@ -573,6 +573,8 @@ def configure_subprocess_logging(execution_id: str, process_type: str = "crew"): 'src.services.trace_queue', # Add trace queue logger 'src.engines.crewai.execution_runner', # Add execution runner logger 'src.services.databricks_knowledge_service', # Add knowledge service logger for search debugging + 'src.engines.crewai.tools.custom.powerbi_analysis_tool', # Add PowerBI tool logger + 'src.engines.crewai.tools.custom.databricks_jobs_tool', # Add Databricks jobs tool logger '__main__' # For any direct logging in subprocess ]: module_logger = get_logger(logger_name) diff --git a/src/backend/src/engines/crewai/tools/custom/databricks_jobs_tool.py b/src/backend/src/engines/crewai/tools/custom/databricks_jobs_tool.py index 6e087556..196abbd8 100644 --- a/src/backend/src/engines/crewai/tools/custom/databricks_jobs_tool.py +++ b/src/backend/src/engines/crewai/tools/custom/databricks_jobs_tool.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Type, Union from datetime import datetime import time +from concurrent.futures import ThreadPoolExecutor import aiohttp from crewai.tools import BaseTool @@ -14,11 +15,45 @@ logger = logging.getLogger(__name__) +# Thread pool executor for running async operations from sync context +_EXECUTOR = ThreadPoolExecutor(max_workers=5) + # Global execution tracking dictionaries (outside of class to avoid Pydantic field interpretation) _GLOBAL_RUN_EXECUTIONS: Dict[str, str] = {} _GLOBAL_CREATE_EXECUTIONS: Dict[str, str] = {} +def _run_async_in_sync_context(coro): + """ + Safely run an async coroutine from a synchronous context. + + This handles the case where we're already in an event loop (e.g., FastAPI) + and need to execute async code from a sync function (e.g., CrewAI tool's _run method). 
+ + Args: + coro: The coroutine to execute + + Returns: + The result of the coroutine execution + """ + try: + # Try to get the current running loop + loop = asyncio.get_running_loop() + # We're already in an async context, run in executor to avoid nested loop issues + logger.debug("Detected running event loop, using ThreadPoolExecutor") + future = _EXECUTOR.submit(asyncio.run, coro) + return future.result() + except RuntimeError: + # No event loop running, we can safely create one + logger.debug("No running event loop detected, creating new loop") + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + + class DatabricksJobsToolSchema(BaseModel): """Input schema for DatabricksJobsTool.""" @@ -641,80 +676,74 @@ def _run(self, **kwargs: Any) -> str: name_filter=name_filter, job_params=job_params ) - - # Execute the requested action - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - if action == "list": - result = loop.run_until_complete(self._list_jobs(limit, name_filter)) - self._action_usage_counts[action] += 1 - elif action == "list_my_jobs": - result = loop.run_until_complete(self._list_my_jobs(limit, name_filter)) - self._action_usage_counts[action] += 1 - elif action == "get": - result = loop.run_until_complete(self._get_job(job_id)) - self._action_usage_counts[action] += 1 - elif action == "get_notebook": - result = loop.run_until_complete(self._get_notebook_content(job_id)) - self._action_usage_counts[action] += 1 - elif action == "run": - result = loop.run_until_complete(self._run_job(job_id, job_params)) - - # SINGLE EXECUTION TRACKING: Track successful run execution - if result and "Successfully triggered job" in result: - # Extract run_id from the result - import re - run_id_match = re.search(r'Run ID: (\d+)', result) - if run_id_match: - new_run_id = run_id_match.group(1) - param_hash = DatabricksJobsTool._deterministic_hash(job_params) if job_params else 'no_params' - execution_key = f"run_{job_id}_{param_hash}" - _GLOBAL_RUN_EXECUTIONS[execution_key] = new_run_id - self.current_usage_count += 1 - self._action_usage_counts[action] += 1 - logger.info(f"[SINGLE_EXECUTION] Tracked successful run: job_id={job_id}, run_id={new_run_id}, action_usage={self._action_usage_counts[action]}, total_tracked_runs={len(_GLOBAL_RUN_EXECUTIONS)}") - - # Add execution tracking info to result - result += f"\n\nšŸ”’ EXECUTION TRACKING: This tool will prevent duplicate runs of this job with these parameters." 
- action_limit = self._action_limits.get(action) - if action_limit is not None: - result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/{action_limit}" - else: - result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/unlimited" - - elif action == "monitor": - result = loop.run_until_complete(self._monitor_run(run_id)) - self._action_usage_counts[action] += 1 - elif action == "create": - result = loop.run_until_complete(self._create_job(job_config)) - - # SINGLE EXECUTION TRACKING: Track successful job creation - if result and "Successfully created job" in result: - # Extract job_id from the result - import re - job_id_match = re.search(r'Job ID: (\d+)', result) - if job_id_match: - new_job_id = job_id_match.group(1) - config_hash = DatabricksJobsTool._deterministic_hash(job_config) if job_config else 'no_config' - execution_key = f"create_{config_hash}" - _GLOBAL_CREATE_EXECUTIONS[execution_key] = new_job_id - self.current_usage_count += 1 - self._action_usage_counts[action] += 1 - logger.info(f"[SINGLE_EXECUTION] Tracked successful creation: job_name={job_config.get('name', 'Unknown')}, job_id={new_job_id}, action_usage={self._action_usage_counts[action]}, total_tracked_creates={len(_GLOBAL_CREATE_EXECUTIONS)}") - - # Add execution tracking info to result - result += f"\n\nšŸ”’ EXECUTION TRACKING: This tool will prevent duplicate creation of jobs with this configuration." - action_limit = self._action_limits.get(action) - if action_limit is not None: - result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/{action_limit}" - else: - result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/unlimited" - else: - result = f"Error: Unknown action '{action}'" - finally: - loop.close() + + # Execute the requested action using helper function + if action == "list": + result = _run_async_in_sync_context(self._list_jobs(limit, name_filter)) + self._action_usage_counts[action] += 1 + elif action == "list_my_jobs": + result = _run_async_in_sync_context(self._list_my_jobs(limit, name_filter)) + self._action_usage_counts[action] += 1 + elif action == "get": + result = _run_async_in_sync_context(self._get_job(job_id)) + self._action_usage_counts[action] += 1 + elif action == "get_notebook": + result = _run_async_in_sync_context(self._get_notebook_content(job_id)) + self._action_usage_counts[action] += 1 + elif action == "run": + result = _run_async_in_sync_context(self._run_job(job_id, job_params)) + + # SINGLE EXECUTION TRACKING: Track successful run execution + if result and "Successfully triggered job" in result: + # Extract run_id from the result + import re + run_id_match = re.search(r'Run ID: (\d+)', result) + if run_id_match: + new_run_id = run_id_match.group(1) + param_hash = DatabricksJobsTool._deterministic_hash(job_params) if job_params else 'no_params' + execution_key = f"run_{job_id}_{param_hash}" + _GLOBAL_RUN_EXECUTIONS[execution_key] = new_run_id + self.current_usage_count += 1 + self._action_usage_counts[action] += 1 + logger.info(f"[SINGLE_EXECUTION] Tracked successful run: job_id={job_id}, run_id={new_run_id}, action_usage={self._action_usage_counts[action]}, total_tracked_runs={len(_GLOBAL_RUN_EXECUTIONS)}") + + # Add execution tracking info to result + result += f"\n\nšŸ”’ EXECUTION TRACKING: This tool will prevent duplicate runs of this job with these parameters." 
+ action_limit = self._action_limits.get(action) + if action_limit is not None: + result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/{action_limit}" + else: + result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/unlimited" + + elif action == "monitor": + result = _run_async_in_sync_context(self._monitor_run(run_id)) + self._action_usage_counts[action] += 1 + elif action == "create": + result = _run_async_in_sync_context(self._create_job(job_config)) + + # SINGLE EXECUTION TRACKING: Track successful job creation + if result and "Successfully created job" in result: + # Extract job_id from the result + import re + job_id_match = re.search(r'Job ID: (\d+)', result) + if job_id_match: + new_job_id = job_id_match.group(1) + config_hash = DatabricksJobsTool._deterministic_hash(job_config) if job_config else 'no_config' + execution_key = f"create_{config_hash}" + _GLOBAL_CREATE_EXECUTIONS[execution_key] = new_job_id + self.current_usage_count += 1 + self._action_usage_counts[action] += 1 + logger.info(f"[SINGLE_EXECUTION] Tracked successful creation: job_name={job_config.get('name', 'Unknown')}, job_id={new_job_id}, action_usage={self._action_usage_counts[action]}, total_tracked_creates={len(_GLOBAL_CREATE_EXECUTIONS)}") + + # Add execution tracking info to result + result += f"\n\nšŸ”’ EXECUTION TRACKING: This tool will prevent duplicate creation of jobs with this configuration." + action_limit = self._action_limits.get(action) + if action_limit is not None: + result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/{action_limit}" + else: + result += f"\nšŸ“Š Action Usage: {action} {self._action_usage_counts[action]}/unlimited" + else: + result = f"Error: Unknown action '{action}'" total_time = time.time() - start_time logger.info(f"Action '{action}' completed in {total_time:.3f}s") diff --git a/src/backend/src/engines/crewai/tools/custom/powerbi_analysis_tool.py b/src/backend/src/engines/crewai/tools/custom/powerbi_analysis_tool.py new file mode 100644 index 00000000..9300ae9f --- /dev/null +++ b/src/backend/src/engines/crewai/tools/custom/powerbi_analysis_tool.py @@ -0,0 +1,871 @@ +import logging +import asyncio +import json +import time +import re +import os +from datetime import datetime +from typing import Any, Optional, Type +from concurrent.futures import ThreadPoolExecutor + +from crewai.tools import BaseTool +from pydantic import BaseModel, Field, PrivateAttr, model_validator + +logger = logging.getLogger(__name__) + +# Emergency debug logging to file (bypasses all logging config) +def _debug_log(msg: str): + """Write debug message directly to file, bypassing logger.""" + try: + debug_file = '/tmp/powerbi_tool_debug.log' + with open(debug_file, 'a') as f: + timestamp = datetime.now().isoformat() + f.write(f"[{timestamp}] {msg}\n") + f.flush() + except Exception: + pass # Silently fail if we can't write debug log + +# Thread pool executor for running async operations from sync context +_EXECUTOR = ThreadPoolExecutor(max_workers=5) + + +def _run_async_in_sync_context(coro): + """ + Safely run an async coroutine from a synchronous context. + + This handles the case where we're already in an event loop (e.g., FastAPI) + and need to execute async code from a sync function (e.g., CrewAI tool's _run method). 
+ + Args: + coro: The coroutine to execute + + Returns: + The result of the coroutine execution + """ + try: + # Try to get the current running loop + loop = asyncio.get_running_loop() + # We're already in an async context, run in executor to avoid nested loop issues + logger.debug("Detected running event loop, using ThreadPoolExecutor") + future = _EXECUTOR.submit(asyncio.run, coro) + return future.result() + except RuntimeError: + # No event loop running, we can safely create one + logger.debug("No running event loop detected, creating new loop") + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + + +class PowerBIAnalysisToolSchema(BaseModel): + """Input schema for PowerBIAnalysisTool.""" + + dashboard_id: str = Field( + ..., description="Power BI dashboard/semantic model ID to analyze" + ) + questions: list = Field( + ..., description="Business questions to analyze using DAX" + ) + workspace_id: Optional[str] = Field( + None, description="Power BI workspace ID (uses default if not provided)" + ) + dax_statement: Optional[str] = Field( + None, description="Pre-generated DAX statement (optional, will be generated if not provided)" + ) + job_id: Optional[int] = Field( + None, description="Databricks job ID to execute (overrides configured job_id if provided)" + ) + additional_params: Optional[dict] = Field( + None, description=( + "Additional parameters for Power BI authentication and Databricks job execution. " + "Required fields: 'tenant_id' (Azure AD Tenant ID), 'client_id' (Azure AD Application ID). " + "Optional fields: 'auth_method' (default: 'service_principal'), 'sample_size', 'metadata', 'task_key'. " + "NOTE: Credentials auto-fetched from API Keys: POWERBI_CLIENT_SECRET, POWERBI_USERNAME, POWERBI_PASSWORD, DATABRICKS_TOKEN. " + "Databricks host auto-detected from environment. " + "Example: {'tenant_id': 'xxx', 'client_id': 'yyy', 'auth_method': 'service_principal'}" + ) + ) + + @model_validator(mode='after') + def validate_input(self) -> 'PowerBIAnalysisToolSchema': + """Validate the input parameters.""" + if not self.questions and not self.dax_statement: + raise ValueError("Either 'questions' or 'dax_statement' must be provided") + return self + + +class PowerBIAnalysisTool(BaseTool): + """ + A tool for complex Power BI analysis using Databricks job execution. + + This tool is designed for: + - Heavy computational analysis + - Long-running DAX queries + - Complex data transformations + - Result persistence to Databricks volumes + - Integration with other data sources + + + Architecture: + 1. Accepts business questions or DAX statements + 2. Triggers Databricks job with parameters + 3. Databricks notebook executes DAX against Power BI + 4. Results are processed and optionally stored + 5. Returns analysis results to agent + """ + + name: str = "Power BI Analysis (Databricks)" + description: str = ( + "Execute complex Power BI analysis using Databricks jobs. " + "Suitable for heavy computations, long-running queries, and advanced analytics. 
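The schema above is what ultimately validates every tool call: `dashboard_id` is mandatory, and the `model_validator` insists on at least one of `questions` or `dax_statement`. A short sketch of an accepted and a rejected payload (the IDs are placeholders; the import path matches this new module):

```python
from pydantic import ValidationError
from src.engines.crewai.tools.custom.powerbi_analysis_tool import PowerBIAnalysisToolSchema

# A valid payload: a question plus the Power BI auth hints the job needs.
payload = PowerBIAnalysisToolSchema(
    dashboard_id="00000000-0000-0000-0000-000000000000",  # placeholder model ID
    questions=["What is total NSR by year?"],
    additional_params={
        "tenant_id": "11111111-1111-1111-1111-111111111111",
        "client_id": "22222222-2222-2222-2222-222222222222",
        "auth_method": "service_principal",
        "sample_size": 100,
    },
)
print(payload.questions)  # ['What is total NSR by year?']

# Neither questions nor dax_statement -> rejected by validate_input.
try:
    PowerBIAnalysisToolSchema(
        dashboard_id="00000000-0000-0000-0000-000000000000",
        questions=[],
    )
except ValidationError as exc:
    print(exc.errors()[0]["msg"])  # mentions that questions or dax_statement is required
```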
" + "\n\nREQUIRED PARAMETERS:\n" + "- 'job_id': Databricks job ID to execute (can be set in tool config as default)\n" + "- 'dashboard_id': Power BI semantic model ID to query\n" + "- 'questions': List of business questions to analyze\n" + "- 'additional_params': Dict with Power BI authentication:\n" + " - 'tenant_id': Azure AD Tenant ID (required)\n" + " - 'client_id': Azure AD Application ID (required)\n" + " - 'auth_method': Authentication method (default: 'service_principal')\n" + " - NOTE: Credentials auto-fetched from API Keys:\n" + " - POWERBI_CLIENT_SECRET (for service_principal auth)\n" + " - POWERBI_USERNAME (optional, for device_code auth)\n" + " - POWERBI_PASSWORD (optional, for device_code auth)\n" + " - DATABRICKS_TOKEN (for Databricks API access)\n" + "\n\nOPTIONAL PARAMETERS:\n" + "- 'workspace_id': Power BI workspace ID\n" + "- 'dax_statement': Pre-generated DAX query\n" + "- Additional params: 'databricks_host', 'databricks_token', 'sample_size', 'metadata', " + "'task_key' (default: 'pbi_e2e_pipeline' for multi-task jobs)\n" + "\n\nEXAMPLE:\n" + "job_id=365257288725339, dashboard_id='a17de62e-...', questions=['What is total NSR?'], " + "additional_params={'tenant_id': 'xxx-xxx', 'client_id': 'yyy-yyy', 'auth_method': 'service_principal'}" + ) + args_schema: Type[BaseModel] = PowerBIAnalysisToolSchema + + _group_id: Optional[str] = PrivateAttr(default=None) + _databricks_job_id: Optional[int] = PrivateAttr(default=None) + _tenant_id: Optional[str] = PrivateAttr(default=None) + _client_id: Optional[str] = PrivateAttr(default=None) + _workspace_id: Optional[str] = PrivateAttr(default=None) + _semantic_model_id: Optional[str] = PrivateAttr(default=None) + _auth_method: Optional[str] = PrivateAttr(default="service_principal") + + def __init__( + self, + group_id: Optional[str] = None, + databricks_job_id: Optional[int] = None, + tenant_id: Optional[str] = None, + client_id: Optional[str] = None, + workspace_id: Optional[str] = None, + semantic_model_id: Optional[str] = None, + auth_method: Optional[str] = "service_principal", + **kwargs + ): + """ + Initialize PowerBIAnalysisTool. + + Args: + group_id: Group ID for multi-tenant support + databricks_job_id: Databricks job ID for Power BI analysis (if pre-configured) + tenant_id: Azure AD Tenant ID for Power BI authentication + client_id: Azure AD Application/Client ID for Power BI authentication + workspace_id: Default Power BI Workspace ID (optional, can be overridden per task) + semantic_model_id: Default Power BI Semantic Model ID (optional, can be overridden per task) + auth_method: Authentication method ("service_principal" or "device_code") + **kwargs: Additional keyword arguments for BaseTool + """ + super().__init__(**kwargs) + self._group_id = group_id + self._databricks_job_id = databricks_job_id + self._tenant_id = tenant_id + self._client_id = client_id + self._workspace_id = workspace_id + self._semantic_model_id = semantic_model_id + self._auth_method = auth_method + + # Clear debug log file at initialization + try: + debug_file = '/tmp/powerbi_tool_debug.log' + if os.path.exists(debug_file): + os.remove(debug_file) + except Exception: + pass + + _debug_log(f"PowerBIAnalysisTool initialized for group: {group_id or 'default'}, job_id: {databricks_job_id}") + logger.info(f"PowerBIAnalysisTool initialized for group: {group_id or 'default'}") + + def _run(self, **kwargs: Any) -> str: + """ + Execute a Power BI analysis action via Databricks job. 
+ + Args: + job_id (Optional[int]): Databricks job ID to execute (overrides configured job_id) + dashboard_id (str): Power BI dashboard/semantic model ID + questions (list): Business questions to analyze + workspace_id (Optional[str]): Workspace ID + dax_statement (Optional[str]): Pre-generated DAX statement + additional_params (Optional[dict]): Additional parameters for the Databricks job + (e.g., auth_method, tenant_id, client_id, client_secret, sample_size, etc.) + + Returns: + str: Formatted analysis results + """ + _debug_log(f"PowerBI tool _run called with job_id={kwargs.get('job_id')}, dashboard_id={kwargs.get('dashboard_id')}") + logger.info(f"[POWERBI-TOOL] _run called with job_id={kwargs.get('job_id')}") + # Use helper function to safely run async code from sync context + result = _run_async_in_sync_context(self._execute_analysis(**kwargs)) + _debug_log(f"PowerBI tool _run returning result of type {type(result)}, length={len(str(result))}") + logger.info(f"[POWERBI-TOOL] _run returning result length={len(str(result))}") + return result + + async def _execute_analysis(self, **kwargs) -> str: + """ + Async implementation of analysis execution via Databricks. + + Args: + **kwargs: Analysis parameters including job_id, dashboard_id, questions, etc. + + Returns: + str: Formatted analysis results + """ + dashboard_id = kwargs.get('dashboard_id') + questions = kwargs.get('questions', []) + workspace_id = kwargs.get('workspace_id') + job_id = kwargs.get('job_id') # Get job_id from parameters + additional_params = kwargs.get('additional_params') # Get additional parameters + + _debug_log(f"_execute_analysis started: job_id={job_id}, dashboard_id={dashboard_id}, questions={questions}") + logger.info(f"[POWERBI-TOOL] _execute_analysis started") + + try: + # Import here to avoid circular dependency + from .databricks_jobs_tool import DatabricksJobsTool + + # Auto-detect Databricks configuration + databricks_host = None + databricks_token = None + tool_config = {} + + # 1. Try to get databricks_host from unified auth (auto-detect from environment) + try: + from src.utils.databricks_auth import get_auth_context + auth_context = await get_auth_context() + if auth_context and auth_context.workspace_url: + databricks_host = auth_context.workspace_url + logger.info(f"Auto-detected databricks_host from environment: {databricks_host}") + except Exception as e: + logger.debug(f"Could not auto-detect databricks_host from auth context: {e}") + + # 2. Override with additional_params if explicitly provided + if additional_params and 'databricks_host' in additional_params: + databricks_host = additional_params['databricks_host'] + logger.info(f"Using databricks_host from additional_params: {databricks_host}") + + # 3. 
Try to get databricks_token from API Keys first (secure) + try: + from src.services.api_keys_service import ApiKeysService + from src.db.session import async_session_factory + from src.utils.encryption_utils import EncryptionUtils + + async with async_session_factory() as session: + api_keys_service = ApiKeysService(session, group_id=self._group_id) + # Try both DATABRICKS_TOKEN and DATABRICKS_API_KEY + token_obj = await api_keys_service.find_by_name("DATABRICKS_TOKEN") + if not token_obj: + token_obj = await api_keys_service.find_by_name("DATABRICKS_API_KEY") + + if token_obj and token_obj.encrypted_value: + databricks_token = EncryptionUtils.decrypt_value(token_obj.encrypted_value) + logger.info("Retrieved databricks_token from API Keys") + except Exception as e: + logger.debug(f"Could not retrieve databricks_token from API Keys: {e}") + + # 4. Fall back to additional_params if not found in API Keys + if not databricks_token and additional_params and 'databricks_token' in additional_params: + databricks_token = additional_params['databricks_token'] + logger.info("Using databricks_token from additional_params") + + # 5. Build tool_config for DatabricksJobsTool + if databricks_token: + tool_config['DATABRICKS_API_KEY'] = databricks_token + + if databricks_host: + tool_config['DATABRICKS_HOST'] = databricks_host + + # Create DatabricksJobsTool instance with proper configuration + databricks_tool = DatabricksJobsTool( + databricks_host=databricks_host, + tool_config=tool_config, + token_required=False # We handle auth through tool_config + ) + + # Prepare job parameters with correct field names for Databricks job + # The job expects: + # - "question" (singular string, not "questions" array) + # - "semantic_model_id" (not "dashboard_id") + # - No "dax_statement" field + + # Convert questions array to single question string (take first question) + question_str = questions[0] if questions and len(questions) > 0 else "" + + # Determine semantic_model_id with priority: task config > kwargs > fallback + # PRIORITY 1: Use semantic_model_id from task config if available + effective_semantic_model_id = self._semantic_model_id if self._semantic_model_id else dashboard_id + + job_params = { + "question": question_str, # Singular, not plural + "semantic_model_id": effective_semantic_model_id, # Use prioritized value + "workspace_id": workspace_id, + # dax_statement is NOT sent to the job (internal to PowerBI tool) + } + + logger.info(f"Prepared job parameters with question: '{question_str[:50] if question_str else ''}...' 
and semantic_model_id: {effective_semantic_model_id}") + + # Build PowerBI configuration with precedence: tool config (from task) > additional_params (from LLM) + powerbi_config = {} + + # PRIORITY 1: Use tool initialization values (from task config) - HIGHEST PRIORITY + if self._tenant_id: + powerbi_config['tenant_id'] = self._tenant_id + if self._client_id: + powerbi_config['client_id'] = self._client_id + if self._auth_method: + powerbi_config['auth_method'] = self._auth_method + if self._workspace_id: + powerbi_config['workspace_id'] = self._workspace_id + + # PRIORITY 2: Fall back to additional_params only if not already set (from LLM - lower priority) + if additional_params: + if 'tenant_id' not in powerbi_config and 'tenant_id' in additional_params: + powerbi_config['tenant_id'] = additional_params['tenant_id'] + if 'client_id' not in powerbi_config and 'client_id' in additional_params: + powerbi_config['client_id'] = additional_params['client_id'] + if 'auth_method' not in powerbi_config and 'auth_method' in additional_params: + powerbi_config['auth_method'] = additional_params['auth_method'] + if 'workspace_id' not in powerbi_config and 'workspace_id' in additional_params: + powerbi_config['workspace_id'] = additional_params['workspace_id'] + + # Use workspace_id from kwargs if provided (override everything) + if workspace_id: + powerbi_config['workspace_id'] = workspace_id + + logger.info(f"PowerBI config (task-level overrides applied): {list(powerbi_config.keys())}") + + # Fetch PowerBI credentials from API Keys service (encrypted storage) + # These are sensitive and should never be stored in task config or passed in plain text + try: + from src.services.api_keys_service import ApiKeysService + from src.db.session import async_session_factory + from src.utils.encryption_utils import EncryptionUtils + + async with async_session_factory() as session: + # Use group_id for multi-tenant isolation + api_keys_service = ApiKeysService(session, group_id=self._group_id) + + # Fetch client_secret (required for service_principal auth) + client_secret_obj = await api_keys_service.find_by_name("POWERBI_CLIENT_SECRET") + if client_secret_obj and client_secret_obj.encrypted_value: + client_secret = EncryptionUtils.decrypt_value(client_secret_obj.encrypted_value) + powerbi_config['client_secret'] = client_secret + logger.info("Successfully retrieved POWERBI_CLIENT_SECRET from API Keys") + else: + logger.warning("POWERBI_CLIENT_SECRET not found in API Keys") + + # Fetch username (optional, for device_code or interactive auth) + username_obj = await api_keys_service.find_by_name("POWERBI_USERNAME") + if username_obj and username_obj.encrypted_value: + username = EncryptionUtils.decrypt_value(username_obj.encrypted_value) + powerbi_config['username'] = username + logger.info("Successfully retrieved POWERBI_USERNAME from API Keys") + + # Fetch password (optional, for device_code or interactive auth) + password_obj = await api_keys_service.find_by_name("POWERBI_PASSWORD") + if password_obj and password_obj.encrypted_value: + password = EncryptionUtils.decrypt_value(password_obj.encrypted_value) + powerbi_config['password'] = password + logger.info("Successfully retrieved POWERBI_PASSWORD from API Keys") + + except Exception as e: + logger.error(f"Error retrieving PowerBI credentials from API Keys: {e}") + # Continue without credentials - they might be provided via additional_params + + # Merge additional parameters (these will be passed to the Databricks notebook/job) + if additional_params: + # 
Create a copy to avoid modifying the original + job_additional_params = additional_params.copy() + + # Remove auth params that we already extracted (they're in powerbi_config or tool_config) + # This avoids duplication in job_params + for key in ['tenant_id', 'client_id', 'auth_method', 'client_secret', 'username', 'password', 'databricks_host', 'databricks_token']: + job_additional_params.pop(key, None) + + job_params.update(job_additional_params) + logger.info(f"Added {len(job_additional_params)} additional parameters to job_params") + + # Merge PowerBI config into job_params + job_params.update(powerbi_config) + + # Add Databricks configuration to job_params (needed by the notebook) + if databricks_host: + job_params['databricks_host'] = databricks_host + logger.info(f"Added databricks_host to job_params: {databricks_host}") + + if databricks_token: + job_params['databricks_token'] = databricks_token + logger.info("Added databricks_token to job_params (value hidden for security)") + + # Determine which job_id to use: parameter takes precedence over configured value + effective_job_id = job_id if job_id is not None else self._databricks_job_id + + # If job_id is available (from parameter or configuration), run it; otherwise, return instructions + if effective_job_id: + logger.info(f"Running Databricks job {effective_job_id} for Power BI analysis") + logger.info(f"Job parameters: {list(job_params.keys())}") + logger.info(f"Databricks host configured: {databricks_host}") + logger.info(f"Databricks token configured: {'Yes' if databricks_token else 'No'}") + + # Trigger job run + logger.info(f"šŸš€ Triggering Databricks job {effective_job_id} with {len(job_params)} parameters") + run_result = databricks_tool._run( + action="run", + job_id=effective_job_id, + job_params=job_params + ) + + logger.info(f"šŸ“ Job trigger result: {run_result[:200]}...") + + # Parse run_id from result + # Expected format: "āœ… Job run started successfully\nRun ID: 12345\n..." 
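The merging steps above establish a fixed precedence: task-level tool config wins over `additional_params` supplied by the LLM, an explicit `workspace_id` argument wins over both, secrets are pulled only from the API Keys service, and auth-related keys are stripped from the raw `additional_params` before the rest is forwarded to the job. A compressed, illustrative sketch of that ordering (not the tool's actual code path):

```python
from typing import Optional

AUTH_KEYS = {"tenant_id", "client_id", "auth_method", "client_secret",
             "username", "password", "databricks_host", "databricks_token"}

def build_job_params(question: str,
                     semantic_model_id: str,
                     task_config: dict,
                     additional_params: Optional[dict] = None,
                     workspace_id: Optional[str] = None,
                     secrets: Optional[dict] = None) -> dict:
    params = {"question": question, "semantic_model_id": semantic_model_id,
              "workspace_id": workspace_id}

    # Pass through caller-supplied extras, minus anything auth-related.
    if additional_params:
        params.update({k: v for k, v in additional_params.items() if k not in AUTH_KEYS})

    # Auth hints: task config first, additional_params only as a fallback.
    for key in ("tenant_id", "client_id", "auth_method", "workspace_id"):
        value = task_config.get(key) or (additional_params or {}).get(key)
        if value:
            params[key] = value

    # Secrets (client_secret, databricks_token, ...) come from the API Keys service.
    if secrets:
        params.update(secrets)

    # An explicit workspace_id argument overrides everything else.
    if workspace_id:
        params["workspace_id"] = workspace_id
    return params

print(build_job_params(
    question="What is total NSR?",
    semantic_model_id="placeholder-semantic-model-id",
    task_config={"tenant_id": "tenant-from-task", "auth_method": "service_principal"},
    additional_params={"tenant_id": "tenant-from-llm", "sample_size": 100},
))
# tenant-from-task wins over tenant-from-llm; sample_size is passed through unchanged.
```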
+ run_id = self._extract_run_id(run_result) + logger.info(f"šŸ“ Extracted run_id: {run_id}") + + if run_id: + # Get the task_key from additional_params if provided, otherwise use default + task_key = additional_params.get('task_key', 'pbi_e2e_pipeline') if additional_params else 'pbi_e2e_pipeline' + _debug_log(f"Monitoring Databricks job run {run_id}, task: {task_key}") + logger.info(f"[POWERBI-TOOL] Monitoring Databricks job run {run_id}, task: {task_key}") + + # Poll for completion + max_wait = 300 # 5 minutes + poll_interval = 5 # 5 seconds + elapsed = 0 + + while elapsed < max_wait: + # Check task status directly (for multi-task jobs) + task_status = await self._check_task_status(databricks_tool, run_id, task_key) + + logger.warning(f"ā±ļø Task '{task_key}' status: {task_status} (elapsed: {elapsed}s/{max_wait}s)") + + if task_status == "SUCCESS": + # Task completed successfully - extract the notebook output + _debug_log(f"Task '{task_key}' SUCCESS - extracting output from run_id={run_id}") + logger.info(f"[POWERBI-TOOL] šŸŽÆ Task '{task_key}' completed successfully (run_id: {run_id}), extracting notebook output...") + + try: + # Get the notebook output by calling the Databricks API directly + _debug_log(f"Calling _get_notebook_output with run_id={run_id}, task_key={task_key}") + logger.info(f"[POWERBI-TOOL] Calling _get_notebook_output with run_id={run_id}, task_key={task_key}") + result_data = await self._get_notebook_output(databricks_tool, run_id, task_key) + _debug_log(f"_get_notebook_output returned: {type(result_data)} - has_data={bool(result_data)}") + logger.info(f"[POWERBI-TOOL] _get_notebook_output returned: {type(result_data)} - {bool(result_data)}") + + if result_data: + rows_count = result_data.get('rows_returned', 0) + _debug_log(f"Successfully extracted {rows_count} rows from task output") + logger.info(f"[POWERBI-TOOL] āœ… Successfully extracted {rows_count} rows from task output") + formatted_result = self._format_analysis_result( + dashboard_id, + question_str, + result_data + ) + _debug_log(f"Formatted result length: {len(formatted_result)} chars") + logger.info(f"[POWERBI-TOOL] Formatted result length: {len(formatted_result)} chars") + logger.info(f"[POWERBI-TOOL] Result preview: {formatted_result[:500]}") + _debug_log(f"Returning formatted result: {formatted_result[:200]}...") + return formatted_result + else: + # Fallback to basic success message if we can't extract data + # Return detailed debug info to help diagnose the issue + logger.error(f"[POWERBI-TOOL] āŒ result_data is None/empty") + logger.error(f"[POWERBI-TOOL] Failed to extract result data from task '{task_key}' (run_id: {run_id})") + + # Include debug information in the response + debug_info = f"āœ… Task '{task_key}' completed successfully but could not extract detailed results.\n\n" + debug_info += f"**Debug Information:**\n" + debug_info += f"- Run ID: {run_id}\n" + debug_info += f"- Task Key: {task_key}\n" + debug_info += f"- Extraction returned: None\n\n" + + # Include extraction debug steps if available + if hasattr(self, '_extraction_debug') and self._extraction_debug: + debug_info += f"**Extraction Steps:**\n" + for step in self._extraction_debug: + debug_info += f"- {step}\n" + debug_info += "\n" + + debug_info += f"**Troubleshooting:**\n" + debug_info += f"1. Check backend logs/console for '[POWERBI-TOOL]' messages\n" + debug_info += f"2. The notebook should exit with: 'Notebook exited: {{...}}' containing result_data\n" + debug_info += f"3. 
Verify the task '{task_key}' exists in the multi-task job\n" + + return debug_info + except Exception as e: + logger.error(f"[POWERBI-TOOL] āŒ EXCEPTION during result extraction: {str(e)}", exc_info=True) + return f"āœ… Task '{task_key}' completed successfully but extraction failed: {str(e)}\n\nRun ID: {run_id}" + elif task_status in ["FAILED", "CANCELED", "TIMEDOUT"]: + logger.error(f"Task '{task_key}' failed with status: {task_status}") + return f"āŒ Analysis Failed\n\nTask '{task_key}' status: {task_status}\nRun ID: {run_id}" + elif task_status in ["RUNNING", "PENDING", "BLOCKED"]: + # Still running, wait and retry + logger.info(f"Task '{task_key}' still {task_status}, waiting {poll_interval}s...") + time.sleep(poll_interval) + elapsed += poll_interval + else: + logger.warning(f"Unknown task status: {task_status}") + time.sleep(poll_interval) + elapsed += poll_interval + + logger.error(f"Task '{task_key}' did not complete within {max_wait} seconds") + return f"ā±ļø Analysis Timeout\n\nTask '{task_key}' did not complete within {max_wait} seconds.\nRun ID: {run_id}\nLast status: {task_status}" + else: + return f"āŒ Failed to extract run ID from result:\n{run_result}" + + else: + # No job configured - return instructions for setup + return self._format_setup_instructions(dashboard_id, question_str, job_params) + + except Exception as e: + logger.error(f"Error executing Power BI analysis: {e}", exc_info=True) + return f"āŒ Error executing analysis: {str(e)}" + + def _extract_run_id(self, result: str) -> Optional[int]: + """Extract run ID from Databricks job result.""" + try: + # Look for "Run ID: 12345" pattern + match = re.search(r'Run ID:\s*(\d+)', result) + if match: + return int(match.group(1)) + except Exception as e: + logger.error(f"Error extracting run ID: {e}") + return None + + async def _check_task_status(self, databricks_tool, run_id: int, task_key: str = "pbi_e2e_pipeline") -> str: + """ + Check the status of a specific task in a Databricks job run. + + Args: + databricks_tool: DatabricksJobsTool instance + run_id: The run ID to check + task_key: The task key/name to check status for + + Returns: + Task status string: SUCCESS, FAILED, RUNNING, PENDING, etc. + """ + try: + # Get run details + run_details_endpoint = f"/api/2.1/jobs/runs/get?run_id={run_id}" + run_details = await databricks_tool._make_api_call("GET", run_details_endpoint) + + # Check if this is a multi-task job + tasks = run_details.get('tasks', []) + + if tasks: + # Multi-task job - find the specific task + target_task = None + for task in tasks: + if task.get('task_key') == task_key: + target_task = task + break + + if target_task: + # Get the task's state + state = target_task.get('state', {}) + life_cycle_state = state.get('life_cycle_state', 'UNKNOWN') + result_state = state.get('result_state', '') + + # If task has completed, return the result_state (SUCCESS, FAILED, etc.) 
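`_check_task_status` leans on the Jobs 2.1 `runs/get` response, where `state.life_cycle_state` says whether a task is still moving (PENDING, RUNNING, TERMINATED, ...) and `state.result_state` says how it ended (SUCCESS, FAILED, ...). A condensed synchronous sketch of the same check and polling loop, assuming a plain `requests` call against the workspace (the tool itself goes through `_make_api_call`):

```python
import time
import requests

def get_task_state(host: str, token: str, run_id: int, task_key: str) -> str:
    """Return SUCCESS/FAILED/... for a finished task, or its life-cycle state otherwise."""
    resp = requests.get(
        f"{host}/api/2.1/jobs/runs/get",
        params={"run_id": run_id},
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    resp.raise_for_status()
    run = resp.json()

    # Multi-task jobs report one state per task; fall back to the parent run's state.
    tasks = {t.get("task_key"): t for t in run.get("tasks", [])}
    state = tasks.get(task_key, run).get("state", {})

    life_cycle = state.get("life_cycle_state", "UNKNOWN")
    result = state.get("result_state", "")
    if life_cycle in ("TERMINATED", "INTERNAL_ERROR"):
        return result or "FAILED"
    return life_cycle  # PENDING / RUNNING / BLOCKED / ...

def wait_for_task(host, token, run_id, task_key="pbi_e2e_pipeline",
                  max_wait=300, poll_interval=5) -> str:
    elapsed = 0
    while elapsed < max_wait:
        status = get_task_state(host, token, run_id, task_key)
        if status in ("SUCCESS", "FAILED", "CANCELED", "TIMEDOUT"):
            return status
        time.sleep(poll_interval)
        elapsed += poll_interval
    return "TIMEOUT"
```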
+ if life_cycle_state in ['TERMINATED', 'INTERNAL_ERROR']: + return result_state if result_state else 'FAILED' + else: + # Task is still running/pending + return life_cycle_state + else: + logger.warning(f"Task '{task_key}' not found in run {run_id}") + # Return the main run status as fallback + state = run_details.get('state', {}) + life_cycle_state = state.get('life_cycle_state', 'UNKNOWN') + result_state = state.get('result_state', '') + return result_state if result_state else life_cycle_state + else: + # Single-task job - get the main run status + state = run_details.get('state', {}) + life_cycle_state = state.get('life_cycle_state', 'UNKNOWN') + result_state = state.get('result_state', '') + + # If run has completed, return the result_state + if life_cycle_state in ['TERMINATED', 'INTERNAL_ERROR']: + return result_state if result_state else 'FAILED' + else: + return life_cycle_state + + except Exception as e: + logger.error(f"Error checking task status: {e}", exc_info=True) + return "ERROR" + + def _format_success_result(self, result: str, semantic_model_id: str, question: str) -> str: + """Format successful analysis result.""" + output = f"āœ… Power BI Analysis Complete\n\n" + output += f"šŸ“Š Semantic Model: {semantic_model_id}\n" + output += f"ā“ Question Analyzed: {question}\n" + output += f"\n{result}\n" + return output + + async def _get_notebook_output(self, databricks_tool, run_id: int, task_key: str = "pbi_e2e_pipeline") -> Optional[dict]: + """ + Extract notebook output from a completed Databricks job run. + + For multi-task jobs, extracts output from the specified task. + + Args: + databricks_tool: DatabricksJobsTool instance + run_id: The run ID to get output from + task_key: The task key/name to get output from (default: "pbi_e2e_pipeline") + + Returns: + Parsed notebook output as dict, or None if extraction fails + """ + # Track extraction steps for debugging + self._extraction_debug = [] + + try: + _debug_log(f"_get_notebook_output: run_id={run_id}, task_key={task_key}") + self._extraction_debug.append(f"Starting extraction for run_id={run_id}, task_key={task_key}") + # For multi-task jobs, we need to get the run details first to find the task + logger.info(f"[POWERBI-TOOL] šŸ” Getting run details for run {run_id}, looking for task '{task_key}'") + + # First, get the run details to see if it's a multi-task job + run_details_endpoint = f"/api/2.1/jobs/runs/get?run_id={run_id}" + _debug_log(f"Making API call to: {run_details_endpoint}") + logger.info(f"[POWERBI-TOOL] Making API call to: {run_details_endpoint}") + self._extraction_debug.append(f"API call: {run_details_endpoint}") + run_details = await databricks_tool._make_api_call("GET", run_details_endpoint) + + _debug_log(f"Got run details with {len(run_details)} keys") + logger.info(f"[POWERBI-TOOL] šŸ“‹ Got run details with keys: {list(run_details.keys())}") + self._extraction_debug.append(f"Run details keys: {list(run_details.keys())}") + + # Check if this is a multi-task job + tasks = run_details.get('tasks', []) + _debug_log(f"Found {len(tasks)} tasks in run") + logger.info(f"[POWERBI-TOOL] šŸ”¢ Found {len(tasks)} tasks in run") + self._extraction_debug.append(f"Found {len(tasks)} tasks") + + if tasks: + # Multi-task job - find the specific task + task_keys = [t.get('task_key') for t in tasks] + logger.info(f"[POWERBI-TOOL] šŸ”Ž Multi-task job detected. 
Available tasks: {task_keys}") + self._extraction_debug.append(f"Available tasks: {task_keys}") + + target_task = None + for task in tasks: + if task.get('task_key') == task_key: + target_task = task + break + + if target_task: + # Get the task's run_id + task_run_id = target_task.get('run_id') + _debug_log(f"Found task '{task_key}' with run_id={task_run_id}") + logger.info(f"[POWERBI-TOOL] āœ… Found task '{task_key}' with run_id {task_run_id}") + self._extraction_debug.append(f"Found task '{task_key}' with run_id={task_run_id}") + + # Get the output for this specific task run + task_output_endpoint = f"/api/2.1/jobs/runs/get-output?run_id={task_run_id}" + _debug_log(f"Fetching task output from: {task_output_endpoint}") + logger.info(f"[POWERBI-TOOL] šŸ“„ Fetching task output from: {task_output_endpoint}") + self._extraction_debug.append(f"Fetching output: {task_output_endpoint}") + task_response = await databricks_tool._make_api_call("GET", task_output_endpoint) + + _debug_log(f"Task response has {len(task_response)} keys") + logger.info(f"[POWERBI-TOOL] šŸ“¦ Task response keys: {list(task_response.keys())}") + self._extraction_debug.append(f"Response keys: {list(task_response.keys())}") + notebook_output = task_response.get('notebook_output', {}) + _debug_log(f"Notebook output has {len(notebook_output)} keys") + logger.info(f"[POWERBI-TOOL] šŸ““ Notebook output keys: {list(notebook_output.keys())}") + self._extraction_debug.append(f"Notebook output keys: {list(notebook_output.keys())}") + result_text = notebook_output.get('result', '') + _debug_log(f"Result text length: {len(result_text)} chars, preview: {result_text[:100]}") + logger.info(f"[POWERBI-TOOL] šŸ“ Result text length: {len(result_text)} chars") + self._extraction_debug.append(f"Result text length: {len(result_text)} chars") + else: + logger.warning(f"Task '{task_key}' not found. Available tasks: {[t.get('task_key') for t in tasks]}") + # Fall back to getting output from the main run (might work for some jobs) + output_endpoint = f"/api/2.1/jobs/runs/get-output?run_id={run_id}" + response = await databricks_tool._make_api_call("GET", output_endpoint) + notebook_output = response.get('notebook_output', {}) + result_text = notebook_output.get('result', '') + else: + # Single-task job - get output directly + logger.info(f"Single-task job detected, getting output directly") + output_endpoint = f"/api/2.1/jobs/runs/get-output?run_id={run_id}" + response = await databricks_tool._make_api_call("GET", output_endpoint) + notebook_output = response.get('notebook_output', {}) + result_text = notebook_output.get('result', '') + + if not result_text: + logger.error(f"[POWERBI-TOOL] āŒ No notebook output result found in response") + self._extraction_debug.append("ERROR: No result_text found") + return None + + _debug_log(f"Result text preview: {result_text[:200]}") + logger.info(f"[POWERBI-TOOL] šŸ“„ Notebook output result (first 200 chars): {result_text[:200]}") + self._extraction_debug.append(f"Result preview: {result_text[:100]}...") + + # Try two parsing strategies: + # 1. Look for "Notebook exited: {...}" pattern (older format) + # 2. 
Parse result_text directly as JSON (current Databricks format from dbutils.notebook.exit) + + json_str = None + match = re.search(r'Notebook exited:\s*({.+})', result_text, re.DOTALL) + + if match: + _debug_log("Found 'Notebook exited:' pattern in output") + logger.info(f"[POWERBI-TOOL] šŸŽÆ Found 'Notebook exited:' pattern in output") + self._extraction_debug.append("Pattern matched: 'Notebook exited:'") + json_str = match.group(1) + else: + # Try parsing result_text directly as JSON (Databricks dbutils.notebook.exit format) + _debug_log("No 'Notebook exited:' pattern, trying direct JSON parse") + logger.info(f"[POWERBI-TOOL] No 'Notebook exited:' pattern found, trying direct JSON parse") + self._extraction_debug.append("No 'Notebook exited:' pattern - attempting direct JSON parse") + json_str = result_text.strip() + + if json_str: + _debug_log(f"Attempting to parse JSON (length: {len(json_str)} chars)") + logger.info(f"[POWERBI-TOOL] Found JSON in notebook output (length: {len(json_str)} chars)") + self._extraction_debug.append(f"JSON length: {len(json_str)} chars") + + try: + parsed_output = json.loads(json_str) + _debug_log(f"Successfully parsed JSON, keys: {list(parsed_output.keys())}") + logger.info(f"[POWERBI-TOOL] āœ… Successfully parsed notebook output JSON") + logger.info(f"[POWERBI-TOOL] šŸ“Š Parsed output keys: {list(parsed_output.keys())}") + self._extraction_debug.append(f"JSON parsed successfully, keys: {list(parsed_output.keys())}") + + # Extract the actual result data from pipeline_steps.step_3_execution.result_data + pipeline_steps = parsed_output.get('pipeline_steps', {}) + _debug_log(f"Pipeline steps: {list(pipeline_steps.keys())}") + logger.info(f"[POWERBI-TOOL] šŸ”§ Pipeline steps available: {list(pipeline_steps.keys())}") + self._extraction_debug.append(f"Pipeline steps: {list(pipeline_steps.keys())}") + + step_3 = pipeline_steps.get('step_3_execution', {}) + _debug_log(f"Step 3 keys: {list(step_3.keys())}") + logger.info(f"[POWERBI-TOOL] šŸŽÆ Step 3 (execution) keys: {list(step_3.keys())}") + self._extraction_debug.append(f"Step 3 keys: {list(step_3.keys())}") + + result_data = step_3.get('result_data', []) + + if result_data: + _debug_log(f"SUCCESS: Extracted {len(result_data)} result rows") + logger.info(f"[POWERBI-TOOL] šŸŽ‰ Successfully extracted {len(result_data)} result rows") + self._extraction_debug.append(f"SUCCESS: Extracted {len(result_data)} rows") + + # Build return data + return_data = { + 'status': parsed_output.get('status'), + 'execution_time': parsed_output.get('execution_time'), + 'generated_dax': pipeline_steps.get('step_2_dax_generation', {}).get('generated_dax'), + 'rows_returned': step_3.get('rows_returned', 0), + 'columns': step_3.get('columns', []), + 'result_data': result_data + } + _debug_log(f"Returning data with {len(str(return_data))} chars") + return return_data + else: + logger.error(f"[POWERBI-TOOL] āŒ No result_data found in step_3_execution") + logger.error(f"[POWERBI-TOOL] step_3_execution content: {json.dumps(step_3, indent=2)[:500]}") + self._extraction_debug.append("ERROR: result_data is empty/missing in step_3_execution") + self._extraction_debug.append(f"step_3 content: {json.dumps(step_3, indent=2)[:200]}") + return None + except json.JSONDecodeError as e: + logger.error(f"[POWERBI-TOOL] āŒ Failed to parse JSON: {e}") + logger.error(f"[POWERBI-TOOL] JSON string (first 500 chars): {json_str[:500]}") + self._extraction_debug.append(f"ERROR: JSON parse failed - {str(e)}") + return None + else: + logger.error(f"[POWERBI-TOOL] 
āŒ No JSON string to parse") + logger.error(f"[POWERBI-TOOL] Result text (first 500 chars): {result_text[:500]}") + self._extraction_debug.append("ERROR: No JSON string extracted from result") + return None + + except Exception as e: + logger.error(f"[POWERBI-TOOL] āŒ EXCEPTION in _get_notebook_output: {str(e)}", exc_info=True) + self._extraction_debug.append(f"EXCEPTION: {str(e)}") + return None + + def _format_analysis_result(self, semantic_model_id: str, question: str, result_data: dict) -> str: + """ + Format the extracted analysis results in a nice, readable format. + + Args: + semantic_model_id: The Power BI semantic model ID + question: The business question that was analyzed + result_data: Extracted result data from notebook + + Returns: + Formatted result string + """ + output = f"āœ… Power BI Analysis Complete\n\n" + output += f"šŸ“Š **Semantic Model**: {semantic_model_id}\n" + output += f"ā“ **Question**: {question}\n\n" + + # Show execution info + status = result_data.get('status', 'unknown') + execution_time = result_data.get('execution_time', 'unknown') + output += f"ā±ļø **Execution Time**: {execution_time}\n" + output += f"✨ **Status**: {status}\n\n" + + # Show the generated DAX query + generated_dax = result_data.get('generated_dax') + if generated_dax: + output += f"šŸ“ **Generated DAX Query**:\n```dax\n{generated_dax}\n```\n\n" + + # Show results summary + rows_returned = result_data.get('rows_returned', 0) + columns = result_data.get('columns', []) + output += f"šŸ“ˆ **Results Summary**:\n" + output += f"- Rows returned: {rows_returned}\n" + output += f"- Columns: {', '.join(columns)}\n\n" + + # Show the actual data + data_rows = result_data.get('result_data', []) + if data_rows: + output += f"šŸ“Š **Result Data**:\n\n" + + # Format as a table + output += f"Showing the complete list of data (total: {len(data_rows)}):\n\n" + output += "```json\n" + output += json.dumps(data_rows, indent=2) + output += "\n```\n" + else: + output += "āš ļø No result data returned\n" + + return output + + def _format_setup_instructions(self, semantic_model_id: str, question: str, job_params: dict) -> str: + """Format setup instructions when no Databricks job is configured.""" + output = f"āš™ļø Power BI Analysis Setup Required\n\n" + output += f"To execute Power BI analysis via Databricks, you need to:\n\n" + output += f"1. **Create a Databricks job** with the Power BI analysis notebook\n" + output += f"2. 
**Configure the job ID** in this tool\n\n" + output += f"**Analysis Parameters:**\n" + output += f"- Semantic Model: {semantic_model_id}\n" + output += f"- Question: {question}\n\n" + output += f"**Job Parameters (JSON):**\n" + output += f"```json\n{json.dumps(job_params, indent=2)}\n```\n\n" + output += f"**Notebook Location:**\n" + output += f"`scripts/dax_analysis_job.py` (from your ask.md guide)\n\n" + return output diff --git a/src/backend/src/engines/crewai/tools/tool_factory.py b/src/backend/src/engines/crewai/tools/tool_factory.py index b93d9c82..a7fb208c 100644 --- a/src/backend/src/engines/crewai/tools/tool_factory.py +++ b/src/backend/src/engines/crewai/tools/tool_factory.py @@ -50,6 +50,15 @@ DatabricksKnowledgeSearchTool = None logging.warning("Could not import DatabricksKnowledgeSearchTool") +try: + from .custom.powerbi_analysis_tool import PowerBIAnalysisTool +except ImportError: + try: + from .custom.powerbi_analysis_tool import PowerBIAnalysisTool + except ImportError: + PowerBIAnalysisTool = None + logging.warning("Could not import PowerBIAnalysisTool") + # MCPTool - Import from mcp_adapter try: from src.engines.common.mcp_adapter import MCPTool @@ -93,6 +102,7 @@ def __init__(self, config, api_keys_service=None, user_token=None): "GenieTool": GenieTool, "DatabricksJobsTool": DatabricksJobsTool, "DatabricksKnowledgeSearchTool": DatabricksKnowledgeSearchTool, + "PowerBIAnalysisTool": PowerBIAnalysisTool, } # Add MCPTool if it was successfully imported @@ -1111,6 +1121,59 @@ async def get_databricks_config(): tool = DatabricksKnowledgeSearchTool(**tool_args) return tool + elif tool_name == "PowerBIAnalysisTool": + # Create PowerBIAnalysisTool with group_id and PowerBI configuration + group_id = None + databricks_job_id = None + tenant_id = None + client_id = None + workspace_id = None + semantic_model_id = None + auth_method = "service_principal" + + try: + if isinstance(self.config, dict): + group_id = self.config.get("group_id") + + # Extract PowerBI config from tool_config (merged base + override) + if tool_config and isinstance(tool_config, dict): + databricks_job_id = tool_config.get("databricks_job_id") + tenant_id = tool_config.get("tenant_id") + client_id = tool_config.get("client_id") + workspace_id = tool_config.get("workspace_id") + semantic_model_id = tool_config.get("semantic_model_id") + auth_method = tool_config.get("auth_method", "service_principal") + + # Allow tool_config_override to override specific fields + if isinstance(tool_config_override, dict): + if "databricks_job_id" in tool_config_override: + databricks_job_id = tool_config_override["databricks_job_id"] + if "tenant_id" in tool_config_override: + tenant_id = tool_config_override["tenant_id"] + if "client_id" in tool_config_override: + client_id = tool_config_override["client_id"] + if "workspace_id" in tool_config_override: + workspace_id = tool_config_override["workspace_id"] + if "semantic_model_id" in tool_config_override: + semantic_model_id = tool_config_override["semantic_model_id"] + if "auth_method" in tool_config_override: + auth_method = tool_config_override["auth_method"] + except Exception as e: + logger.error(f"Error extracting PowerBI config: {e}") + group_id = None + databricks_job_id = None + + logger.info(f"Creating PowerBIAnalysisTool with group_id: {group_id}, databricks_job_id: {databricks_job_id}, tenant_id: {'***' if tenant_id else None}, client_id: {'***' if client_id else None}") + return tool_class( + group_id=group_id or "default", + databricks_job_id=databricks_job_id, 
+ tenant_id=tenant_id, + client_id=client_id, + workspace_id=workspace_id, + semantic_model_id=semantic_model_id, + auth_method=auth_method + ) + elif tool_name == "MCPTool": # MCPTool might need special configuration # Check if MCPTool exists and can be created diff --git a/src/backend/src/models/powerbi_config.py b/src/backend/src/models/powerbi_config.py new file mode 100644 index 00000000..949429a4 --- /dev/null +++ b/src/backend/src/models/powerbi_config.py @@ -0,0 +1,39 @@ +from datetime import datetime, timezone +from sqlalchemy import Column, Integer, String, Boolean, DateTime + +from src.db.base import Base + + +class PowerBIConfig(Base): + """ + PowerBIConfig model for Power BI integration settings with multi-tenant support. + Stores connection details for Power BI Semantic Model (Dataset) access. + """ + + id = Column(Integer, primary_key=True) + + # Power BI connection details + tenant_id = Column(String, nullable=False) # Azure AD Tenant ID + client_id = Column(String, nullable=False) # Service Principal Application ID + encrypted_client_secret = Column(String, nullable=True) # Encrypted SPN secret + workspace_id = Column(String, nullable=True) # Power BI Workspace ID (optional) + semantic_model_id = Column(String, nullable=True) # Default semantic model/dataset ID (optional) + + # Service account credentials (alternative auth method) + encrypted_username = Column(String, nullable=True) # Encrypted username (e.g., sa_datamesh_powerbi@domain.com) + encrypted_password = Column(String, nullable=True) # Encrypted password + + # Authentication method + auth_method = Column(String, default='username_password', nullable=False) # 'username_password' or 'device_code' + + # Configuration flags + is_active = Column(Boolean, default=True) # Track the currently active configuration + is_enabled = Column(Boolean, default=True) # Enable/disable Power BI integration + + # Multi-tenant fields + group_id = Column(String(100), index=True, nullable=True) # Group isolation + created_by_email = Column(String(255), index=True, nullable=True) # Creator email for audit + + # Timestamps + created_at = Column(DateTime(timezone=True), default=datetime.now(timezone.utc)) + updated_at = Column(DateTime(timezone=True), default=datetime.now(timezone.utc), onupdate=datetime.now(timezone.utc)) diff --git a/src/backend/src/repositories/powerbi_config_repository.py b/src/backend/src/repositories/powerbi_config_repository.py new file mode 100644 index 00000000..8d0b6618 --- /dev/null +++ b/src/backend/src/repositories/powerbi_config_repository.py @@ -0,0 +1,99 @@ +from typing import Optional +import logging +from datetime import datetime, timezone + +from sqlalchemy import select, update +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.base_repository import BaseRepository +from src.models.powerbi_config import PowerBIConfig + +# Set up logger +logger = logging.getLogger(__name__) + + +class PowerBIConfigRepository(BaseRepository[PowerBIConfig]): + """ + Repository for PowerBIConfig model with custom query methods. + Inherits base CRUD operations from BaseRepository. + """ + + def __init__(self, session: AsyncSession): + """ + Initialize the repository with session. + + Args: + session: SQLAlchemy async session + """ + super().__init__(PowerBIConfig, session) + + async def get_active_config(self, group_id: Optional[str] = None) -> Optional[PowerBIConfig]: + """ + Get the currently active Power BI configuration for the specified group. 
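One nuance in the `PowerBIConfig` timestamp columns above: `default=datetime.now(timezone.utc)` is evaluated once, when the model module is imported, so every row would receive that same value. SQLAlchemy calls a callable default at insert or update time instead; a minimal sketch of the callable form (same columns, assuming the same `Base`):

```python
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime

def _utcnow() -> datetime:
    # Invoked by SQLAlchemy for each insert/update, not once at import time.
    return datetime.now(timezone.utc)

created_at = Column(DateTime(timezone=True), default=_utcnow)
updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow)
```

`server_default=func.now()` is the database-side alternative if the timestamp should be assigned by the database rather than the application.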
+ If multiple active configurations exist, returns the most recently updated one. + + Args: + group_id: Optional group ID to filter by + + Returns: + Active configuration if found, else None + """ + query = select(self.model).where(self.model.is_active == True) + if group_id is not None: + query = query.where(self.model.group_id == group_id) + + # Order by updated_at descending to get the most recent one + query = query.order_by(self.model.updated_at.desc()) + + result = await self.session.execute(query) + return result.scalars().first() + + async def deactivate_all(self, group_id: Optional[str] = None) -> None: + """ + Deactivate all existing Power BI configurations for the specified group. + + Args: + group_id: Optional group ID to filter by + + Returns: + None + """ + query = ( + update(self.model) + .where(self.model.is_active == True) + .values(is_active=False, updated_at=datetime.now(timezone.utc)) + ) + if group_id is not None: + query = query.where(self.model.group_id == group_id) + await self.session.execute(query) + await self.session.commit() # Make sure the changes are committed + + async def create_config(self, config_data: dict) -> PowerBIConfig: + """ + Create a new Power BI configuration. + + Args: + config_data: Configuration data dictionary + + Returns: + The created configuration + """ + if config_data is None: + raise TypeError("config_data cannot be None") + + # First deactivate any existing active configurations for this group + group_id = config_data.get('group_id') + await self.deactivate_all(group_id=group_id) + + # Map 'enabled' from API schema to 'is_enabled' in database model + db_data = config_data.copy() + if 'enabled' in db_data: + db_data['is_enabled'] = db_data.pop('enabled') + + # Create the new configuration + db_config = PowerBIConfig(**db_data) + self.session.add(db_config) + await self.session.flush() + await self.session.commit() # Make sure the changes are committed + + return db_config diff --git a/src/backend/src/schemas/powerbi_config.py b/src/backend/src/schemas/powerbi_config.py new file mode 100644 index 00000000..b9f4b3cf --- /dev/null +++ b/src/backend/src/schemas/powerbi_config.py @@ -0,0 +1,106 @@ +from datetime import datetime +from typing import Optional, List +from pydantic import BaseModel, Field, model_validator + + +class PowerBIConfigBase(BaseModel): + """Base schema for Power BI configuration.""" + tenant_id: str = "" + client_id: str = "" + workspace_id: Optional[str] = None + semantic_model_id: Optional[str] = None + enabled: bool = True + auth_method: str = "username_password" # "username_password" or "device_code" + + +class PowerBIConfigCreate(PowerBIConfigBase): + """Schema for creating Power BI configuration.""" + + @property + def required_fields(self) -> List[str]: + """Get list of required fields based on configuration""" + if self.enabled: + return ["tenant_id", "client_id"] + return [] + + @model_validator(mode='after') + def validate_required_fields(self): + """Validate required fields based on configuration.""" + # Only validate if Power BI is enabled + if not self.enabled: + return self + + # Check required fields + required_fields = ["tenant_id", "client_id"] + empty_fields = [] + + for field in required_fields: + value = getattr(self, field, "") + if not value: + empty_fields.append(field) + + if empty_fields: + raise ValueError(f"Invalid configuration: {', '.join(empty_fields)} must be non-empty when Power BI is enabled") + + return self + + +class PowerBIConfigUpdate(PowerBIConfigBase): + """Schema for updating 
Power BI configuration.""" + tenant_id: Optional[str] = None + client_id: Optional[str] = None + workspace_id: Optional[str] = None + semantic_model_id: Optional[str] = None + enabled: Optional[bool] = None + + +class PowerBIConfigInDB(PowerBIConfigBase): + """Base schema for Power BI configuration in the database.""" + id: int + is_active: bool + created_at: datetime + updated_at: datetime + + model_config = { + "from_attributes": True, + "populate_by_name": True + } + + +class PowerBIConfigResponse(PowerBIConfigBase): + """Schema for Power BI configuration response.""" + pass + + +class DAXQueryRequest(BaseModel): + """Schema for DAX query execution request.""" + dax_query: str = Field(..., description="DAX query to execute against the semantic model") + semantic_model_id: Optional[str] = Field(None, description="Semantic model ID (uses default if not provided)") + workspace_id: Optional[str] = Field(None, description="Workspace ID (uses default if not provided)") + + +class DAXQueryResponse(BaseModel): + """Schema for DAX query execution response.""" + status: str = Field(..., description="Execution status: 'success' or 'error'") + data: Optional[List[dict]] = Field(None, description="Query results as list of dictionaries") + row_count: int = Field(0, description="Number of rows returned") + columns: Optional[List[str]] = Field(None, description="Column names in the result set") + error: Optional[str] = Field(None, description="Error message if execution failed") + execution_time_ms: Optional[int] = Field(None, description="Query execution time in milliseconds") + + +class DAXAnalysisRequest(BaseModel): + """Schema for DAX analysis request with questions.""" + dashboard_id: str = Field(..., description="Power BI dashboard/semantic model ID") + questions: List[str] = Field(..., description="Business questions to analyze") + workspace_id: Optional[str] = Field(None, description="Workspace ID (uses default if not provided)") + + +class DAXAnalysisResponse(BaseModel): + """Schema for DAX analysis response.""" + status: str = Field(..., description="Analysis status: 'success' or 'error'") + dashboard_id: str = Field(..., description="Dashboard/semantic model analyzed") + questions: List[str] = Field(..., description="Questions that were analyzed") + dax_statement: Optional[str] = Field(None, description="Generated DAX statement") + results: Optional[dict] = Field(None, description="Analysis results and insights") + error: Optional[str] = Field(None, description="Error message if analysis failed") diff --git a/src/backend/src/seeds/tools.py b/src/backend/src/seeds/tools.py index 7e0dc1bb..9443f1ed 100644 --- a/src/backend/src/seeds/tools.py +++ b/src/backend/src/seeds/tools.py @@ -24,6 +24,7 @@ (36, "DatabricksKnowledgeSearchTool", "A powerful knowledge search tool that enables semantic search across documents uploaded to Databricks Vector Search. It provides RAG (Retrieval-Augmented Generation) capabilities by searching through indexed documents based on vector similarity. This tool allows agents to access and retrieve relevant information from uploaded knowledge files including PDFs, Word documents, text files, and other document formats. Essential for building context-aware AI applications with access to custom knowledge bases.", "search"), (69, "MCPTool", "An advanced adapter for Model Context Protocol (MCP) servers that enables access to thousands of specialized tools from the MCP ecosystem. 
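For the DAX execution schemas defined above, a hedged example of the request/response round trip (the DAX text and numbers are placeholders; the import path matches the new module):

```python
from src.schemas.powerbi_config import DAXQueryRequest, DAXQueryResponse

# Request: no semantic_model_id given, so the service falls back to the configured default.
request = DAXQueryRequest(
    dax_query='EVALUATE SUMMARIZECOLUMNS(\'Date\'[Year], "Total NSR", [Total NSR])',
)

# The shape a successful execution is expected to produce.
response = DAXQueryResponse(
    status="success",
    data=[{"Date[Year]": 2024, "[Total NSR]": 1234567.89}],
    row_count=1,
    columns=["Date[Year]", "[Total NSR]"],
    execution_time_ms=842,
)
print(request.model_dump())
print(response.model_dump())
```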
This tool establishes and manages connections with MCP servers through SSE (Server-Sent Events), providing seamless integration with community-built tool collections. Perfect for extending agent capabilities with domain-specific tools without requiring custom development or direct integration work.", "integration"), (70, "DatabricksJobsTool", "A comprehensive Databricks Jobs management tool using direct REST API calls for optimal performance. IMPORTANT WORKFLOW: Always use 'get_notebook' action FIRST to analyze job notebooks and understand required parameters before running any job with custom parameters. This ensures proper parameter construction and prevents job failures. Available actions: (1) 'list' - List all jobs in workspace with optional name/ID filtering, (2) 'list_my_jobs' - List only jobs created by current user, (3) 'get' - Get detailed job configuration and recent run history, (4) 'get_notebook' - Analyze notebook content to understand parameters, widgets, and logic (REQUIRED before running jobs with parameters), (5) 'run' - Trigger job execution with custom parameters (use dict for notebook/SQL tasks, list for Python tasks), (6) 'monitor' - Track real-time execution status and task progress, (7) 'create' - Create new jobs with custom configurations. The tool provides intelligent parameter analysis, suggesting proper parameter structures based on notebook patterns (search jobs, ETL jobs, etc.). Supports OAuth/OBO authentication, PAT tokens, and Databricks CLI profiles. All operations use direct REST API calls avoiding SDK overhead for faster execution. Essential for automating data pipelines, orchestrating workflows, and integrating Databricks jobs into AI agent systems.", "database"), + (71, "PowerBIAnalysisTool", "Execute complex Power BI analysis via Databricks jobs for heavy computational workloads. This tool wraps DAX queries in Databricks job execution, enabling large-scale data processing, multi-query analysis, and resource-intensive computations. Perfect for year-over-year analysis, trend detection, comprehensive reporting, and complex business intelligence tasks that require significant compute resources. Integrates with DatabricksJobsTool for job orchestration and monitoring. 
IMPORTANT: To enable this tool, you MUST configure the following API Keys in Settings → API Keys: POWERBI_CLIENT_SECRET, POWERBI_USERNAME, POWERBI_PASSWORD, and DATABRICKS_API_KEY (or DATABRICKS_TOKEN).", "database"), ] def get_tool_configs(): @@ -81,7 +82,16 @@ def get_tool_configs(): "70": { "result_as_answer": False, "DATABRICKS_HOST": "", # Databricks workspace URL (e.g., "e2-demo-field-eng.cloud.databricks.com") - } # DatabricksJobsTool + }, # DatabricksJobsTool + "71": { + "result_as_answer": False, + "databricks_job_id": None, # Required: Databricks job ID for Power BI analysis + "tenant_id": "", # Azure AD Tenant ID (required) + "client_id": "", # Azure AD Application/Client ID (required) + "workspace_id": "", # Default Power BI Workspace ID (optional, can be overridden per task) + "semantic_model_id": "", # Default Power BI Semantic Model ID (optional, can be overridden per task) + "auth_method": "service_principal" # Authentication method: "service_principal" or "device_code" + } # PowerBIAnalysisTool } async def seed_async(): @@ -99,7 +109,7 @@ async def seed_async(): tools_error = 0 # List of tool IDs that should be enabled - enabled_tool_ids = [6, 16, 26, 31, 35, 36, 69, 70] + enabled_tool_ids = [6, 16, 26, 31, 35, 36, 67, 69, 70, 71] for tool_id, title, description, icon in tools_data: try: @@ -162,7 +172,7 @@ def seed_sync(): tools_error = 0 # List of tool IDs that should be enabled - enabled_tool_ids = [6, 16, 26, 31, 35, 36, 69, 70] + enabled_tool_ids = [6, 16, 26, 31, 35, 36, 67, 69, 70, 71] for tool_id, title, description, icon in tools_data: try: diff --git a/src/backend/src/services/powerbi_service.py b/src/backend/src/services/powerbi_service.py new file mode 100644 index 00000000..e55b1791 --- /dev/null +++ b/src/backend/src/services/powerbi_service.py @@ -0,0 +1,303 @@ +import logging +import os +import time +from typing import Dict, List, Optional +import requests +from azure.identity import UsernamePasswordCredential, DeviceCodeCredential +from fastapi import HTTPException + +from src.repositories.powerbi_config_repository import PowerBIConfigRepository +from src.schemas.powerbi_config import DAXQueryRequest, DAXQueryResponse + +# Set up logger +logger = logging.getLogger(__name__) + + +class PowerBIService: + """Service for Power BI DAX operations.""" + + def __init__(self, session, group_id: Optional[str] = None): + self.session = session + self.repository = PowerBIConfigRepository(session) + self.group_id = group_id + self._secrets_service = None # Lazy load to avoid circular deps + + @property + def secrets_service(self): + """Lazy load secrets_service to avoid circular dependency.""" + if self._secrets_service is None: + from src.services.api_keys_service import ApiKeysService + self._secrets_service = ApiKeysService(self.session) + return self._secrets_service + + async def execute_dax_query(self, query_request: DAXQueryRequest) -> DAXQueryResponse: + """ + Execute DAX query against Power BI semantic model. + + Args: + query_request: DAX query request with query and optional semantic model ID + + Returns: + DAXQueryResponse with results or error information + """ + start_time = time.time() + + try: + # Get active Power BI configuration + config = await self.repository.get_active_config(group_id=self.group_id) + if not config: + raise HTTPException( + status_code=404, + detail="No active Power BI configuration found. Please configure Power BI connection first." 
+ ) + + if not config.is_enabled: + raise HTTPException( + status_code=400, + detail="Power BI integration is disabled. Please enable it in settings." + ) + + # Use provided semantic model ID or default from config + semantic_model_id = query_request.semantic_model_id or config.semantic_model_id + if not semantic_model_id: + raise HTTPException( + status_code=400, + detail="Semantic model ID is required. Provide it in the request or configure a default." + ) + + # Generate authentication token + token = await self._generate_token(config) + + # Execute DAX query + results = await self._execute_query( + token=token, + semantic_model_id=semantic_model_id, + dax_query=query_request.dax_query + ) + + # Process results + data = self._postprocess_data(results) + + execution_time_ms = int((time.time() - start_time) * 1000) + + return DAXQueryResponse( + status="success", + data=data, + row_count=len(data), + columns=list(data[0].keys()) if data else [], + execution_time_ms=execution_time_ms + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error executing DAX query: {e}", exc_info=True) + execution_time_ms = int((time.time() - start_time) * 1000) + return DAXQueryResponse( + status="error", + data=None, + row_count=0, + columns=None, + error=str(e), + execution_time_ms=execution_time_ms + ) + + async def _generate_token(self, config) -> str: + """ + Generate authentication token for Power BI API. + + Supports two authentication methods: + 1. device_code: Interactive browser/device code flow (recommended for testing/personal workspaces) + 2. username_password: Username/password flow (requires credentials) + + Args: + config: PowerBIConfig model instance + + Returns: + Authentication token string + """ + try: + tenant_id = config.tenant_id + client_id = config.client_id + + # Get authentication method from config (default to username_password for backward compatibility) + auth_method = getattr(config, 'auth_method', 'username_password') + + if auth_method == 'device_code': + # Device Code Flow - Interactive authentication + logger.info("Using Device Code Flow for authentication") + return await self._generate_token_device_code(tenant_id, client_id) + else: + # Username/Password Flow - Requires stored credentials + logger.info("Using Username/Password Flow for authentication") + return await self._generate_token_username_password(tenant_id, client_id, config) + + except Exception as e: + logger.error(f"Error generating Power BI token: {e}", exc_info=True) + raise HTTPException( + status_code=401, + detail=f"Failed to authenticate with Power BI: {str(e)}" + ) + + async def _generate_token_device_code(self, tenant_id: str, client_id: str) -> str: + """ + Generate token using device code flow (interactive authentication). + User will be prompted to visit microsoft.com/devicelogin with a code. 
+ + Args: + tenant_id: Azure AD tenant ID + client_id: Application (client) ID + + Returns: + Authentication token string + """ + try: + credential = DeviceCodeCredential( + client_id=client_id, + tenant_id=tenant_id, + ) + + logger.info("Device Code authentication initiated - user should follow authentication prompt") + + # Get token for Power BI API + token = credential.get_token("https://analysis.windows.net/powerbi/api/.default") + logger.info("Device Code authentication successful") + return token.token + + except Exception as e: + logger.error(f"Device Code authentication failed: {e}") + raise + + async def _generate_token_username_password(self, tenant_id: str, client_id: str, config) -> str: + """ + Generate token using username/password flow. + Requires POWERBI_USERNAME and POWERBI_PASSWORD from API Keys Service or environment. + + Args: + tenant_id: Azure AD tenant ID + client_id: Application (client) ID + config: PowerBIConfig model instance + + Returns: + Authentication token string + """ + try: + # Attempt to get credentials from different sources + # Priority: API Keys Service > Environment Variables + username = None + password = None + client_secret = None + + # Try to get from API Keys Service + if self._secrets_service: + try: + username = await self.secrets_service.get_api_key("POWERBI_USERNAME") + password = await self.secrets_service.get_api_key("POWERBI_PASSWORD") + client_secret = await self.secrets_service.get_api_key("POWERBI_CLIENT_SECRET") + except Exception as e: + logger.warning(f"Could not get Power BI credentials from API Keys Service: {e}") + + # Fallback to environment variables + if not username: + username = os.getenv("POWERBI_USERNAME") or os.getenv("SADATAMESHPOWERBIUSERNAME") + if not password: + password = os.getenv("POWERBI_PASSWORD") or os.getenv("SADATAMESHPOWERBIPASSWORD") + if not client_secret: + client_secret = os.getenv("POWERBI_CLIENT_SECRET") + + # Validate credentials + if not all([username, password, client_id]): + raise ValueError( + "Missing required credentials. Please provide username, password, and client_id " + "through API Keys Service or environment variables." + ) + + logger.info(f"Authenticating with username length: {len(username)}, password length: {len(password)}") + + # Create credential and get token + credential = UsernamePasswordCredential( + client_id=client_id, + username=username, + password=password, + tenant_id=tenant_id, + client_secret=client_secret if client_secret else None, + ) + + # Token generation for Power BI API + token = credential.get_token("https://analysis.windows.net/powerbi/api/.default") + logger.info("Username/Password authentication successful") + return token.token + + except Exception as e: + logger.error(f"Username/Password authentication failed: {e}") + raise + + async def _execute_query(self, token: str, semantic_model_id: str, dax_query: str) -> List: + """ + Execute DAX query against Power BI API. 
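+ Uses the Power BI REST executeQueries endpoint (POST https://api.powerbi.com/v1.0/myorg/datasets/{semantic_model_id}/executeQueries), authorized with the bearer token passed in.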
+ + Args: + token: Authentication token + semantic_model_id: Power BI semantic model (dataset) ID + dax_query: DAX query string + + Returns: + Raw query results from Power BI API + """ + try: + datasets_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{semantic_model_id}/executeQueries" + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {token}", + } + + body = {"queries": [{"query": dax_query}]} + + logger.info(f"Executing DAX query against semantic model: {semantic_model_id}") + logger.debug(f"Query: {dax_query[:200]}...") # Log first 200 chars + + response = requests.post(datasets_url, headers=headers, json=body, timeout=30) + + if response.status_code != 200: + error_msg = f"Power BI API error (status {response.status_code}): {response.text}" + logger.error(error_msg) + raise HTTPException(status_code=response.status_code, detail=error_msg) + + logger.info(f"Successfully fetched response with status: {response.status_code}") + + results = response.json().get("results", []) + return results + + except requests.exceptions.RequestException as e: + logger.error(f"Request error when calling Power BI API: {e}", exc_info=True) + raise HTTPException( + status_code=500, + detail=f"Failed to execute DAX query: {str(e)}" + ) + + def _postprocess_data(self, results: List) -> List[Dict]: + """ + Post-process DAX query results into a list of dictionaries. + + Args: + results: Raw results from Power BI API + + Returns: + List of dictionaries representing rows + """ + if not results: + logger.info("No results found in the response.") + return [] + + tables = results[0].get("tables", []) + if not tables: + logger.info("No tables found in the response.") + return [] + + rows = tables[0].get("rows", []) + if not rows: + logger.info("No rows found in the response.") + return [] + + return rows diff --git a/src/backend/tests/unit/repositories/test_powerbi_config_repository.py b/src/backend/tests/unit/repositories/test_powerbi_config_repository.py new file mode 100644 index 00000000..0aaa450b --- /dev/null +++ b/src/backend/tests/unit/repositories/test_powerbi_config_repository.py @@ -0,0 +1,272 @@ +""" +Unit tests for PowerBIConfigRepository. + +Tests the functionality of Power BI configuration repository including +active configuration management, deactivation operations, and configuration creation. 
+""" +import pytest +from unittest.mock import AsyncMock, MagicMock +from datetime import datetime, timezone + +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select + +from src.repositories.powerbi_config_repository import PowerBIConfigRepository +from src.models.powerbi_config import PowerBIConfig + + +# Mock Power BI config model +class MockPowerBIConfig: + def __init__(self, id=1, tenant_id="test-tenant", client_id="test-client", + workspace_id="test-workspace", semantic_model_id="test-model", + is_active=True, is_enabled=True, group_id=None, + created_at=None, updated_at=None, **kwargs): + self.id = id + self.tenant_id = tenant_id + self.client_id = client_id + self.workspace_id = workspace_id + self.semantic_model_id = semantic_model_id + self.is_active = is_active + self.is_enabled = is_enabled + self.group_id = group_id + self.created_at = created_at or datetime.now(timezone.utc) + self.updated_at = updated_at or datetime.now(timezone.utc) + for key, value in kwargs.items(): + setattr(self, key, value) + + +@pytest.fixture +def mock_async_session(): + """Create a mock async database session.""" + session = AsyncMock(spec=AsyncSession) + session.execute = AsyncMock() + session.add = MagicMock() # add() is synchronous in SQLAlchemy + session.flush = AsyncMock() + session.commit = AsyncMock() + session.rollback = AsyncMock() + return session + + +@pytest.fixture +def powerbi_config_repository(mock_async_session): + """Create a Power BI config repository with async session.""" + return PowerBIConfigRepository(session=mock_async_session) + + +@pytest.fixture +def sample_powerbi_configs(): + """Create sample Power BI configurations for testing.""" + return [ + MockPowerBIConfig(id=1, tenant_id="active-tenant", is_active=True, group_id="group1"), + MockPowerBIConfig(id=2, tenant_id="inactive-tenant", is_active=False, group_id="group1"), + MockPowerBIConfig(id=3, tenant_id="other-group", is_active=True, group_id="group2") + ] + + +@pytest.fixture +def sample_config_data(): + """Create sample config data for creation.""" + return { + "tenant_id": "new-tenant", + "client_id": "new-client", + "workspace_id": "new-workspace", + "semantic_model_id": "new-model", + "is_active": True, + "is_enabled": True, + "group_id": "group1" + } + + +class TestPowerBIConfigRepositoryInit: + """Test cases for PowerBIConfigRepository initialization.""" + + def test_init_success(self, mock_async_session): + """Test successful initialization.""" + repository = PowerBIConfigRepository(session=mock_async_session) + + assert repository.model == PowerBIConfig + assert repository.session == mock_async_session + + +class TestPowerBIConfigRepositoryGetActiveConfig: + """Test cases for get_active_config method.""" + + @pytest.mark.asyncio + async def test_get_active_config_success(self, powerbi_config_repository, mock_async_session, sample_powerbi_configs): + """Test successful retrieval of active configuration.""" + active_config = sample_powerbi_configs[0] # is_active=True, group1 + + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.first.return_value = active_config + mock_result.scalars.return_value = mock_scalars + mock_async_session.execute.return_value = mock_result + + result = await powerbi_config_repository.get_active_config(group_id="group1") + + assert result == active_config + mock_async_session.execute.assert_called_once() + + @pytest.mark.asyncio + async def test_get_active_config_no_group_filter(self, powerbi_config_repository, mock_async_session, 
sample_powerbi_configs): + """Test get active config without group filter.""" + active_config = sample_powerbi_configs[0] + + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.first.return_value = active_config + mock_result.scalars.return_value = mock_scalars + mock_async_session.execute.return_value = mock_result + + result = await powerbi_config_repository.get_active_config() + + assert result == active_config + mock_async_session.execute.assert_called_once() + + @pytest.mark.asyncio + async def test_get_active_config_none_found(self, powerbi_config_repository, mock_async_session): + """Test get active config when no active configuration exists.""" + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.first.return_value = None + mock_result.scalars.return_value = mock_scalars + mock_async_session.execute.return_value = mock_result + + result = await powerbi_config_repository.get_active_config(group_id="nonexistent") + + assert result is None + mock_async_session.execute.assert_called_once() + + @pytest.mark.asyncio + async def test_get_active_config_database_error(self, powerbi_config_repository, mock_async_session): + """Test get active config handles database errors.""" + mock_async_session.execute.side_effect = Exception("Database connection error") + + with pytest.raises(Exception, match="Database connection error"): + await powerbi_config_repository.get_active_config() + + +class TestPowerBIConfigRepositoryDeactivateAll: + """Test cases for deactivate_all method.""" + + @pytest.mark.asyncio + async def test_deactivate_all_success(self, powerbi_config_repository, mock_async_session): + """Test successful deactivation of all configs for a group.""" + mock_result = MagicMock() + mock_async_session.execute.return_value = mock_result + + await powerbi_config_repository.deactivate_all(group_id="group1") + + # execute is called once for the bulk update; commit is asserted separately + assert mock_async_session.execute.call_count == 1 + mock_async_session.commit.assert_called_once() + + @pytest.mark.asyncio + async def test_deactivate_all_no_group_filter(self, powerbi_config_repository, mock_async_session): + """Test deactivate all without group filter.""" + mock_result = MagicMock() + mock_async_session.execute.return_value = mock_result + + await powerbi_config_repository.deactivate_all() + + assert mock_async_session.execute.call_count == 1 + mock_async_session.commit.assert_called_once() + + @pytest.mark.asyncio + async def test_deactivate_all_database_error(self, powerbi_config_repository, mock_async_session): + """Test deactivate all handles database errors.""" + mock_async_session.execute.side_effect = Exception("Database error") + + with pytest.raises(Exception, match="Database error"): + await powerbi_config_repository.deactivate_all(group_id="group1") + + +class TestPowerBIConfigRepositoryCreateConfig: + """Test cases for create_config method.""" + + @pytest.mark.asyncio + async def test_create_config_success(self, powerbi_config_repository, mock_async_session, sample_config_data): + """Test successful configuration creation.""" + # Mock deactivate_all + mock_result = MagicMock() + mock_async_session.execute.return_value = mock_result + + result = await powerbi_config_repository.create_config(sample_config_data) + + # Verify deactivate_all was called + assert mock_async_session.execute.call_count >= 1 + + # Verify config was added to session + mock_async_session.add.assert_called_once() + + # Verify flush and commit were called +
mock_async_session.flush.assert_called_once() + assert mock_async_session.commit.call_count >= 1 + + # Verify returned config has expected attributes + assert isinstance(result, PowerBIConfig) + + @pytest.mark.asyncio + async def test_create_config_with_group_id(self, powerbi_config_repository, mock_async_session): + """Test config creation with group_id.""" + config_data = { + "tenant_id": "test-tenant", + "client_id": "test-client", + "group_id": "test-group" + } + + mock_result = MagicMock() + mock_async_session.execute.return_value = mock_result + + result = await powerbi_config_repository.create_config(config_data) + + mock_async_session.add.assert_called_once() + mock_async_session.flush.assert_called_once() + + @pytest.mark.asyncio + async def test_create_config_none_data_error(self, powerbi_config_repository): + """Test create config with None data raises error.""" + with pytest.raises(TypeError, match="config_data cannot be None"): + await powerbi_config_repository.create_config(None) + + @pytest.mark.asyncio + async def test_create_config_database_error(self, powerbi_config_repository, mock_async_session, sample_config_data): + """Test create config handles database errors.""" + mock_async_session.add.side_effect = Exception("Database error") + + with pytest.raises(Exception, match="Database error"): + await powerbi_config_repository.create_config(sample_config_data) + + +class TestPowerBIConfigRepositoryMultiTenancy: + """Test cases for multi-tenant functionality.""" + + @pytest.mark.asyncio + async def test_get_active_config_different_groups(self, powerbi_config_repository, mock_async_session): + """Test that get_active_config properly filters by group.""" + # Mock two different configs for different groups + group1_config = MockPowerBIConfig(id=1, group_id="group1", is_active=True) + group2_config = MockPowerBIConfig(id=2, group_id="group2", is_active=True) + + mock_result = MagicMock() + mock_scalars = MagicMock() + mock_scalars.first.return_value = group1_config + mock_result.scalars.return_value = mock_scalars + mock_async_session.execute.return_value = mock_result + + result = await powerbi_config_repository.get_active_config(group_id="group1") + + assert result == group1_config + assert result.group_id == "group1" + + @pytest.mark.asyncio + async def test_deactivate_only_affects_specified_group(self, powerbi_config_repository, mock_async_session): + """Test that deactivate_all only affects the specified group.""" + mock_result = MagicMock() + mock_async_session.execute.return_value = mock_result + + await powerbi_config_repository.deactivate_all(group_id="group1") + + # Verify execute was called with a query + assert mock_async_session.execute.call_count == 1 + mock_async_session.commit.assert_called_once() diff --git a/src/backend/tests/unit/router/test_powerbi_router.py b/src/backend/tests/unit/router/test_powerbi_router.py new file mode 100644 index 00000000..70375420 --- /dev/null +++ b/src/backend/tests/unit/router/test_powerbi_router.py @@ -0,0 +1,380 @@ +""" +Unit tests for PowerBIRouter. + +Tests the functionality of Power BI integration endpoints including +configuration management, DAX query execution, and status checks. 
+""" +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from datetime import datetime + +from fastapi import HTTPException +from fastapi.testclient import TestClient +from sqlalchemy.ext.asyncio import AsyncSession + +from src.schemas.powerbi_config import PowerBIConfigCreate, DAXQueryRequest, DAXQueryResponse + + +# Mock Power BI config response +class MockPowerBIConfigResponse: + def __init__(self, tenant_id="test-tenant", client_id="test-client", + workspace_id="test-workspace", semantic_model_id="test-model", + enabled=True): + self.tenant_id = tenant_id + self.client_id = client_id + self.workspace_id = workspace_id + self.semantic_model_id = semantic_model_id + self.enabled = enabled + self.created_at = datetime.utcnow() + self.updated_at = datetime.utcnow() + + def model_dump(self): + """Mock model_dump for Pydantic compatibility.""" + return { + "tenant_id": self.tenant_id, + "client_id": self.client_id, + "workspace_id": self.workspace_id, + "semantic_model_id": self.semantic_model_id, + "enabled": self.enabled, + "created_at": self.created_at.isoformat(), + "updated_at": self.updated_at.isoformat() + } + + +@pytest.fixture +def mock_powerbi_service(): + """Create a mock Power BI service.""" + service = AsyncMock() + return service + + +@pytest.fixture +def mock_db_session(): + """Create a mock database session.""" + return AsyncMock(spec=AsyncSession) + + +@pytest.fixture +def mock_group_context(): + """Create a mock group context.""" + context = MagicMock() + context.primary_group_id = "test-group" + context.group_email = "test@example.com" + return context + + +@pytest.fixture +def app(mock_powerbi_service, mock_db_session, mock_group_context): + """Create a FastAPI app with mocked dependencies.""" + from fastapi import FastAPI + from src.api.powerbi_router import router, get_powerbi_service + from src.core.dependencies import get_db, get_group_context + + app = FastAPI() + app.include_router(router) + + # Override dependencies + app.dependency_overrides[get_db] = lambda: mock_db_session + app.dependency_overrides[get_powerbi_service] = lambda session=None, group_context=None: mock_powerbi_service + app.dependency_overrides[get_group_context] = lambda: mock_group_context + + return app + + +@pytest.fixture +def client(app): + """Create a test client.""" + return TestClient(app) + + +@pytest.fixture +def mock_workspace_admin(): + """Mock workspace admin check.""" + def mock_is_admin(context): + return True + return mock_is_admin + + +@pytest.fixture +def mock_non_admin(): + """Mock non-admin check.""" + def mock_is_admin(context): + return False + return mock_is_admin + + +class TestPowerBIRouterConfigEndpoints: + """Test cases for Power BI configuration endpoints.""" + + def test_set_powerbi_config_success(self, client, mock_powerbi_service): + """Test successful Power BI configuration setting.""" + with patch('src.api.powerbi_router.is_workspace_admin', return_value=True): + config_data = { + "tenant_id": "test-tenant", + "client_id": "test-client", + "workspace_id": "test-workspace", + "semantic_model_id": "test-model", + "enabled": True + } + + # Mock repository response + mock_config = MagicMock() + mock_config.tenant_id = config_data["tenant_id"] + mock_config.client_id = config_data["client_id"] + mock_config.workspace_id = config_data["workspace_id"] + mock_config.semantic_model_id = config_data["semantic_model_id"] + mock_config.is_enabled = config_data["enabled"] + mock_config.is_active = True + + 
mock_powerbi_service.repository.create_config.return_value = mock_config + + response = client.post("/powerbi/config", json=config_data) + + assert response.status_code == 200 + data = response.json() + assert data["message"] == "Power BI configuration saved successfully" + assert "config" in data + + def test_set_powerbi_config_not_admin(self, client, mock_powerbi_service): + """Test Power BI configuration setting by non-admin.""" + with patch('src.api.powerbi_router.is_workspace_admin', return_value=False): + config_data = { + "tenant_id": "test-tenant", + "client_id": "test-client", + "enabled": True + } + + response = client.post("/powerbi/config", json=config_data) + + assert response.status_code == 403 + assert "admin" in response.json()["detail"].lower() + + def test_set_powerbi_config_error(self, client, mock_powerbi_service): + """Test Power BI configuration setting with service error.""" + with patch('src.api.powerbi_router.is_workspace_admin', return_value=True): + config_data = { + "tenant_id": "test-tenant", + "client_id": "test-client", + "enabled": True + } + + mock_powerbi_service.repository.create_config.side_effect = Exception("Database error") + + response = client.post("/powerbi/config", json=config_data) + + assert response.status_code == 500 + assert "error" in response.json()["detail"].lower() + + def test_get_powerbi_config_success(self, client, mock_powerbi_service): + """Test successful Power BI configuration retrieval.""" + mock_config = MagicMock() + mock_config.tenant_id = "test-tenant" + mock_config.client_id = "test-client" + mock_config.workspace_id = "test-workspace" + mock_config.semantic_model_id = "test-model" + mock_config.is_enabled = True + + mock_powerbi_service.repository.get_active_config.return_value = mock_config + + response = client.get("/powerbi/config") + + assert response.status_code == 200 + data = response.json() + assert data["tenant_id"] == "test-tenant" + assert data["client_id"] == "test-client" + assert data["enabled"] is True + + def test_get_powerbi_config_not_found(self, client, mock_powerbi_service): + """Test Power BI configuration retrieval when not configured.""" + mock_powerbi_service.repository.get_active_config.return_value = None + + response = client.get("/powerbi/config") + + assert response.status_code == 200 + data = response.json() + assert data["tenant_id"] == "" + assert data["enabled"] is False + + def test_get_powerbi_config_error(self, client, mock_powerbi_service): + """Test Power BI configuration retrieval with service error.""" + mock_powerbi_service.repository.get_active_config.side_effect = Exception("Database error") + + response = client.get("/powerbi/config") + + assert response.status_code == 500 + + +class TestPowerBIRouterQueryEndpoint: + """Test cases for DAX query execution endpoint.""" + + def test_execute_dax_query_success(self, client, mock_powerbi_service): + """Test successful DAX query execution.""" + query_request = { + "dax_query": "EVALUATE 'Sales'", + "semantic_model_id": "test-model" + } + + mock_response = DAXQueryResponse( + status="success", + data=[{"Region": "East", "Total": 1000}], + row_count=1, + columns=["Region", "Total"], + execution_time_ms=250 + ) + + mock_powerbi_service.execute_dax_query.return_value = mock_response + + response = client.post("/powerbi/query", json=query_request) + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "success" + assert data["row_count"] == 1 + assert len(data["data"]) == 1 + + def 
test_execute_dax_query_missing_query(self, client): + """Test DAX query execution with missing query.""" + query_request = { + "semantic_model_id": "test-model" + } + + response = client.post("/powerbi/query", json=query_request) + + assert response.status_code == 422 # Validation error + + def test_execute_dax_query_service_error(self, client, mock_powerbi_service): + """Test DAX query execution with service error.""" + query_request = { + "dax_query": "EVALUATE 'Sales'", + "semantic_model_id": "test-model" + } + + mock_powerbi_service.execute_dax_query.side_effect = Exception("Query execution failed") + + response = client.post("/powerbi/query", json=query_request) + + assert response.status_code == 500 + assert "error" in response.json()["detail"].lower() + + def test_execute_dax_query_http_exception(self, client, mock_powerbi_service): + """Test DAX query execution with HTTP exception.""" + query_request = { + "dax_query": "EVALUATE 'Sales'", + "semantic_model_id": "test-model" + } + + mock_powerbi_service.execute_dax_query.side_effect = HTTPException( + status_code=400, + detail="Invalid DAX query" + ) + + response = client.post("/powerbi/query", json=query_request) + + assert response.status_code == 400 + assert "Invalid DAX query" in response.json()["detail"] + + +class TestPowerBIRouterStatusEndpoint: + """Test cases for Power BI status endpoint.""" + + def test_check_powerbi_status_configured(self, client, mock_powerbi_service): + """Test status check when Power BI is configured.""" + mock_config = MagicMock() + mock_config.is_enabled = True + mock_config.workspace_id = "test-workspace" + mock_config.semantic_model_id = "test-model" + + mock_powerbi_service.repository.get_active_config.return_value = mock_config + + response = client.get("/powerbi/status") + + assert response.status_code == 200 + data = response.json() + assert data["configured"] is True + assert data["enabled"] is True + assert "ready" in data["message"].lower() + + def test_check_powerbi_status_not_configured(self, client, mock_powerbi_service): + """Test status check when Power BI is not configured.""" + mock_powerbi_service.repository.get_active_config.return_value = None + + response = client.get("/powerbi/status") + + assert response.status_code == 200 + data = response.json() + assert data["configured"] is False + assert data["enabled"] is False + + def test_check_powerbi_status_disabled(self, client, mock_powerbi_service): + """Test status check when Power BI is configured but disabled.""" + mock_config = MagicMock() + mock_config.is_enabled = False + mock_config.workspace_id = "test-workspace" + mock_config.semantic_model_id = "test-model" + + mock_powerbi_service.repository.get_active_config.return_value = mock_config + + response = client.get("/powerbi/status") + + assert response.status_code == 200 + data = response.json() + assert data["configured"] is True + assert data["enabled"] is False + assert "disabled" in data["message"].lower() + + def test_check_powerbi_status_error(self, client, mock_powerbi_service): + """Test status check with service error.""" + mock_powerbi_service.repository.get_active_config.side_effect = Exception("Database error") + + response = client.get("/powerbi/status") + + assert response.status_code == 500 + + +class TestPowerBIRouterMultiTenancy: + """Test cases for multi-tenant functionality.""" + + def test_config_uses_group_context(self, client, mock_powerbi_service, mock_group_context): + """Test that configuration uses group context.""" + with 
patch('src.api.powerbi_router.is_workspace_admin', return_value=True): + config_data = { + "tenant_id": "test-tenant", + "client_id": "test-client", + "enabled": True + } + + mock_config = MagicMock() + mock_powerbi_service.repository.create_config.return_value = mock_config + + response = client.post("/powerbi/config", json=config_data) + + assert response.status_code == 200 + + # Verify create_config was called with group_id + call_args = mock_powerbi_service.repository.create_config.call_args[0][0] + assert call_args["group_id"] == "test-group" + assert call_args["created_by_email"] == "test@example.com" + + def test_query_uses_group_context(self, client, mock_powerbi_service): + """Test that queries use group context.""" + query_request = { + "dax_query": "EVALUATE 'Sales'", + "semantic_model_id": "test-model" + } + + mock_response = DAXQueryResponse( + status="success", + data=[], + row_count=0, + columns=[], + execution_time_ms=100 + ) + + mock_powerbi_service.execute_dax_query.return_value = mock_response + + response = client.post("/powerbi/query", json=query_request) + + assert response.status_code == 200 + + # Verify service was called (group_id is set during service initialization) + mock_powerbi_service.execute_dax_query.assert_called_once() diff --git a/src/backend/tests/unit/services/test_powerbi_service.py b/src/backend/tests/unit/services/test_powerbi_service.py new file mode 100644 index 00000000..b80e9e44 --- /dev/null +++ b/src/backend/tests/unit/services/test_powerbi_service.py @@ -0,0 +1,359 @@ +""" +Unit tests for PowerBIService. + +Tests the core functionality of Power BI integration operations including +DAX query execution, authentication, and result processing. +""" +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from datetime import datetime + +from fastapi import HTTPException + +from src.services.powerbi_service import PowerBIService +from src.schemas.powerbi_config import DAXQueryRequest, DAXQueryResponse + + +# Mock models +class MockPowerBIConfig: + def __init__(self, id=1, tenant_id="test-tenant", client_id="test-client", + semantic_model_id="test-model", workspace_id="test-workspace", + is_enabled=True, is_active=True, + created_at=None, updated_at=None): + self.id = id + self.tenant_id = tenant_id + self.client_id = client_id + self.semantic_model_id = semantic_model_id + self.workspace_id = workspace_id + self.is_enabled = is_enabled + self.is_active = is_active + self.created_at = created_at or datetime.utcnow() + self.updated_at = updated_at or datetime.utcnow() + + +@pytest.fixture +def mock_session(): + """Create a mock database session.""" + return AsyncMock() + + +@pytest.fixture +def mock_repository(): + """Create a mock PowerBIConfigRepository.""" + return AsyncMock() + + +@pytest.fixture +def powerbi_service(mock_session, mock_repository): + """Create a PowerBIService instance with mocks.""" + with patch('src.services.powerbi_service.PowerBIConfigRepository') as MockRepo: + MockRepo.return_value = mock_repository + service = PowerBIService(mock_session, group_id="test-group") + service.repository = mock_repository + return service + + +@pytest.fixture +def mock_powerbi_config(): + """Create a mock Power BI config.""" + return MockPowerBIConfig() + + +@pytest.fixture +def valid_dax_query_request(): + """Create a valid DAX query request.""" + return DAXQueryRequest( + dax_query="EVALUATE 'Sales'", + semantic_model_id="test-model", + workspace_id="test-workspace" + ) + + +@pytest.fixture +def mock_power_bi_api_response(): + 
"""Create a mock Power BI API response.""" + return { + "results": [{ + "tables": [{ + "rows": [ + {"Region": "East", "Total": 1000}, + {"Region": "West", "Total": 2000} + ] + }] + }] + } + + +class TestPowerBIServiceInitialization: + """Test cases for PowerBIService initialization.""" + + def test_powerbi_service_initialization(self, powerbi_service, mock_session, mock_repository): + """Test PowerBIService initialization.""" + assert powerbi_service.session == mock_session + assert powerbi_service.repository == mock_repository + assert powerbi_service.group_id == "test-group" + assert hasattr(powerbi_service, 'secrets_service') + + def test_powerbi_service_no_group_id(self, mock_session): + """Test PowerBIService initialization without group_id.""" + with patch('src.services.powerbi_service.PowerBIConfigRepository'): + service = PowerBIService(mock_session) + assert service.group_id is None + + +class TestPowerBIServiceExecuteDAXQuery: + """Test cases for execute_dax_query method.""" + + @pytest.mark.asyncio + async def test_execute_dax_query_success(self, powerbi_service, mock_powerbi_config, + valid_dax_query_request, mock_power_bi_api_response): + """Test successful DAX query execution.""" + # Mock repository to return config + powerbi_service.repository.get_active_config.return_value = mock_powerbi_config + + # Mock token generation + with patch.object(powerbi_service, '_generate_token', return_value="mock-token"): + # Mock API call + with patch.object(powerbi_service, '_execute_query', return_value=mock_power_bi_api_response["results"]): + result = await powerbi_service.execute_dax_query(valid_dax_query_request) + + assert isinstance(result, DAXQueryResponse) + assert result.status == "success" + assert result.row_count == 2 + assert len(result.data) == 2 + assert result.columns == ["Region", "Total"] + assert result.execution_time_ms >= 0 + + @pytest.mark.asyncio + async def test_execute_dax_query_no_config(self, powerbi_service, valid_dax_query_request): + """Test DAX query execution when no config exists.""" + powerbi_service.repository.get_active_config.return_value = None + + with pytest.raises(HTTPException) as exc_info: + await powerbi_service.execute_dax_query(valid_dax_query_request) + + assert exc_info.value.status_code == 404 + assert "No active Power BI configuration" in str(exc_info.value.detail) + + @pytest.mark.asyncio + async def test_execute_dax_query_disabled_config(self, powerbi_service, mock_powerbi_config, valid_dax_query_request): + """Test DAX query execution with disabled configuration.""" + mock_powerbi_config.is_enabled = False + powerbi_service.repository.get_active_config.return_value = mock_powerbi_config + + with pytest.raises(HTTPException) as exc_info: + await powerbi_service.execute_dax_query(valid_dax_query_request) + + assert exc_info.value.status_code == 400 + assert "disabled" in str(exc_info.value.detail).lower() + + @pytest.mark.asyncio + async def test_execute_dax_query_no_semantic_model(self, powerbi_service, mock_powerbi_config): + """Test DAX query execution without semantic model ID.""" + mock_powerbi_config.semantic_model_id = None + powerbi_service.repository.get_active_config.return_value = mock_powerbi_config + + query_request = DAXQueryRequest(dax_query="EVALUATE 'Sales'") + + with pytest.raises(HTTPException) as exc_info: + await powerbi_service.execute_dax_query(query_request) + + assert exc_info.value.status_code == 400 + assert "Semantic model ID is required" in str(exc_info.value.detail) + + @pytest.mark.asyncio + async def 
test_execute_dax_query_uses_default_model(self, powerbi_service, mock_powerbi_config, mock_power_bi_api_response): + """Test DAX query uses default semantic model from config.""" + powerbi_service.repository.get_active_config.return_value = mock_powerbi_config + + query_request = DAXQueryRequest(dax_query="EVALUATE 'Sales'") # No model ID in request + + with patch.object(powerbi_service, '_generate_token', return_value="mock-token"): + with patch.object(powerbi_service, '_execute_query', return_value=mock_power_bi_api_response["results"]) as mock_execute: + result = await powerbi_service.execute_dax_query(query_request) + + assert result.status == "success" + # Verify default model was used + mock_execute.assert_called_once() + call_args = mock_execute.call_args + assert call_args[1]['semantic_model_id'] == "test-model" + + @pytest.mark.asyncio + async def test_execute_dax_query_error_handling(self, powerbi_service, mock_powerbi_config, valid_dax_query_request): + """Test DAX query execution error handling.""" + powerbi_service.repository.get_active_config.return_value = mock_powerbi_config + + with patch.object(powerbi_service, '_generate_token', side_effect=Exception("Auth failed")): + result = await powerbi_service.execute_dax_query(valid_dax_query_request) + + assert result.status == "error" + assert "Auth failed" in result.error + assert result.data is None + + +class TestPowerBIServiceTokenGeneration: + """Test cases for _generate_token method.""" + + @pytest.mark.asyncio + async def test_generate_token_success(self, powerbi_service, mock_powerbi_config): + """Test successful token generation.""" + with patch('src.services.powerbi_service.UsernamePasswordCredential') as MockCred: + mock_credential = MagicMock() + mock_token = MagicMock() + mock_token.token = "test-token-123" + mock_credential.get_token.return_value = mock_token + MockCred.return_value = mock_credential + + with patch.dict('os.environ', { + 'POWERBI_USERNAME': 'test@example.com', + 'POWERBI_PASSWORD': 'test-password' + }): + token = await powerbi_service._generate_token(mock_powerbi_config) + + assert token == "test-token-123" + MockCred.assert_called_once() + + @pytest.mark.asyncio + async def test_generate_token_missing_credentials(self, powerbi_service, mock_powerbi_config): + """Test token generation with missing credentials.""" + with patch.dict('os.environ', {}, clear=True): + with pytest.raises(HTTPException) as exc_info: + await powerbi_service._generate_token(mock_powerbi_config) + + assert exc_info.value.status_code == 401 + assert "credentials" in str(exc_info.value.detail).lower() + + @pytest.mark.asyncio + async def test_generate_token_authentication_error(self, powerbi_service, mock_powerbi_config): + """Test token generation authentication error.""" + with patch('src.services.powerbi_service.UsernamePasswordCredential') as MockCred: + MockCred.side_effect = Exception("Auth failed") + + with patch.dict('os.environ', { + 'POWERBI_USERNAME': 'test@example.com', + 'POWERBI_PASSWORD': 'test-password' + }): + with pytest.raises(HTTPException) as exc_info: + await powerbi_service._generate_token(mock_powerbi_config) + + assert exc_info.value.status_code == 401 + + +class TestPowerBIServiceExecuteQuery: + """Test cases for _execute_query method.""" + + @pytest.mark.asyncio + async def test_execute_query_success(self, powerbi_service, mock_power_bi_api_response): + """Test successful Power BI API query execution.""" + with patch('src.services.powerbi_service.requests.post') as mock_post: + mock_response = 
MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = mock_power_bi_api_response + mock_post.return_value = mock_response + + result = await powerbi_service._execute_query( + token="test-token", + semantic_model_id="test-model", + dax_query="EVALUATE 'Sales'" + ) + + assert result == mock_power_bi_api_response["results"] + mock_post.assert_called_once() + + @pytest.mark.asyncio + async def test_execute_query_api_error(self, powerbi_service): + """Test Power BI API error handling.""" + with patch('src.services.powerbi_service.requests.post') as mock_post: + mock_response = MagicMock() + mock_response.status_code = 400 + mock_response.text = "Bad Request" + mock_post.return_value = mock_response + + with pytest.raises(HTTPException) as exc_info: + await powerbi_service._execute_query( + token="test-token", + semantic_model_id="test-model", + dax_query="INVALID DAX" + ) + + assert exc_info.value.status_code == 400 + + @pytest.mark.asyncio + async def test_execute_query_timeout(self, powerbi_service): + """Test Power BI API timeout handling.""" + with patch('src.services.powerbi_service.requests.post') as mock_post: + import requests + mock_post.side_effect = requests.exceptions.Timeout("Request timeout") + + with pytest.raises(HTTPException) as exc_info: + await powerbi_service._execute_query( + token="test-token", + semantic_model_id="test-model", + dax_query="EVALUATE 'Sales'" + ) + + assert exc_info.value.status_code == 500 + + +class TestPowerBIServicePostprocessData: + """Test cases for _postprocess_data method.""" + + def test_postprocess_data_success(self, powerbi_service, mock_power_bi_api_response): + """Test successful data postprocessing.""" + results = mock_power_bi_api_response["results"] + + data = powerbi_service._postprocess_data(results) + + assert len(data) == 2 + assert data[0] == {"Region": "East", "Total": 1000} + assert data[1] == {"Region": "West", "Total": 2000} + + def test_postprocess_data_empty_results(self, powerbi_service): + """Test postprocessing with empty results.""" + results = [] + + data = powerbi_service._postprocess_data(results) + + assert data == [] + + def test_postprocess_data_no_tables(self, powerbi_service): + """Test postprocessing with no tables in results.""" + results = [{"tables": []}] + + data = powerbi_service._postprocess_data(results) + + assert data == [] + + def test_postprocess_data_no_rows(self, powerbi_service): + """Test postprocessing with no rows in table.""" + results = [{"tables": [{"rows": []}]}] + + data = powerbi_service._postprocess_data(results) + + assert data == [] + + +class TestPowerBIServiceMultiTenancy: + """Test cases for multi-tenant functionality.""" + + @pytest.mark.asyncio + async def test_service_uses_group_id(self, mock_session): + """Test service properly uses group_id.""" + with patch('src.services.powerbi_service.PowerBIConfigRepository') as MockRepo: + mock_repo = AsyncMock() + MockRepo.return_value = mock_repo + + service = PowerBIService(mock_session, group_id="test-group") + + # Mock config + mock_config = MockPowerBIConfig() + mock_repo.get_active_config.return_value = mock_config + + query_request = DAXQueryRequest(dax_query="EVALUATE 'Sales'", semantic_model_id="test-model") + + with patch.object(service, '_generate_token', return_value="mock-token"): + with patch.object(service, '_execute_query', return_value=[]): + await service.execute_dax_query(query_request) + + # Verify get_active_config was called with group_id + 
mock_repo.get_active_config.assert_called_with(group_id="test-group") diff --git a/src/deploy.py b/src/deploy.py index 1a9ecbd1..d7b6d2db 100644 --- a/src/deploy.py +++ b/src/deploy.py @@ -352,7 +352,11 @@ def deploy_source_to_databricks( backend_src = root_dir / "backend" backend_dst = databricks_dist / "backend" if backend_src.exists(): - shutil.copytree(backend_src, backend_dst, ignore=shutil.ignore_patterns('__pycache__', '*.pyc', '*.pyo', 'logs', '*.log', '.mypy_cache', '.pytest_cache')) + shutil.copytree(backend_src, backend_dst, ignore=shutil.ignore_patterns( + '__pycache__', '*.pyc', '*.pyo', 'logs', '*.log', + '.mypy_cache', '.pytest_cache', 'htmlcov', 'tests', + '*.db', '*.db-shm', '*.db-wal', '.coverage' + )) logger.info(f"Copied backend folder") else: logger.error("Backend folder not found!") diff --git a/src/docs/API_REFERENCE.md b/src/docs/API_REFERENCE.md deleted file mode 100644 index 7b9a6b4f..00000000 --- a/src/docs/API_REFERENCE.md +++ /dev/null @@ -1,479 +0,0 @@ -# Kasal API Reference - -> **RESTful API v1** - Complete endpoint documentation with examples - ---- - -## Getting Started -Base URLs, authentication, and rate limits you need before calling endpoints. - -### Base URL -``` -Production: https://api.example.com/v1 -Staging: https://staging-api.example.com/v1 -Local: http://localhost:8000/api/v1 -``` - -### Authentication -```bash -# Get access token -curl -X POST https://api.example.com/v1/auth/login \ - -H "Content-Type: application/json" \ - -d '{"email": "user@example.com", "password": "secure_pass"}' - -# Use token in requests -curl -X GET https://api.example.com/v1/crews \ - -H "Authorization: Bearer YOUR_ACCESS_TOKEN" -``` - -### Rate Limits -| Tier | Requests/Hour | Burst | -|------|---------------|-------| -| **Free** | 1,000 | 100/min | -| **Pro** | 10,000 | 1,000/min | -| **Enterprise** | Unlimited | Custom | - ---- - -## Authentication Endpoints -Login, refresh, and logout flows to manage tokens. - -### POST /auth/login -**Login with credentials** -```json -Request: -{ - "email": "user@example.com", - "password": "secure_password" -} - -Response: 200 OK -{ - "access_token": "eyJ0eXAi...", - "token_type": "bearer", - "expires_in": 86400 -} -``` - -### POST /auth/refresh -**Refresh access token** -```json -Request: -{ - "refresh_token": "eyJ0eXAi..." -} - -Response: 200 OK -{ - "access_token": "eyJ0eXAi...", - "expires_in": 86400 -} -``` - -### POST /auth/logout -**Invalidate tokens** -```json -Response: 204 No Content -``` - ---- - -## Crew Management -Create and manage multi-agent crews and their configurations. 
- -### GET /crews -**List all crews** -```json -Response: 200 OK -{ - "crews": [ - { - "id": "crew_abc123", - "name": "Customer Support Crew", - "status": "active", - "agents_count": 3, - "created_at": "2024-01-15T10:30:00Z" - } - ], - "total": 15, - "page": 1 -} -``` - -### POST /crews -**Create new crew** -```json -Request: -{ - "name": "Marketing Crew", - "description": "Content generation team", - "process": "hierarchical", - "agents": [ - { - "role": "Content Writer", - "goal": "Create engaging content", - "model": "gpt-4" - } - ] -} - -Response: 201 Created -{ - "id": "crew_xyz789", - "name": "Marketing Crew", - "status": "configuring" -} -``` - -### GET /crews/{crew_id} -**Get crew details** -```json -Response: 200 OK -{ - "id": "crew_abc123", - "name": "Customer Support Crew", - "agents": [...], - "tasks": [...], - "configuration": {...} -} -``` - -### PUT /crews/{crew_id} -**Update crew configuration** -```json -Request: -{ - "name": "Updated Crew Name", - "process": "sequential" -} - -Response: 200 OK -{ - "id": "crew_abc123", - "updated": true -} -``` - -### DELETE /crews/{crew_id} -**Delete crew** -```json -Response: 204 No Content -``` - ---- - -## Agent Management -Create and list individual agents with roles, models, and tools. - -### GET /agents -**List all agents** -```json -Response: 200 OK -{ - "agents": [ - { - "id": "agent_001", - "name": "Research Agent", - "crew_id": "crew_abc123", - "model": "gpt-4", - "status": "ready" - } - ] -} -``` - -### POST /agents -**Create new agent** -```json -Request: -{ - "crew_id": "crew_abc123", - "role": "Data Analyst", - "goal": "Analyze metrics", - "backstory": "Expert analyst with 10 years experience", - "model": "claude-3-opus", - "tools": ["web_search", "calculator"] -} - -Response: 201 Created -{ - "id": "agent_002", - "status": "created" -} -``` - ---- - -## Execution Management -Start executions, get status, retrieve traces, and stop runs. - -### POST /executions -**Start crew execution** -```json -Request: -{ - "crew_id": "crew_abc123", - "inputs": { - "topic": "Q4 Marketing Strategy", - "deadline": "2024-12-31" - } -} - -Response: 202 Accepted -{ - "job_id": "job_qwerty123", - "status": "queued", - "estimated_duration": 300 -} -``` - -### GET /executions/{job_id} -**Get execution status** -```json -Response: 200 OK -{ - "job_id": "job_qwerty123", - "status": "running", - "progress": 65, - "current_task": "Analyzing data", - "started_at": "2024-01-15T14:00:00Z" -} -``` - -### GET /executions/{job_id}/traces -**Get execution trace** -```json -Response: 200 OK -{ - "traces": [ - { - "timestamp": "2024-01-15T14:00:05Z", - "agent": "Research Agent", - "action": "web_search", - "result": "Found 15 relevant articles" - } - ] -} -``` - -### POST /executions/{job_id}/stop -**Stop execution** -```json -Response: 200 OK -{ - "job_id": "job_qwerty123", - "status": "stopped" -} -``` - ---- - -## Task Management -Create and list tasks assigned to agents. - -### GET /tasks -**List tasks** -```json -Response: 200 OK -{ - "tasks": [ - { - "id": "task_001", - "description": "Generate report", - "agent_id": "agent_001", - "status": "completed" - } - ] -} -``` - -### POST /tasks -**Create task** -```json -Request: -{ - "agent_id": "agent_001", - "description": "Analyze competitor pricing", - "expected_output": "Markdown report", - "context": ["Previous analysis from Q3"] -} - -Response: 201 Created -{ - "id": "task_002", - "status": "created" -} -``` - ---- - -## Tool Management -Discover built-in tools and register custom tools. 
- -### GET /tools -**List available tools** -```json -Response: 200 OK -{ - "tools": [ - { - "name": "web_search", - "description": "Search the web", - "category": "research" - }, - { - "name": "file_reader", - "description": "Read files", - "category": "data" - } - ] -} -``` - -### POST /tools/custom -**Register custom tool** -```json -Request: -{ - "name": "salesforce_api", - "description": "Query Salesforce data", - "endpoint": "https://api.example.com/salesforce", - "auth_type": "bearer" -} - -Response: 201 Created -{ - "tool_id": "tool_custom_001", - "status": "registered" -} -``` - ---- - -## Memory Management -Fetch and clear short/long-term memory for a crew. - -### GET /memory/{crew_id} -**Get crew memory** -```json -Response: 200 OK -{ - "short_term": [ - { - "timestamp": "2024-01-15T10:00:00Z", - "content": "Customer prefers email communication" - } - ], - "long_term": [ - { - "category": "preferences", - "insights": ["Email preferred", "Weekly reports"] - } - ] -} -``` - -### POST /memory/{crew_id}/clear -**Clear memory** -```json -Request: -{ - "type": "short_term" // or "long_term" or "all" -} - -Response: 204 No Content -``` - ---- - - - -## šŸ”µ WebSocket Events -Real-time updates for task lifecycle, errors, and progress. - -### Connection -```javascript -const ws = new WebSocket('wss://api.kasal.ai/v1/ws'); - -ws.onopen = () => { - ws.send(JSON.stringify({ - type: 'subscribe', - job_id: 'job_qwerty123' - })); -}; -``` - -### Event Types -```javascript -// Task started -{ - "type": "task_start", - "job_id": "job_qwerty123", - "task_id": "task_001", - "agent": "Research Agent" -} - -// Task completed -{ - "type": "task_complete", - "job_id": "job_qwerty123", - "task_id": "task_001", - "result": "Analysis complete" -} - -// Error -{ - "type": "error", - "job_id": "job_qwerty123", - "message": "Rate limit exceeded", - "code": "RATE_LIMIT" -} -``` - ---- - -## šŸ”· Error Codes -Standardized error responses and meanings. - -| Code | Message | Description | -|------|---------|-------------| -| 400 | Bad Request | Invalid parameters | -| 401 | Unauthorized | Invalid/expired token | -| 403 | Forbidden | Insufficient permissions | -| 404 | Not Found | Resource doesn't exist | -| 429 | Too Many Requests | Rate limit exceeded | -| 500 | Internal Error | Server error | -| 503 | Service Unavailable | Maintenance mode | - -### Error Response Format -```json -{ - "error": { - "code": "VALIDATION_ERROR", - "message": "Invalid crew configuration", - "details": { - "field": "agents", - "reason": "At least one agent required" - } - } -} -``` - ---- - -## šŸ”¹ Testing -Sandbox, Postman collection, and OpenAPI spec. - -### Sandbox Environment -```bash -# Use sandbox for testing -curl -X POST https://sandbox-api.kasal.ai/v1/crews \ - -H "Authorization: Bearer SANDBOX_TOKEN" \ - -H "Content-Type: application/json" \ - -d @crew.json -``` - - ---- - -*Build powerful integrations with Kasal API* \ No newline at end of file diff --git a/src/docs/api_endpoints.md b/src/docs/api_endpoints.md new file mode 100644 index 00000000..01e3abfa --- /dev/null +++ b/src/docs/api_endpoints.md @@ -0,0 +1,524 @@ +# Kasal API Endpoints Reference + +Complete reference for all available API endpoints in the Kasal platform. 
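+
+A quick illustration of the calling convention used throughout this reference: every endpoint is reached via the base URL described below and authorized with a bearer token. A minimal sketch, where `<app-name>` is a placeholder for your deployed app hostname and `$TOKEN` holds a valid JWT:
+
+```bash
+curl -X GET "https://<app-name>.databricksapps.com/api/v1/health" \
+  -H "Authorization: Bearer $TOKEN"
+```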
+ +--- + +## Base URL + +All API endpoints use the following base URL structure: + +``` +https://.databricksapps.com/api/v1 +``` + +**Example:** +``` +https://kasal-dev-1444828305810485.aws.databricksapps.com/api/v1/executions +``` + +**Local Development:** +``` +http://localhost:8000/api/v1 +``` + +--- + +## Table of Contents + +- [Authentication](#authentication) +- [Crews (Workflows)](#crews-workflows) +- [Agents](#agents) +- [Tasks](#tasks) +- [Tools](#tools) +- [Executions](#executions) +- [Models](#models) +- [API Keys](#api-keys) +- [Power BI Integration](#power-bi-integration) +- [Health & Status](#health--status) + +--- + +## Authentication + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/auth/login` | User login with credentials | +| `POST` | `/auth/logout` | User logout | +| `GET` | `/auth/me` | Get current user information | +| `POST` | `/auth/refresh` | Refresh JWT token | + +**Authentication Header:** +``` +Authorization: Bearer +``` + +--- + +## Crews (Workflows) + +### Crew Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/crews` | List all crews in workspace | +| `POST` | `/crews` | Create a new crew | +| `GET` | `/crews/{id}` | Get crew details by ID | +| `PUT` | `/crews/{id}` | Update crew configuration | +| `DELETE` | `/crews/{id}` | Delete crew | +| `POST` | `/crews/{id}/duplicate` | Duplicate crew with new name | + +### Crew Execution + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/crews/{id}/kickoff` | Start crew execution | +| `POST` | `/crews/{id}/kickoff-async` | Start async crew execution | +| `GET` | `/crews/{id}/status` | Get crew execution status | +| `POST` | `/crews/{id}/stop` | Stop running crew | + +### Crew Export/Import + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/crews/{id}/export` | Export crew configuration as JSON | +| `POST` | `/crews/import` | Import crew from JSON | + +--- + +## Agents + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/agents` | List all agents | +| `POST` | `/agents` | Create a new agent | +| `GET` | `/agents/{id}` | Get agent details by ID | +| `PUT` | `/agents/{id}` | Update agent configuration | +| `DELETE` | `/agents/{id}` | Delete agent | + +**Agent Configuration Fields:** +- `name`: Agent name +- `role`: Agent role description +- `goal`: Agent's objective +- `backstory`: Agent's background context +- `tools`: Array of tool IDs +- `tool_configs`: Tool-specific configurations +- `llm_config`: LLM model and parameters + +--- + +## Tasks + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/tasks` | List all tasks | +| `POST` | `/tasks` | Create a new task | +| `GET` | `/tasks/{id}` | Get task details by ID | +| `PUT` | `/tasks/{id}` | Update task configuration | +| `DELETE` | `/tasks/{id}` | Delete task | + +**Task Configuration Fields:** +- `name`: Task name +- `description`: Task description +- `expected_output`: Expected output format +- `agent_id`: Assigned agent ID +- `context`: Context task IDs (dependencies) +- `tool_configs`: Task-level tool configurations + +--- + +## Tools + +### Tool Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/tools` | List all available tools | +| `GET` | `/tools/{id}` | Get tool details by ID | +| `PUT` | `/tools/{id}` | Update tool configuration | +| `POST` | `/tools/{id}/enable` | Enable tool 
for workspace | +| `POST` | `/tools/{id}/disable` | Disable tool for workspace | + +### Tool Categories + +**Available Tool Types:** +- `ai`: AI-powered tools (Dall-E, Perplexity) +- `database`: Database tools (Genie, Databricks, Power BI) +- `search`: Search tools (Serper, Knowledge Search) +- `web`: Web tools (Scrape Website) +- `integration`: Integration tools (MCP) +- `development`: Development tools + +--- + +## Executions + +### Execution Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/executions` | List all executions | +| `GET` | `/executions/{id}` | Get execution details | +| `GET` | `/executions/{id}/status` | Get execution status | +| `GET` | `/executions/{id}/logs` | Get execution logs | +| `POST` | `/executions/{id}/stop` | Stop running execution | +| `DELETE` | `/executions/{id}` | Delete execution record | + +### Execution Status Values + +- `pending`: Execution queued +- `running`: Execution in progress +- `completed`: Execution finished successfully +- `failed`: Execution failed with error +- `stopped`: Execution manually stopped + +--- + +## Models + +### Model Configuration + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/models` | List available LLM models | +| `GET` | `/models/{id}` | Get model configuration | +| `PUT` | `/models/{id}` | Update model parameters | +| `POST` | `/models/test` | Test model connection | + +**Supported Model Providers:** +- Databricks (Foundation Models) +- OpenAI (GPT-3.5, GPT-4) +- Anthropic (Claude) +- Google (Gemini) +- Azure OpenAI +- Ollama (Local models) + +--- + +## API Keys + +### API Key Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/api-keys` | List all API keys (encrypted) | +| `POST` | `/api-keys` | Create new API key | +| `GET` | `/api-keys/{id}` | Get API key details | +| `PUT` | `/api-keys/{id}` | Update API key value | +| `DELETE` | `/api-keys/{id}` | Delete API key | + +**Common API Keys:** +- `OPENAI_API_KEY`: OpenAI authentication +- `ANTHROPIC_API_KEY`: Anthropic Claude authentication +- `SERPER_API_KEY`: Serper search tool +- `PERPLEXITY_API_KEY`: Perplexity AI tool +- `DATABRICKS_TOKEN`: Databricks API access +- `POWERBI_CLIENT_SECRET`: Power BI service principal +- `POWERBI_USERNAME`: Power BI device code auth +- `POWERBI_PASSWORD`: Power BI device code auth + +**Security:** +- All API keys are encrypted at rest +- Keys are never returned in plain text via API +- Multi-tenant isolation by group_id + +--- + +## Power BI Integration + +### Power BI Configuration + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/powerbi/config` | Configure Power BI connection | +| `GET` | `/powerbi/config` | Get Power BI configuration | + +**Power BI Tool Configuration (Task-Level):** +```json +{ + "tenant_id": "Azure AD Tenant ID", + "client_id": "Azure AD Application ID", + "semantic_model_id": "Power BI Dataset ID", + "workspace_id": "Power BI Workspace ID (optional)", + "auth_method": "service_principal or device_code", + "databricks_job_id": "Databricks Job ID (optional)" +} +``` + +**Required API Keys:** +- `POWERBI_CLIENT_SECRET` +- `POWERBI_USERNAME` (for device_code) +- `POWERBI_PASSWORD` (for device_code) +- `DATABRICKS_API_KEY` or `DATABRICKS_TOKEN` + +--- + +## Health & Status + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/health` | API health check | +| `GET` | `/health/db` | Database connection status 
| +| `GET` | `/health/services` | External services status | +| `GET` | `/version` | API version information | + +--- + +## Common Response Formats + +### Success Response + +```json +{ + "status": "success", + "data": { ... }, + "message": "Operation completed successfully" +} +``` + +### Error Response + +```json +{ + "status": "error", + "error": { + "code": "ERROR_CODE", + "message": "Human-readable error message", + "details": { ... } + } +} +``` + +### Pagination + +For list endpoints that support pagination: + +``` +GET /crews?page=1&limit=50&sort=created_at&order=desc +``` + +**Query Parameters:** +- `page`: Page number (default: 1) +- `limit`: Items per page (default: 50, max: 100) +- `sort`: Sort field +- `order`: Sort order (`asc` or `desc`) + +--- + +## Rate Limiting + +**Default Limits:** +- Anonymous: 100 requests/hour +- Authenticated: 1000 requests/hour +- Enterprise: 10,000 requests/hour + +**Rate Limit Headers:** +``` +X-RateLimit-Limit: 1000 +X-RateLimit-Remaining: 999 +X-RateLimit-Reset: 1609459200 +``` + +--- + +## Memory Management + +### GET /api/v1/memory/{crew_id} +**Get crew memory (short-term and long-term)** + +```json +Response: 200 OK +{ + "short_term": [ + { + "timestamp": "2024-01-15T10:00:00Z", + "content": "Customer prefers email communication" + } + ], + "long_term": [ + { + "category": "preferences", + "insights": ["Email preferred", "Weekly reports"] + } + ] +} +``` + +### POST /api/v1/memory/{crew_id}/clear +**Clear crew memory** + +```json +Request: +{ + "type": "short_term" // Options: "short_term", "long_term", or "all" +} + +Response: 204 No Content +``` + +--- + +## WebSocket Endpoints + +### Real-Time Execution Updates + +``` +ws://localhost:8000/ws/executions/{execution_id} +``` + +**Message Format:** +```json +{ + "type": "status_update", + "execution_id": "abc123", + "status": "running", + "progress": 45, + "message": "Processing task 2 of 5..." +} +``` + +--- + +## Examples + +### Create and Execute a Crew + +```bash +# 1. Create a crew +curl -X POST http://localhost:8000/api/v1/crews \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Sales Analysis Crew", + "agents": [...], + "tasks": [...] + }' + +# Response: {"id": "crew_123", ...} + +# 2. Start execution +curl -X POST http://localhost:8000/api/v1/crews/crew_123/kickoff \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"inputs": {"query": "Analyze Q4 sales"}}' + +# Response: {"execution_id": "exec_456", ...} + +# 3. 
Monitor execution +curl -X GET http://localhost:8000/api/v1/executions/exec_456/status \ + -H "Authorization: Bearer $TOKEN" +``` + +### Configure Power BI Tool in Task + +```bash +# Create task with PowerBI configuration +curl -X POST http://localhost:8000/api/v1/tasks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Analyze Sales Data", + "description": "Analyze sales trends using Power BI", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } + }' +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| `AUTH_001` | Invalid or expired token | +| `AUTH_002` | Insufficient permissions | +| `CREW_001` | Crew not found | +| `CREW_002` | Invalid crew configuration | +| `EXEC_001` | Execution failed | +| `EXEC_002` | Execution timeout | +| `TOOL_001` | Tool not available | +| `TOOL_002` | Tool configuration error | +| `DB_001` | Database connection error | +| `EXT_001` | External service unavailable | + +--- + +## SDK Examples + +### Python SDK + +```python +from kasal import KasalClient + +# Initialize client +client = KasalClient( + base_url="http://localhost:8000", + token="your-jwt-token" +) + +# Create and execute crew +crew = client.crews.create( + name="Data Analysis Crew", + agents=[...], + tasks=[...] +) + +execution = crew.kickoff(inputs={"query": "Analyze data"}) +result = execution.wait() # Blocks until complete + +print(result.output) +``` + +### JavaScript/TypeScript SDK + +```typescript +import { KasalClient } from '@kasal/sdk'; + +const client = new KasalClient({ + baseUrl: 'http://localhost:8000', + token: 'your-jwt-token' +}); + +// Create and execute crew +const crew = await client.crews.create({ + name: 'Data Analysis Crew', + agents: [...], + tasks: [...] +}); + +const execution = await crew.kickoff({ + inputs: { query: 'Analyze data' } +}); + +// Stream results +execution.on('status', (status) => { + console.log('Status:', status); +}); + +const result = await execution.wait(); +console.log('Result:', result.output); +``` + +--- + +## Additional Resources + +- **API Playground**: `/api/playground` +- **OpenAPI Schema**: `/api/openapi.json` +- **Swagger UI**: `/api/docs` +- **ReDoc**: `/api/redoc` + +For more information, see: +- [Power BI Integration Guide](powerbi_integration.md) +- [Tool Configuration Guide](powerbi_analysis_tool_setup.md) +- [Crew Deployment Guide](crew_export_deployment.md) diff --git a/src/docs/powerbi_integration.md b/src/docs/powerbi_integration.md new file mode 100644 index 00000000..27a57420 --- /dev/null +++ b/src/docs/powerbi_integration.md @@ -0,0 +1,952 @@ +# Power BI Integration Guide + +Complete guide for integrating Power BI with Kasal AI agents for advanced business intelligence analysis. 
+ +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Setup Guide](#setup-guide) + - [Development Environment](#1-development-environment-setup) + - [Azure Service Principal](#2-azure-service-principal-setup) + - [Databricks Configuration](#3-databricks-configuration) + - [Kasal Configuration](#4-kasal-configuration) +- [Authentication Methods](#authentication-methods) +- [API Configuration](#api-configuration) +- [PowerBI Analysis Tool](#powerbi-analysis-tool) +- [Testing](#testing) +- [Troubleshooting](#troubleshooting) +- [Security & Best Practices](#security--best-practices) + +--- + +## Overview + +The Power BI integration enables Kasal AI agents to execute complex analysis against Power BI semantic models using Databricks compute resources. This provides a production-ready, API-driven connector for Power BI analytics within AI workflows based on a preconfigured template notebook for tracability. + +**Key Features:** +- DAX query execution against Power BI semantic models +- Complex analysis using Databricks job orchestration +- Multiple authentication methods (Service Principal, Device Code Flow) +- Task-level configuration for workspace and semantic model selection +- Multi-tenant isolation with encrypted credential storage + +**Use Cases:** +- Year-over-year growth analysis +- Trend detection and forecasting +- Complex financial reporting +- Multi-dimensional business analysis +- Automated business intelligence reporting + +--- + +## Architecture + +### System Components + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kasal AI │ +│ Agent │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + └─ PowerBIAnalysisTool + └─> Databricks Job + ā”œā”€ Step 1: Extract Power BI metadata + ā”œā”€ Step 2: Generate DAX query from business question + └─ Step 3: Execute query + └─> Power BI REST API + └─> Returns: JSON result data +``` + +### Backend Components + +1. **API Keys Service** (`services/api_keys_service.py`) + - Stores encrypted Power BI credentials + - Multi-tenant isolation via `group_id` + - Handles: `POWERBI_CLIENT_SECRET`, `POWERBI_USERNAME`, `POWERBI_PASSWORD` + +2. **Databricks Auth Context** (`utils/databricks_auth.py`) + - Auto-detects `databricks_host` from environment + - Retrieves `databricks_token` from API Keys or environment + +3. **PowerBIAnalysisTool** (`engines/crewai/tools/custom/powerbi_analysis_tool.py`) + - CrewAI tool for Power BI analysis + - Wraps Databricks job execution + - Handles credential retrieval and job parameter passing + +4. **Tool Factory** (`engines/crewai/tools/tool_factory.py`) + - Instantiates tools with task-level configuration + - Merges base tool config with task-specific overrides + +### Frontend Components + +1. **PowerBIConfigSelector** (`components/Common/PowerBIConfigSelector.tsx`) + - Task-level Power BI configuration UI + - Appears when PowerBIAnalysisTool is selected + - Validates required API Keys + +2. **TaskForm** (`components/Tasks/TaskForm.tsx`) + - Integrates PowerBIConfigSelector + - Stores configuration in `tool_configs.PowerBIAnalysisTool` + +### Authentication Flow + +1. Kasal retrieves credentials from API Keys Service +2. Auto-detects Databricks host from unified auth context +3. Passes credentials to Databricks job as parameters +4. Databricks job authenticates with Azure AD +5. Azure AD issues Power BI access token +6. 
Access token used to call Power BI REST API + +--- + +## Prerequisites + +### Required Accounts & Access + +- **Azure Tenant**: Admin access for Service Principal or Service Account (if PBI with RLS enforcement) setup +- **Power BI**: Workspace access and semantic model permissions +- **Databricks Workspace**: Access with token for job creation +- **Operating System**: Linux/macOS (Ubuntu on VDI for production) +- **Key vault connect to Databricks**: Connection of centrally managed secrets as KV variables within Databricks +- **Python**: 3.11+ +- **Node.js**: LTS version + +### Power BI Requirements + +- Power BI workspace with semantic models +- Workspace ID and Semantic Model ID +- Admin permissions to grant Service Principal access + +### Azure AD Requirements + +- Permission to create App Registrations +- Admin consent capability for API permissions +- Ability to create and manage Client Secrets + +--- + +## Setup Guide + +### 1. Development Environment Setup + +#### 1.1 Install Python 3.11 + +```bash +# Add the deadsnakes PPA (Ubuntu) +sudo add-apt-repository ppa:deadsnakes/ppa -y +sudo apt update + +# Install Python 3.11 +sudo apt install python3.11 python3.11-venv python3.11-dev -y + +# Verify installation +python3.11 --version +``` + +#### 1.2 Clone Repository + +```bash +# Clone the Kasal repository +git clone https://github.com/databrickslabs/kasal.git +cd kasal + +# Checkout the feature branch +git checkout feature/pbi-tool +``` + +#### 1.3 Create Virtual Environment + +```bash +# Create virtual environment with Python 3.11 +python3.11 -m venv venv + +# Activate the environment +source venv/bin/activate + +# Upgrade pip +pip install --upgrade pip +``` + +#### 1.4 Install Dependencies + +```bash +# Navigate to src directory +cd src + +# Install Python dependencies +pip install -r requirements.txt + +# Verify installations +pip freeze | grep -E "crewai|litellm|databricks" +``` + +#### 1.5 Install Node.js (if needed) + +```bash +# Install Node Version Manager (nvm) +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash + +# Load nvm +source ~/.bashrc + +# Install Node.js LTS +nvm install --lts +nvm use --lts + +# Verify installations +node --version +npm --version +``` + +--- + +### 2. Azure Service Principal Setup + +To enable non-interactive authentication, create an Azure Service Principal with Power BI read permissions. + +#### 2.1 Create Service Principal in Azure Portal + +1. **Navigate to Azure Portal**: https://portal.azure.com +2. **Go to Azure Active Directory** → **App registrations** +3. **Click "New registration"**: + - **Name**: `Kasal-PowerBI-Connector` (or your preferred name) + - **Supported account types**: Single tenant + - **Redirect URI**: Leave blank +4. **Note the Application (client) ID** and **Directory (tenant) ID** + +Please consider that for some PowerBI reports a service principal might not be enough, but a service account might be needed. This will be especially the case for PowerBIs that enfore RLS within the PowerBI. + +#### 2.2 Create Client Secret + +1. In your app registration, go to **Certificates & secrets** +2. Click **New client secret** +3. **Description**: `Kasal PowerBI Tool` +4. **Expires**: Choose expiration period (recommended: 90 days) +5. **Copy the secret value** immediately (you won't be able to see it again) + +#### 2.3 Configure API Permissions + +**Critical**: The Service Principal needs **Application** permissions, not **Delegated**. + +1. Go to **API permissions** in your app registration +2. 
**Remove any Delegated permissions** if present +3. Click **Add a permission** +4. Select **Power BI Service** +5. Choose **Application permissions** (NOT Delegated) +6. Check **Dataset.Read.All** +7. Click **Add permissions** +8. **Click "Grant admin consent for [Your Organization]"** (requires admin) + +**Important**: This step requires **Azure AD Admin** privileges. If you don't have admin rights, use the email template in the Appendix. + +#### 2.4 Enable Service Principal in Power BI Admin Portal + +1. Go to **Power BI Admin Portal**: https://app.powerbi.com/admin-portal/tenantSettings +2. Navigate to **Developer settings** (or **Tenant settings**) +3. Find **Service principals can use Power BI APIs** +4. **Enable** this setting +5. Add your Service Principal to the allowed list: + - Option 1: Add specific Service Principal by name + - Option 2: Add to a security group that's allowed + +#### 2.5 Grant Workspace Access + +For each Power BI workspace you want to access: + +1. Open the Power BI workspace +2. Click **Workspace settings** +3. Go to **Access** +4. Click **Add people or groups** +5. Search for your Service Principal name +6. Assign role: **Member** or **Contributor** + +--- + +### 3. Databricks Configuration + +#### 3.1 Set Environment Variables + +```bash +# Set Databricks credentials +export DATABRICKS_TOKEN="your-databricks-token" +export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com/" +``` + +#### 3.2 Configure Databricks CLI + +```bash +# Configure Databricks CLI +databricks configure --host https://your-workspace.cloud.databricks.com --token +``` + +If the prompt doesn't appear: +```bash +# Unset environment variables and retry +unset DATABRICKS_HOST +unset DATABRICKS_TOKEN +databricks configure --host https://your-workspace.cloud.databricks.com --token +``` + +#### 3.3 Verify Connection + +```bash +# Test workspace access +databricks workspace list / +``` + +#### 3.4 Create Databricks Job + +The PowerBIAnalysisTool requires a Databricks job for executing the analysis pipeline. + +1. **Navigate to Databricks Workflows**: + - Go to your Databricks workspace + - Click **Workflows** in the left sidebar + +2. **Create New Job**: + - Click **Create Job** + - **Job Name**: `pbi_e2e_pipeline` + +3. **Add Task**: + - Click **Add Task** + - **Task Name**: `pbi_e2e_pipeline` + - **Type**: Notebook + - **Notebook Path**: `/Workspace/Shared/powerbi_full_pipeline` + - **Cluster**: Select or create appropriate cluster + +4. **Note the Job ID**: + - After creating the job, copy the **Job ID** from the URL + - Example: `365257288725339` + - This will be used in PowerBIAnalysisTool configuration + +#### 3.5 Upload Pipeline Notebook + +```bash +# Upload the notebook to Databricks +# Please note that the security features were implemented +# But for the notebook to work you need to be precise with +# pre-requisites (Key-Vault setup) and PBI SVP setting (ask respective admins) +databricks workspace import \ + examples/powerbi_full_pipeline.ipynb \ + /Workspace/Shared/powerbi_full_pipeline \ + --language PYTHON \ + --format JUPYTER +``` + +Or manually upload via Databricks UI: +1. Go to **Workspace** → **Shared** +2. Click **Create** → **Import** +3. Upload `examples/powerbi_full_pipeline.ipynb` + +--- + +### 4. Kasal Configuration + +#### 4.1 Build Frontend + +```bash +# From the project root +python src/build.py +``` + +This creates a `frontend_static` folder with compiled React application. 
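+
+As an optional sanity check before deploying, you can confirm the build output exists. This is a minimal sketch; the folder name comes from the build step above, and it is assumed to be created relative to where you ran the build.
+
+```bash
+# Optional: verify the compiled frontend assets are present before deploying.
+# Adjust the path if the folder is created elsewhere in your checkout.
+ls frontend_static
+```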
+ +#### 4.2 Deploy to Databricks Apps + +```bash +# Deploy the application +cd src +python deploy.py \ + --app-name kasal \ + --user-name your-email@domain.com +``` + +**Note**: Replace `--app-name` and `--user-name` with your specific values. + +#### 4.3 Configure API Keys + +After deploying, configure required API Keys: + +1. **Navigate to Configuration** → **API Keys** +2. **Add the following keys**: + - `POWERBI_CLIENT_SECRET`: Service Principal secret (from section 2.2) + - `POWERBI_USERNAME`: Power BI service account email (for device code auth) + - `POWERBI_PASSWORD`: Service account password (for device code auth) + - `DATABRICKS_API_KEY` or `DATABRICKS_TOKEN`: Databricks access token + +**Important**: All values are encrypted at rest and never returned in plain text via API. + +#### 4.4 Enable PowerBIAnalysisTool + +1. Go to **Tools** section +2. Find **PowerBIAnalysisTool** +3. Review security disclaimers +4. Enable the tool for your workspace + +--- + +## Authentication Methods + +The PowerBIAnalysisTool supports two authentication methods: + +### Service Principal (Recommended for Production) + +**Best for**: Automated workflows, production deployments, unattended execution + +**Requirements**: +- `tenant_id`: Azure AD Tenant ID +- `client_id`: Service Principal Application ID +- `POWERBI_CLIENT_SECRET`: Stored in API Keys + +**Configuration**: +```json +{ + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "auth_method": "service_principal" +} +``` + +**Advantages**: +- Non-interactive, fully automated +- No MFA requirements +- Production-ready +- Supports scheduled workflows + +### Device Code Flow (Recommended for Testing) + +**Best for**: Development, testing, personal workspaces + +**Requirements**: +- `tenant_id`: Azure AD Tenant ID +- `client_id`: Can use Power BI public client `1950a258-227b-4e31-a9cf-717495945fc2` +- `POWERBI_USERNAME`: User email (stored in API Keys) +- `POWERBI_PASSWORD`: User password (stored in API Keys) + +**Configuration**: +```json +{ + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "1950a258-227b-4e31-a9cf-717495945fc2", + "auth_method": "device_code" +} +``` + +**How it works**: +1. First request prompts: "Visit microsoft.com/devicelogin" +2. Enter provided code in browser +3. Sign in with your credentials +4. Token is cached for subsequent requests (~1 hour) + +**Advantages**: +- No Service Principal setup required +- Uses your personal Power BI permissions +- Perfect for development and testing +- Supports MFA + +--- + +## API Configuration + +### Task-Level Configuration + +Configure Power BI settings at the **task level** for flexibility across different semantic models: + +1. **Create or Edit Task** +2. **Select PowerBIAnalysisTool** in tools list +3. 
**Configure Power BI settings** (fields appear automatically): + - **Tenant ID**: Azure AD tenant GUID + - **Client ID**: Service Principal or app client ID + - **Workspace ID**: Power BI workspace GUID (optional) + - **Semantic Model ID**: Power BI semantic model/dataset GUID + - **Auth Method**: `service_principal` or `device_code` + - **Databricks Job ID**: Databricks job ID for analysis pipeline + +**Example Task Configuration**: +```json +{ + "name": "Analyze Sales Data", + "description": "Analyze Q4 sales trends using Power BI", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +### Required API Keys Check + +The UI automatically checks for required API Keys when PowerBIAnalysisTool is selected: +- `POWERBI_CLIENT_SECRET` +- `POWERBI_USERNAME` +- `POWERBI_PASSWORD` +- `DATABRICKS_API_KEY` (or `DATABRICKS_TOKEN`) + +If keys are missing, an error alert is displayed with instructions. + +--- + +## PowerBI Analysis Tool + +### Tool Overview + +**PowerBIAnalysisTool** (ID: 71) enables complex Power BI analysis via Databricks job orchestration. + +**Best for**: +- Heavy computation and large datasets +- Complex multi-query analysis +- Year-over-year comparisons +- Trend detection and forecasting +- Resource-intensive business intelligence tasks + +### Tool Parameters + +**Input Parameters**: +- `question` (str): Business question to analyze +- `dashboard_id` (str): Semantic model ID (can be provided by LLM or task config) +- `workspace_id` (str): Power BI workspace ID (optional) +- `additional_params` (dict): Optional additional parameters + +**Configuration** (from tool_configs): +- `tenant_id`: Azure AD tenant +- `client_id`: Application client ID +- `semantic_model_id`: Default semantic model +- `workspace_id`: Default workspace +- `auth_method`: Authentication method +- `databricks_job_id`: Databricks job ID for pipeline + +### Agent Configuration Example + +```json +{ + "role": "Business Intelligence Analyst", + "goal": "Perform complex Power BI analysis using Databricks", + "backstory": "Expert analyst with deep understanding of business metrics", + "tools": ["PowerBIAnalysisTool"], + "llm_config": { + "model": "databricks-meta-llama-3-1-70b-instruct", + "temperature": 0.1 + } +} +``` + +### Task Configuration Example + +```json +{ + "name": "Q4 Revenue Analysis", + "description": "Analyze Q4 2024 revenue trends by product category and region, comparing year-over-year growth", + "expected_output": "Comprehensive analysis report with insights and recommendations", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +### How It Works + +1. **Agent receives task** with business question +2. **PowerBIAnalysisTool invoked** with question and semantic model ID +3. **Tool retrieves credentials** from API Keys Service +4. 
**Tool auto-detects** databricks_host from environment +5. **Databricks job triggered** with parameters: + - `question`: Business question + - `semantic_model_id`: Dataset to query + - `workspace_id`: Power BI workspace + - `tenant_id`, `client_id`: Authentication + - `client_secret`, `username`, `password`: Credentials + - `databricks_host`, `databricks_token`: For recursive auth +6. **Job executes pipeline**: + - Extracts Power BI metadata + - Generates DAX query from question + - Executes query against Power BI + - Returns structured results +7. **Agent receives results** and continues workflow + +--- + +## Testing + +### Local Development Testing + +#### 1. Start Services + +**Backend**: +```bash +cd src/backend +./run.sh sqlite +# Backend starts on http://localhost:8000 +``` + +**Frontend**: +```bash +cd src/frontend +npm start +# Frontend starts on http://localhost:3000 +``` + +#### 2. Configure via UI + +1. Open http://localhost:3000 +2. Navigate to **Configuration** → **API Keys** +3. Add required keys: + - `POWERBI_CLIENT_SECRET` + - `POWERBI_USERNAME` + - `POWERBI_PASSWORD` + - `DATABRICKS_API_KEY` +4. Navigate to **Tools** → Enable **PowerBIAnalysisTool** + +#### 3. Create Test Agent and Task + +**Agent**: +```json +{ + "role": "Sales Analyst", + "goal": "Analyze Power BI sales data", + "tools": ["PowerBIAnalysisTool"] +} +``` + +**Task**: +```json +{ + "description": "What is the total revenue for Q4 2024?", + "expected_output": "Revenue figure with analysis", + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "your-tenant-id", + "client_id": "your-client-id", + "semantic_model_id": "your-model-id", + "workspace_id": "your-workspace-id", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +#### 4. Run Workflow + +1. Click **Run Crew** +2. Monitor execution in **Runs** tab +3. Check Databricks **Workflows** for job execution +4. Verify results in execution logs + +### Production Testing (Databricks App) + +#### 1. Deploy to Databricks + +```bash +cd src +python deploy.py --app-name kasal-prod --user-name your-email@domain.com +``` + +#### 2. Configure in Deployed App + +1. Open deployed app URL +2. Navigate to **Configuration** → **API Keys** +3. Add production credentials +4. Enable **PowerBIAnalysisTool** + +#### 3. Create Production Workflow + +Create agent and task using production semantic model IDs and workspace IDs. + +#### 4. End-to-End Test + +1. Run crew execution +2. Monitor Databricks job logs +3. Verify Power BI API calls in Azure AD audit logs +4. Validate results accuracy + +### Sample Test Queries + +**Simple aggregation**: +```json +{ + "question": "What is the total revenue by region?" +} +``` + +**Year-over-year analysis**: +```json +{ + "question": "Compare Q4 2024 revenue to Q4 2023 by product category" +} +``` + +**Trend analysis**: +```json +{ + "question": "Show monthly sales trends for the last 12 months" +} +``` + +--- + +## Troubleshooting + +### Authentication Issues + +**Error**: "Provided OAuth token does not have required scopes" + +**Causes**: +- Missing OAuth scopes in Databricks App configuration +- Service Principal lacks Power BI API permissions + +**Solutions**: +1. Verify Service Principal has **Application** (not Delegated) permissions +2. Ensure admin consent was granted in Azure AD +3. Check Service Principal is enabled in Power BI Admin Portal +4. 
For Databricks Apps, configure OAuth scopes: `sql`, `all-apis` + +--- + +**Error**: "Authentication failed: 403 Forbidden" + +**Causes**: +- Service Principal doesn't have workspace access +- Incorrect workspace ID + +**Solutions**: +1. Add Service Principal to Power BI workspace with Member/Contributor role +2. Verify workspace_id matches the actual workspace GUID +3. Check Power BI audit logs for access denied events + +--- + +### Configuration Issues + +**Error**: "tenant_id showing as 'your_tenant_id'" + +**Cause**: LLM-provided placeholder values taking precedence over task config + +**Solution**: Verify task configuration priority in tool_factory.py - task config should override LLM values + +--- + +**Error**: "semantic_model_id truncated or incorrect" + +**Cause**: dashboard_id from kwargs overriding task config value + +**Solution**: Check powerbi_analysis_tool.py lines 314-316 for proper priority handling + +--- + +**Error**: "Missing databricks_host or databricks_token in job parameters" + +**Cause**: Credentials not being passed to job parameters + +**Solution**: Verify powerbi_analysis_tool.py lines 411-418 add credentials to job_params + +--- + +### Job Execution Issues + +**Error**: "Databricks job times out" + +**Causes**: +- Large dataset +- Complex DAX query +- Insufficient cluster resources + +**Solutions**: +1. Increase job timeout in tool configuration +2. Optimize DAX query for performance +3. Use more powerful cluster for the job +4. Consider breaking analysis into smaller queries + +--- + +**Error**: "Dataset.Read.All permission not found" + +**Cause**: Using Delegated permission instead of Application permission + +**Solution**: +1. Go to Azure AD → App registrations → API permissions +2. Remove Delegated permissions +3. Add **Application** permission: Dataset.Read.All +4. Grant admin consent + +--- + +**Error**: "Client secret expired" + +**Cause**: Azure client secrets expire after set period + +**Solution**: +1. Create new client secret in Azure Portal +2. Update `POWERBI_CLIENT_SECRET` in API Keys +3. Rotate secrets regularly (recommended: every 90 days) + +--- + +## Security & Best Practices + +### Credential Management + +1. **Use API Keys Service**: + - All credentials stored encrypted at rest + - Multi-tenant isolation via group_id + - Never commit credentials to source control + +2. **Rotate Credentials Regularly**: + - Rotate Service Principal secrets every 90 days + - Use Azure Key Vault for production deployments + - Monitor credential usage in audit logs + +3. **Principle of Least Privilege**: + - Only grant workspace access where needed + - Use Power BI RLS (Row-Level Security) for data filtering + - Limit Service Principal to read-only permissions + +### Production Secret Management with Key Vaults + +For production deployments, **never pass credentials directly as job parameters**. 
Instead, use key vault references: + +#### Architecture Pattern + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kasal App │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Pass secret names only + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Databricks Job │ +│ Parameters: │ +│ { │ +│ "client_secret_key": "powerbi-client-secret" ← Secret name +│ "username_key": "powerbi-username" ← Secret name +│ } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Retrieve actual values + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Key Vault Storage │ +│ (Azure Key Vault, │ +│ Databricks Secrets, │ +│ AWS Secrets Manager) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +#### Option 1: Azure Key Vault (Recommended for Azure) + +**Setup Azure Key Vault:** + +1. **Create Key Vault** in Azure Portal +2. **Add Secrets**: + - `powerbi-client-secret`: Service Principal secret + - `powerbi-username`: Service account username + - `powerbi-password`: Service account password + - `databricks-token`: Databricks PAT + +3. **Grant Access** to Databricks workspace: + - Use Managed Identity or Service Principal + - Assign "Key Vault Secrets User" role + +**Configure Databricks to Access Azure Key Vault:** + +```bash +# Create secret scope backed by Azure Key Vault +databricks secrets create-scope --scope azure-key-vault \ + --scope-backend-type AZURE_KEYVAULT \ + --resource-id /subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.KeyVault/vaults/{vault-name} \ + --dns-name https://{vault-name}.vault.azure.net/ +``` + +**Notebook Code (Secure Approach):** + +```python +import os + +# Retrieve secrets from Databricks secret scope (backed by Azure Key Vault) +client_secret = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-client-secret") +username = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-username") +password = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-password") +databricks_token = dbutils.secrets.get(scope="azure-key-vault", key="databricks-token") + +# Use credentials for authentication +powerbi_config = { + "tenant_id": dbutils.widgets.get("tenant_id"), + "client_id": dbutils.widgets.get("client_id"), + "client_secret": client_secret, # Retrieved from Key Vault + "username": username, # Retrieved from Key Vault + "password": password # Retrieved from Key Vault +} +``` + +**Job Parameters (No Sensitive Data):** + +```json +{ + "question": "Analyze Q4 revenue", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9" +} +``` + +**Set environment variables** in Databricks job cluster configuration: + +```json +{ + "spark_env_vars": { + "POWERBI_CLIENT_SECRET": "{{secrets/powerbi-secrets/client-secret}}", + "POWERBI_USERNAME": "{{secrets/powerbi-secrets/username}}", + "POWERBI_PASSWORD": "{{secrets/powerbi-secrets/password}}" + } +} +``` + +**Note**: No secrets in job parameters - just their names! All retrieved from Key Vault. 
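+
+As a quick manual test of this pattern, the job can also be triggered directly with only non-sensitive parameters. This is a minimal sketch, assuming the Databricks Jobs 2.1 REST API, the example job ID used earlier in this guide, and that `DATABRICKS_HOST` and `DATABRICKS_TOKEN` are exported as in section 3.1; the secrets themselves stay in the key vault and are resolved inside the notebook.
+
+```bash
+# Trigger the analysis job with non-sensitive parameters only.
+# Credentials are NOT passed here; the notebook reads them from the key vault.
+# ${DATABRICKS_HOST%/} strips any trailing slash from the host URL.
+curl -X POST "${DATABRICKS_HOST%/}/api/2.1/jobs/run-now" \
+  -H "Authorization: Bearer $DATABRICKS_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "job_id": 365257288725339,
+    "notebook_params": {
+      "question": "Analyze Q4 revenue",
+      "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75",
+      "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227",
+      "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc",
+      "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9"
+    }
+  }'
+```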
+ +--- + +#### Option 2: Environment Variables (Development Only) + +**For local development**, use environment variables: + +```python +import os + +# Retrieve from environment +client_secret = os.getenv("POWERBI_CLIENT_SECRET") +username = os.getenv("POWERBI_USERNAME") +password = os.getenv("POWERBI_PASSWORD") +databricks_token = os.getenv("DATABRICKS_TOKEN") +``` + +**Set environment variables** in Databricks job cluster configuration: + +```json +{ + "spark_env_vars": { + "POWERBI_CLIENT_SECRET": "{{secrets/powerbi-secrets/client-secret}}", + "POWERBI_USERNAME": "{{secrets/powerbi-secrets/username}}", + "POWERBI_PASSWORD": "{{secrets/powerbi-secrets/password}}" + } +} +``` diff --git a/src/frontend/public/docs/API_REFERENCE.md b/src/frontend/public/docs/API_REFERENCE.md deleted file mode 100644 index 7b9a6b4f..00000000 --- a/src/frontend/public/docs/API_REFERENCE.md +++ /dev/null @@ -1,479 +0,0 @@ -# Kasal API Reference - -> **RESTful API v1** - Complete endpoint documentation with examples - ---- - -## Getting Started -Base URLs, authentication, and rate limits you need before calling endpoints. - -### Base URL -``` -Production: https://api.example.com/v1 -Staging: https://staging-api.example.com/v1 -Local: http://localhost:8000/api/v1 -``` - -### Authentication -```bash -# Get access token -curl -X POST https://api.example.com/v1/auth/login \ - -H "Content-Type: application/json" \ - -d '{"email": "user@example.com", "password": "secure_pass"}' - -# Use token in requests -curl -X GET https://api.example.com/v1/crews \ - -H "Authorization: Bearer YOUR_ACCESS_TOKEN" -``` - -### Rate Limits -| Tier | Requests/Hour | Burst | -|------|---------------|-------| -| **Free** | 1,000 | 100/min | -| **Pro** | 10,000 | 1,000/min | -| **Enterprise** | Unlimited | Custom | - ---- - -## Authentication Endpoints -Login, refresh, and logout flows to manage tokens. - -### POST /auth/login -**Login with credentials** -```json -Request: -{ - "email": "user@example.com", - "password": "secure_password" -} - -Response: 200 OK -{ - "access_token": "eyJ0eXAi...", - "token_type": "bearer", - "expires_in": 86400 -} -``` - -### POST /auth/refresh -**Refresh access token** -```json -Request: -{ - "refresh_token": "eyJ0eXAi..." -} - -Response: 200 OK -{ - "access_token": "eyJ0eXAi...", - "expires_in": 86400 -} -``` - -### POST /auth/logout -**Invalidate tokens** -```json -Response: 204 No Content -``` - ---- - -## Crew Management -Create and manage multi-agent crews and their configurations. 
- -### GET /crews -**List all crews** -```json -Response: 200 OK -{ - "crews": [ - { - "id": "crew_abc123", - "name": "Customer Support Crew", - "status": "active", - "agents_count": 3, - "created_at": "2024-01-15T10:30:00Z" - } - ], - "total": 15, - "page": 1 -} -``` - -### POST /crews -**Create new crew** -```json -Request: -{ - "name": "Marketing Crew", - "description": "Content generation team", - "process": "hierarchical", - "agents": [ - { - "role": "Content Writer", - "goal": "Create engaging content", - "model": "gpt-4" - } - ] -} - -Response: 201 Created -{ - "id": "crew_xyz789", - "name": "Marketing Crew", - "status": "configuring" -} -``` - -### GET /crews/{crew_id} -**Get crew details** -```json -Response: 200 OK -{ - "id": "crew_abc123", - "name": "Customer Support Crew", - "agents": [...], - "tasks": [...], - "configuration": {...} -} -``` - -### PUT /crews/{crew_id} -**Update crew configuration** -```json -Request: -{ - "name": "Updated Crew Name", - "process": "sequential" -} - -Response: 200 OK -{ - "id": "crew_abc123", - "updated": true -} -``` - -### DELETE /crews/{crew_id} -**Delete crew** -```json -Response: 204 No Content -``` - ---- - -## Agent Management -Create and list individual agents with roles, models, and tools. - -### GET /agents -**List all agents** -```json -Response: 200 OK -{ - "agents": [ - { - "id": "agent_001", - "name": "Research Agent", - "crew_id": "crew_abc123", - "model": "gpt-4", - "status": "ready" - } - ] -} -``` - -### POST /agents -**Create new agent** -```json -Request: -{ - "crew_id": "crew_abc123", - "role": "Data Analyst", - "goal": "Analyze metrics", - "backstory": "Expert analyst with 10 years experience", - "model": "claude-3-opus", - "tools": ["web_search", "calculator"] -} - -Response: 201 Created -{ - "id": "agent_002", - "status": "created" -} -``` - ---- - -## Execution Management -Start executions, get status, retrieve traces, and stop runs. - -### POST /executions -**Start crew execution** -```json -Request: -{ - "crew_id": "crew_abc123", - "inputs": { - "topic": "Q4 Marketing Strategy", - "deadline": "2024-12-31" - } -} - -Response: 202 Accepted -{ - "job_id": "job_qwerty123", - "status": "queued", - "estimated_duration": 300 -} -``` - -### GET /executions/{job_id} -**Get execution status** -```json -Response: 200 OK -{ - "job_id": "job_qwerty123", - "status": "running", - "progress": 65, - "current_task": "Analyzing data", - "started_at": "2024-01-15T14:00:00Z" -} -``` - -### GET /executions/{job_id}/traces -**Get execution trace** -```json -Response: 200 OK -{ - "traces": [ - { - "timestamp": "2024-01-15T14:00:05Z", - "agent": "Research Agent", - "action": "web_search", - "result": "Found 15 relevant articles" - } - ] -} -``` - -### POST /executions/{job_id}/stop -**Stop execution** -```json -Response: 200 OK -{ - "job_id": "job_qwerty123", - "status": "stopped" -} -``` - ---- - -## Task Management -Create and list tasks assigned to agents. - -### GET /tasks -**List tasks** -```json -Response: 200 OK -{ - "tasks": [ - { - "id": "task_001", - "description": "Generate report", - "agent_id": "agent_001", - "status": "completed" - } - ] -} -``` - -### POST /tasks -**Create task** -```json -Request: -{ - "agent_id": "agent_001", - "description": "Analyze competitor pricing", - "expected_output": "Markdown report", - "context": ["Previous analysis from Q3"] -} - -Response: 201 Created -{ - "id": "task_002", - "status": "created" -} -``` - ---- - -## Tool Management -Discover built-in tools and register custom tools. 
- -### GET /tools -**List available tools** -```json -Response: 200 OK -{ - "tools": [ - { - "name": "web_search", - "description": "Search the web", - "category": "research" - }, - { - "name": "file_reader", - "description": "Read files", - "category": "data" - } - ] -} -``` - -### POST /tools/custom -**Register custom tool** -```json -Request: -{ - "name": "salesforce_api", - "description": "Query Salesforce data", - "endpoint": "https://api.example.com/salesforce", - "auth_type": "bearer" -} - -Response: 201 Created -{ - "tool_id": "tool_custom_001", - "status": "registered" -} -``` - ---- - -## Memory Management -Fetch and clear short/long-term memory for a crew. - -### GET /memory/{crew_id} -**Get crew memory** -```json -Response: 200 OK -{ - "short_term": [ - { - "timestamp": "2024-01-15T10:00:00Z", - "content": "Customer prefers email communication" - } - ], - "long_term": [ - { - "category": "preferences", - "insights": ["Email preferred", "Weekly reports"] - } - ] -} -``` - -### POST /memory/{crew_id}/clear -**Clear memory** -```json -Request: -{ - "type": "short_term" // or "long_term" or "all" -} - -Response: 204 No Content -``` - ---- - - - -## šŸ”µ WebSocket Events -Real-time updates for task lifecycle, errors, and progress. - -### Connection -```javascript -const ws = new WebSocket('wss://api.kasal.ai/v1/ws'); - -ws.onopen = () => { - ws.send(JSON.stringify({ - type: 'subscribe', - job_id: 'job_qwerty123' - })); -}; -``` - -### Event Types -```javascript -// Task started -{ - "type": "task_start", - "job_id": "job_qwerty123", - "task_id": "task_001", - "agent": "Research Agent" -} - -// Task completed -{ - "type": "task_complete", - "job_id": "job_qwerty123", - "task_id": "task_001", - "result": "Analysis complete" -} - -// Error -{ - "type": "error", - "job_id": "job_qwerty123", - "message": "Rate limit exceeded", - "code": "RATE_LIMIT" -} -``` - ---- - -## šŸ”· Error Codes -Standardized error responses and meanings. - -| Code | Message | Description | -|------|---------|-------------| -| 400 | Bad Request | Invalid parameters | -| 401 | Unauthorized | Invalid/expired token | -| 403 | Forbidden | Insufficient permissions | -| 404 | Not Found | Resource doesn't exist | -| 429 | Too Many Requests | Rate limit exceeded | -| 500 | Internal Error | Server error | -| 503 | Service Unavailable | Maintenance mode | - -### Error Response Format -```json -{ - "error": { - "code": "VALIDATION_ERROR", - "message": "Invalid crew configuration", - "details": { - "field": "agents", - "reason": "At least one agent required" - } - } -} -``` - ---- - -## šŸ”¹ Testing -Sandbox, Postman collection, and OpenAPI spec. - -### Sandbox Environment -```bash -# Use sandbox for testing -curl -X POST https://sandbox-api.kasal.ai/v1/crews \ - -H "Authorization: Bearer SANDBOX_TOKEN" \ - -H "Content-Type: application/json" \ - -d @crew.json -``` - - ---- - -*Build powerful integrations with Kasal API* \ No newline at end of file diff --git a/src/frontend/public/docs/api_endpoints.md b/src/frontend/public/docs/api_endpoints.md new file mode 100644 index 00000000..01e3abfa --- /dev/null +++ b/src/frontend/public/docs/api_endpoints.md @@ -0,0 +1,524 @@ +# Kasal API Endpoints Reference + +Complete reference for all available API endpoints in the Kasal platform. 
+ +--- + +## Base URL + +All API endpoints use the following base URL structure: + +``` +https://.databricksapps.com/api/v1 +``` + +**Example:** +``` +https://kasal-dev-1444828305810485.aws.databricksapps.com/api/v1/executions +``` + +**Local Development:** +``` +http://localhost:8000/api/v1 +``` + +--- + +## Table of Contents + +- [Authentication](#authentication) +- [Crews (Workflows)](#crews-workflows) +- [Agents](#agents) +- [Tasks](#tasks) +- [Tools](#tools) +- [Executions](#executions) +- [Models](#models) +- [API Keys](#api-keys) +- [Power BI Integration](#power-bi-integration) +- [Health & Status](#health--status) + +--- + +## Authentication + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/auth/login` | User login with credentials | +| `POST` | `/auth/logout` | User logout | +| `GET` | `/auth/me` | Get current user information | +| `POST` | `/auth/refresh` | Refresh JWT token | + +**Authentication Header:** +``` +Authorization: Bearer +``` + +--- + +## Crews (Workflows) + +### Crew Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/crews` | List all crews in workspace | +| `POST` | `/crews` | Create a new crew | +| `GET` | `/crews/{id}` | Get crew details by ID | +| `PUT` | `/crews/{id}` | Update crew configuration | +| `DELETE` | `/crews/{id}` | Delete crew | +| `POST` | `/crews/{id}/duplicate` | Duplicate crew with new name | + +### Crew Execution + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/crews/{id}/kickoff` | Start crew execution | +| `POST` | `/crews/{id}/kickoff-async` | Start async crew execution | +| `GET` | `/crews/{id}/status` | Get crew execution status | +| `POST` | `/crews/{id}/stop` | Stop running crew | + +### Crew Export/Import + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/crews/{id}/export` | Export crew configuration as JSON | +| `POST` | `/crews/import` | Import crew from JSON | + +--- + +## Agents + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/agents` | List all agents | +| `POST` | `/agents` | Create a new agent | +| `GET` | `/agents/{id}` | Get agent details by ID | +| `PUT` | `/agents/{id}` | Update agent configuration | +| `DELETE` | `/agents/{id}` | Delete agent | + +**Agent Configuration Fields:** +- `name`: Agent name +- `role`: Agent role description +- `goal`: Agent's objective +- `backstory`: Agent's background context +- `tools`: Array of tool IDs +- `tool_configs`: Tool-specific configurations +- `llm_config`: LLM model and parameters + +--- + +## Tasks + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/tasks` | List all tasks | +| `POST` | `/tasks` | Create a new task | +| `GET` | `/tasks/{id}` | Get task details by ID | +| `PUT` | `/tasks/{id}` | Update task configuration | +| `DELETE` | `/tasks/{id}` | Delete task | + +**Task Configuration Fields:** +- `name`: Task name +- `description`: Task description +- `expected_output`: Expected output format +- `agent_id`: Assigned agent ID +- `context`: Context task IDs (dependencies) +- `tool_configs`: Task-level tool configurations + +--- + +## Tools + +### Tool Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/tools` | List all available tools | +| `GET` | `/tools/{id}` | Get tool details by ID | +| `PUT` | `/tools/{id}` | Update tool configuration | +| `POST` | `/tools/{id}/enable` | Enable tool 
for workspace | +| `POST` | `/tools/{id}/disable` | Disable tool for workspace | + +### Tool Categories + +**Available Tool Types:** +- `ai`: AI-powered tools (Dall-E, Perplexity) +- `database`: Database tools (Genie, Databricks, Power BI) +- `search`: Search tools (Serper, Knowledge Search) +- `web`: Web tools (Scrape Website) +- `integration`: Integration tools (MCP) +- `development`: Development tools + +--- + +## Executions + +### Execution Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/executions` | List all executions | +| `GET` | `/executions/{id}` | Get execution details | +| `GET` | `/executions/{id}/status` | Get execution status | +| `GET` | `/executions/{id}/logs` | Get execution logs | +| `POST` | `/executions/{id}/stop` | Stop running execution | +| `DELETE` | `/executions/{id}` | Delete execution record | + +### Execution Status Values + +- `pending`: Execution queued +- `running`: Execution in progress +- `completed`: Execution finished successfully +- `failed`: Execution failed with error +- `stopped`: Execution manually stopped + +--- + +## Models + +### Model Configuration + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/models` | List available LLM models | +| `GET` | `/models/{id}` | Get model configuration | +| `PUT` | `/models/{id}` | Update model parameters | +| `POST` | `/models/test` | Test model connection | + +**Supported Model Providers:** +- Databricks (Foundation Models) +- OpenAI (GPT-3.5, GPT-4) +- Anthropic (Claude) +- Google (Gemini) +- Azure OpenAI +- Ollama (Local models) + +--- + +## API Keys + +### API Key Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/api-keys` | List all API keys (encrypted) | +| `POST` | `/api-keys` | Create new API key | +| `GET` | `/api-keys/{id}` | Get API key details | +| `PUT` | `/api-keys/{id}` | Update API key value | +| `DELETE` | `/api-keys/{id}` | Delete API key | + +**Common API Keys:** +- `OPENAI_API_KEY`: OpenAI authentication +- `ANTHROPIC_API_KEY`: Anthropic Claude authentication +- `SERPER_API_KEY`: Serper search tool +- `PERPLEXITY_API_KEY`: Perplexity AI tool +- `DATABRICKS_TOKEN`: Databricks API access +- `POWERBI_CLIENT_SECRET`: Power BI service principal +- `POWERBI_USERNAME`: Power BI device code auth +- `POWERBI_PASSWORD`: Power BI device code auth + +**Security:** +- All API keys are encrypted at rest +- Keys are never returned in plain text via API +- Multi-tenant isolation by group_id + +--- + +## Power BI Integration + +### Power BI Configuration + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/powerbi/config` | Configure Power BI connection | +| `GET` | `/powerbi/config` | Get Power BI configuration | + +**Power BI Tool Configuration (Task-Level):** +```json +{ + "tenant_id": "Azure AD Tenant ID", + "client_id": "Azure AD Application ID", + "semantic_model_id": "Power BI Dataset ID", + "workspace_id": "Power BI Workspace ID (optional)", + "auth_method": "service_principal or device_code", + "databricks_job_id": "Databricks Job ID (optional)" +} +``` + +**Required API Keys:** +- `POWERBI_CLIENT_SECRET` +- `POWERBI_USERNAME` (for device_code) +- `POWERBI_PASSWORD` (for device_code) +- `DATABRICKS_API_KEY` or `DATABRICKS_TOKEN` + +--- + +## Health & Status + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/health` | API health check | +| `GET` | `/health/db` | Database connection status 
| +| `GET` | `/health/services` | External services status | +| `GET` | `/version` | API version information | + +--- + +## Common Response Formats + +### Success Response + +```json +{ + "status": "success", + "data": { ... }, + "message": "Operation completed successfully" +} +``` + +### Error Response + +```json +{ + "status": "error", + "error": { + "code": "ERROR_CODE", + "message": "Human-readable error message", + "details": { ... } + } +} +``` + +### Pagination + +For list endpoints that support pagination: + +``` +GET /crews?page=1&limit=50&sort=created_at&order=desc +``` + +**Query Parameters:** +- `page`: Page number (default: 1) +- `limit`: Items per page (default: 50, max: 100) +- `sort`: Sort field +- `order`: Sort order (`asc` or `desc`) + +--- + +## Rate Limiting + +**Default Limits:** +- Anonymous: 100 requests/hour +- Authenticated: 1000 requests/hour +- Enterprise: 10,000 requests/hour + +**Rate Limit Headers:** +``` +X-RateLimit-Limit: 1000 +X-RateLimit-Remaining: 999 +X-RateLimit-Reset: 1609459200 +``` + +--- + +## Memory Management + +### GET /api/v1/memory/{crew_id} +**Get crew memory (short-term and long-term)** + +```json +Response: 200 OK +{ + "short_term": [ + { + "timestamp": "2024-01-15T10:00:00Z", + "content": "Customer prefers email communication" + } + ], + "long_term": [ + { + "category": "preferences", + "insights": ["Email preferred", "Weekly reports"] + } + ] +} +``` + +### POST /api/v1/memory/{crew_id}/clear +**Clear crew memory** + +```json +Request: +{ + "type": "short_term" // Options: "short_term", "long_term", or "all" +} + +Response: 204 No Content +``` + +--- + +## WebSocket Endpoints + +### Real-Time Execution Updates + +``` +ws://localhost:8000/ws/executions/{execution_id} +``` + +**Message Format:** +```json +{ + "type": "status_update", + "execution_id": "abc123", + "status": "running", + "progress": 45, + "message": "Processing task 2 of 5..." +} +``` + +--- + +## Examples + +### Create and Execute a Crew + +```bash +# 1. Create a crew +curl -X POST http://localhost:8000/api/v1/crews \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Sales Analysis Crew", + "agents": [...], + "tasks": [...] + }' + +# Response: {"id": "crew_123", ...} + +# 2. Start execution +curl -X POST http://localhost:8000/api/v1/crews/crew_123/kickoff \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"inputs": {"query": "Analyze Q4 sales"}}' + +# Response: {"execution_id": "exec_456", ...} + +# 3. 
Monitor execution +curl -X GET http://localhost:8000/api/v1/executions/exec_456/status \ + -H "Authorization: Bearer $TOKEN" +``` + +### Configure Power BI Tool in Task + +```bash +# Create task with PowerBI configuration +curl -X POST http://localhost:8000/api/v1/tasks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Analyze Sales Data", + "description": "Analyze sales trends using Power BI", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } + }' +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| `AUTH_001` | Invalid or expired token | +| `AUTH_002` | Insufficient permissions | +| `CREW_001` | Crew not found | +| `CREW_002` | Invalid crew configuration | +| `EXEC_001` | Execution failed | +| `EXEC_002` | Execution timeout | +| `TOOL_001` | Tool not available | +| `TOOL_002` | Tool configuration error | +| `DB_001` | Database connection error | +| `EXT_001` | External service unavailable | + +--- + +## SDK Examples + +### Python SDK + +```python +from kasal import KasalClient + +# Initialize client +client = KasalClient( + base_url="http://localhost:8000", + token="your-jwt-token" +) + +# Create and execute crew +crew = client.crews.create( + name="Data Analysis Crew", + agents=[...], + tasks=[...] +) + +execution = crew.kickoff(inputs={"query": "Analyze data"}) +result = execution.wait() # Blocks until complete + +print(result.output) +``` + +### JavaScript/TypeScript SDK + +```typescript +import { KasalClient } from '@kasal/sdk'; + +const client = new KasalClient({ + baseUrl: 'http://localhost:8000', + token: 'your-jwt-token' +}); + +// Create and execute crew +const crew = await client.crews.create({ + name: 'Data Analysis Crew', + agents: [...], + tasks: [...] +}); + +const execution = await crew.kickoff({ + inputs: { query: 'Analyze data' } +}); + +// Stream results +execution.on('status', (status) => { + console.log('Status:', status); +}); + +const result = await execution.wait(); +console.log('Result:', result.output); +``` + +--- + +## Additional Resources + +- **API Playground**: `/api/playground` +- **OpenAPI Schema**: `/api/openapi.json` +- **Swagger UI**: `/api/docs` +- **ReDoc**: `/api/redoc` + +For more information, see: +- [Power BI Integration Guide](powerbi_integration.md) +- [Tool Configuration Guide](powerbi_analysis_tool_setup.md) +- [Crew Deployment Guide](crew_export_deployment.md) diff --git a/src/frontend/public/docs/crew-export-deployment.md b/src/frontend/public/docs/crew-export-deployment.md new file mode 100644 index 00000000..a4be78f1 --- /dev/null +++ b/src/frontend/public/docs/crew-export-deployment.md @@ -0,0 +1,335 @@ +# Crew Export and Deployment + +## Overview + +Kasal now supports exporting CrewAI crews to various formats and deploying them to Databricks Model Serving endpoints. This feature allows you to take your visually designed agent workflows and run them in production environments. + +## Features + +### Export Formats + +#### 1. 
Python Project Export +Exports your crew as a complete Python project with the following structure: +- **README.md**: Setup and usage instructions +- **requirements.txt**: Python dependencies +- **.env.example**: Environment variable template +- **.gitignore**: Git ignore patterns +- **src/{crew_name}/config/agents.yaml**: Agent configurations +- **src/{crew_name}/config/tasks.yaml**: Task configurations +- **src/{crew_name}/crew.py**: Crew class implementation +- **src/{crew_name}/main.py**: Execution entry point +- **tests/test_crew.py**: Unit tests (optional) + +**Best for:** +- Local development and testing +- Version control integration +- Custom modifications and extensions +- CI/CD pipelines + +#### 2. Databricks Notebook Export +Exports your crew as a single `.ipynb` notebook file compatible with Databricks, containing: +- Title and overview +- Setup instructions +- **Package compatibility warning** (important - read before running) +- Installation commands with proper dependency handling +- Agent and task configurations (YAML) +- Crew implementation code with custom tool placeholders +- Execution logic +- Usage examples + +**Best for:** +- Quick prototyping in Databricks +- Interactive development and debugging +- Sharing with team members +- Documentation and demonstrations + +**Important Note:** +- Installing CrewAI will upgrade core Databricks packages (numpy, pyarrow, protobuf, grpcio) +- This triggers warnings but is expected behavior +- Recommend using Databricks Runtime 14.3 LTS ML or higher for best compatibility +- After installation, Python kernel will restart automatically + +### Deployment to Databricks Model Serving + +Deploy your crew as an MLflow model behind a Databricks Model Serving endpoint for production use. + +**Deployment Process:** +1. Wraps crew as MLflow PyFunc model +2. Logs model to MLflow with dependencies +3. Registers in Unity Catalog (optional) +4. Creates/updates Model Serving endpoint +5. Returns endpoint URL for API invocations + +**Configuration Options:** +- **Model Name**: Name for the registered model (required) +- **Endpoint Name**: Name for serving endpoint (defaults to model name) +- **Workload Size**: Small, Medium, or Large +- **Scale to Zero**: Enable automatic scaling to zero +- **Unity Catalog**: Register model in Unity Catalog +- **Catalog/Schema**: Unity Catalog location (required if enabled) + +## How to Use + +### Prerequisites + +1. **Save Your Crew**: You must save your crew before exporting or deploying +2. **Permissions**: + - Export: Editor or Admin role required + - Deploy: Admin role required + +### Export a Crew + +1. Design your crew in the visual canvas +2. Save the crew using the Save button +3. Click the **Export** button (download icon) in the toolbar +4. Select export format: + - **Python Project**: For local development + - **Databricks Notebook**: For Databricks environment +5. Configure export options: + - **Include custom tools**: Include tool implementations + - **Include comments**: Add explanatory comments + - **Include tests**: Generate test files (Python Project only) + - **Model override**: Override LLM model for all agents +6. Click **Export & Download** + +The exported file will be downloaded to your browser: +- Python Project: `{crew_name}_project.zip` +- Databricks Notebook: `{crew_name}.ipynb` + +### Deploy to Databricks Model Serving + +1. Design and save your crew +2. Click the **Deploy** button (rocket icon) in the toolbar +3. 
Configure deployment: + - **Model Name**: Choose a unique model name (required) + - **Endpoint Name**: Optionally specify endpoint name + - **Workload Size**: Select based on expected load + - **Scale to Zero**: Enable for cost optimization + - **Unity Catalog**: Enable for centralized model registry + - **Catalog/Schema**: Specify Unity Catalog location +4. Click **Deploy to Model Serving** +5. Wait for deployment to complete +6. Copy the endpoint URL and usage example + +### Invoke Deployed Crew + +#### Using HTTP Request +```python +import requests +import os + +# Get Databricks token +token = os.getenv("DATABRICKS_TOKEN") + +# Endpoint URL from deployment +endpoint_url = "https://your-workspace.cloud.databricks.com/serving-endpoints/your-crew/invocations" + +# Prepare request +headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" +} + +data = { + "inputs": { + "topic": "Artificial Intelligence trends in 2025" + } +} + +# Invoke endpoint +response = requests.post( + endpoint_url, + headers=headers, + json=data +) + +print("Status Code:", response.status_code) +print("Result:", response.json()) +``` + +#### Using Databricks SDK +```python +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() + +response = w.serving_endpoints.query( + name="your-crew", + inputs={"topic": "Artificial Intelligence trends in 2025"} +) + +print("Result:", response) +``` + +## Export Options Explained + +### Include Custom Tools +When enabled, the export includes implementations for custom tools used by your agents. Standard tools (SerperDevTool, etc.) are imported from crewai-tools, but custom tools need implementations. + +### Include Comments +Adds explanatory comments throughout the generated code to help understand the structure and functionality. + +### Include Tests +(Python Project only) Generates a basic test file with examples of how to test your crew. + +### Model Override +Allows you to override the LLM model for all agents in the exported crew. This is useful when: +- Moving from development to production models +- Testing with different model providers +- Standardizing models across all agents + +## Deployment Configuration + +### Workload Size +- **Small**: Suitable for development and light production loads +- **Medium**: Balanced performance for moderate loads +- **Large**: High performance for heavy production workloads + +### Scale to Zero +When enabled, the endpoint automatically scales down to zero replicas when not in use, reducing costs. It will automatically scale up when requests arrive. + +### Unity Catalog Integration +Registering your model in Unity Catalog provides: +- Centralized model registry +- Access control and governance +- Model lineage tracking +- Versioning and lifecycle management + +## Best Practices + +### Before Export +1. Test your crew thoroughly in Kasal +2. Verify all agents and tasks are properly configured +3. Ensure custom tools are working correctly +4. Save your crew with a descriptive name + +### Python Project Export +1. Review the generated code +2. Add custom tool implementations if needed +3. Update environment variables in `.env` +4. Run tests before deploying +5. Commit to version control + +### Databricks Notebook Export +1. Import notebook into Databricks workspace +2. Configure Databricks secrets for API keys +3. Run cells sequentially to verify functionality +4. Customize inputs for your use case + +### Deployment +1. Start with small workload size for testing +2. 
Enable scale to zero for development endpoints +3. Use Unity Catalog for production models +4. Monitor endpoint performance and costs +5. Update endpoint configuration as needed + +## Troubleshooting + +### Export Issues + +**Error: "Only editors and admins can export crews"** +- Solution: Contact your admin to get Editor or Admin role + +**Error: "Crew not found"** +- Solution: Make sure you've saved the crew before exporting + +**Export button is disabled** +- Solution: Save your crew first using the Save button + +### Deployment Issues + +**Error: "Only admins can deploy crews to Model Serving"** +- Solution: Contact your admin to get Admin role + +**Error: "Model name is required"** +- Solution: Provide a unique model name in the deployment form + +**Error: "Catalog name and schema name are required"** +- Solution: When using Unity Catalog, both catalog and schema names must be provided + +**Deployment status shows "NOT_READY"** +- Solution: Wait for the endpoint to initialize. This can take several minutes for the first deployment. + +## API Endpoints + +### Export Crew +``` +POST /api/crews/{crew_id}/export +``` + +Request body: +```json +{ + "export_format": "python_project" | "databricks_notebook", + "options": { + "include_custom_tools": true, + "include_comments": true, + "include_tests": true, + "model_override": "optional-model-name" + } +} +``` + +### Download Export +``` +GET /api/crews/{crew_id}/export/download?format={format} +``` + +Returns: File download (zip or ipynb) + +### Deploy Crew +``` +POST /api/crews/{crew_id}/deploy +``` + +Request body: +```json +{ + "config": { + "model_name": "my-crew-model", + "endpoint_name": "my-crew-endpoint", + "workload_size": "Small" | "Medium" | "Large", + "scale_to_zero_enabled": true, + "unity_catalog_model": true, + "catalog_name": "main", + "schema_name": "ml_models" + } +} +``` + +### Get Deployment Status +``` +GET /api/crews/{crew_id}/deployment/status?endpoint_name={name} +``` + +### Delete Deployment +``` +DELETE /api/crews/{crew_id}/deployment/{endpoint_name} +``` + +## Technical Details + +### MLflow Model Structure +Deployed crews are wrapped as MLflow PyFunc models with: +- Custom `CrewAIModelWrapper` class +- Conda environment with CrewAI dependencies +- Crew configuration stored as model artifact +- Input/output signature for Model Serving + +### Authentication +The deployment uses Databricks SDK's authentication chain: +1. Environment variables (DATABRICKS_HOST, DATABRICKS_TOKEN) +2. Databricks CLI configuration +3. Default profile + +### Memory and State +Deployed crews use CrewAI's built-in memory system for maintaining context across agent interactions within a single execution. + +## Future Enhancements + +Planned features for future releases: +- Export to other platforms (AWS SageMaker, Azure ML) +- Batch inference endpoints +- Custom deployment configurations +- A/B testing support +- Monitoring and observability integration diff --git a/src/frontend/public/docs/powerbi_integration.md b/src/frontend/public/docs/powerbi_integration.md new file mode 100644 index 00000000..27a57420 --- /dev/null +++ b/src/frontend/public/docs/powerbi_integration.md @@ -0,0 +1,952 @@ +# Power BI Integration Guide + +Complete guide for integrating Power BI with Kasal AI agents for advanced business intelligence analysis. 
+ +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Setup Guide](#setup-guide) + - [Development Environment](#1-development-environment-setup) + - [Azure Service Principal](#2-azure-service-principal-setup) + - [Databricks Configuration](#3-databricks-configuration) + - [Kasal Configuration](#4-kasal-configuration) +- [Authentication Methods](#authentication-methods) +- [API Configuration](#api-configuration) +- [PowerBI Analysis Tool](#powerbi-analysis-tool) +- [Testing](#testing) +- [Troubleshooting](#troubleshooting) +- [Security & Best Practices](#security--best-practices) + +--- + +## Overview + +The Power BI integration enables Kasal AI agents to execute complex analysis against Power BI semantic models using Databricks compute resources. This provides a production-ready, API-driven connector for Power BI analytics within AI workflows based on a preconfigured template notebook for tracability. + +**Key Features:** +- DAX query execution against Power BI semantic models +- Complex analysis using Databricks job orchestration +- Multiple authentication methods (Service Principal, Device Code Flow) +- Task-level configuration for workspace and semantic model selection +- Multi-tenant isolation with encrypted credential storage + +**Use Cases:** +- Year-over-year growth analysis +- Trend detection and forecasting +- Complex financial reporting +- Multi-dimensional business analysis +- Automated business intelligence reporting + +--- + +## Architecture + +### System Components + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kasal AI │ +│ Agent │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + └─ PowerBIAnalysisTool + └─> Databricks Job + ā”œā”€ Step 1: Extract Power BI metadata + ā”œā”€ Step 2: Generate DAX query from business question + └─ Step 3: Execute query + └─> Power BI REST API + └─> Returns: JSON result data +``` + +### Backend Components + +1. **API Keys Service** (`services/api_keys_service.py`) + - Stores encrypted Power BI credentials + - Multi-tenant isolation via `group_id` + - Handles: `POWERBI_CLIENT_SECRET`, `POWERBI_USERNAME`, `POWERBI_PASSWORD` + +2. **Databricks Auth Context** (`utils/databricks_auth.py`) + - Auto-detects `databricks_host` from environment + - Retrieves `databricks_token` from API Keys or environment + +3. **PowerBIAnalysisTool** (`engines/crewai/tools/custom/powerbi_analysis_tool.py`) + - CrewAI tool for Power BI analysis + - Wraps Databricks job execution + - Handles credential retrieval and job parameter passing + +4. **Tool Factory** (`engines/crewai/tools/tool_factory.py`) + - Instantiates tools with task-level configuration + - Merges base tool config with task-specific overrides + +### Frontend Components + +1. **PowerBIConfigSelector** (`components/Common/PowerBIConfigSelector.tsx`) + - Task-level Power BI configuration UI + - Appears when PowerBIAnalysisTool is selected + - Validates required API Keys + +2. **TaskForm** (`components/Tasks/TaskForm.tsx`) + - Integrates PowerBIConfigSelector + - Stores configuration in `tool_configs.PowerBIAnalysisTool` + +### Authentication Flow + +1. Kasal retrieves credentials from API Keys Service +2. Auto-detects Databricks host from unified auth context +3. Passes credentials to Databricks job as parameters +4. Databricks job authenticates with Azure AD +5. Azure AD issues Power BI access token +6. 
Access token used to call Power BI REST API + +--- + +## Prerequisites + +### Required Accounts & Access + +- **Azure Tenant**: Admin access for Service Principal or Service Account (if PBI with RLS enforcement) setup +- **Power BI**: Workspace access and semantic model permissions +- **Databricks Workspace**: Access with token for job creation +- **Operating System**: Linux/macOS (Ubuntu on VDI for production) +- **Key vault connect to Databricks**: Connection of centrally managed secrets as KV variables within Databricks +- **Python**: 3.11+ +- **Node.js**: LTS version + +### Power BI Requirements + +- Power BI workspace with semantic models +- Workspace ID and Semantic Model ID +- Admin permissions to grant Service Principal access + +### Azure AD Requirements + +- Permission to create App Registrations +- Admin consent capability for API permissions +- Ability to create and manage Client Secrets + +--- + +## Setup Guide + +### 1. Development Environment Setup + +#### 1.1 Install Python 3.11 + +```bash +# Add the deadsnakes PPA (Ubuntu) +sudo add-apt-repository ppa:deadsnakes/ppa -y +sudo apt update + +# Install Python 3.11 +sudo apt install python3.11 python3.11-venv python3.11-dev -y + +# Verify installation +python3.11 --version +``` + +#### 1.2 Clone Repository + +```bash +# Clone the Kasal repository +git clone https://github.com/databrickslabs/kasal.git +cd kasal + +# Checkout the feature branch +git checkout feature/pbi-tool +``` + +#### 1.3 Create Virtual Environment + +```bash +# Create virtual environment with Python 3.11 +python3.11 -m venv venv + +# Activate the environment +source venv/bin/activate + +# Upgrade pip +pip install --upgrade pip +``` + +#### 1.4 Install Dependencies + +```bash +# Navigate to src directory +cd src + +# Install Python dependencies +pip install -r requirements.txt + +# Verify installations +pip freeze | grep -E "crewai|litellm|databricks" +``` + +#### 1.5 Install Node.js (if needed) + +```bash +# Install Node Version Manager (nvm) +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash + +# Load nvm +source ~/.bashrc + +# Install Node.js LTS +nvm install --lts +nvm use --lts + +# Verify installations +node --version +npm --version +``` + +--- + +### 2. Azure Service Principal Setup + +To enable non-interactive authentication, create an Azure Service Principal with Power BI read permissions. + +#### 2.1 Create Service Principal in Azure Portal + +1. **Navigate to Azure Portal**: https://portal.azure.com +2. **Go to Azure Active Directory** → **App registrations** +3. **Click "New registration"**: + - **Name**: `Kasal-PowerBI-Connector` (or your preferred name) + - **Supported account types**: Single tenant + - **Redirect URI**: Leave blank +4. **Note the Application (client) ID** and **Directory (tenant) ID** + +Please consider that for some PowerBI reports a service principal might not be enough, but a service account might be needed. This will be especially the case for PowerBIs that enfore RLS within the PowerBI. + +#### 2.2 Create Client Secret + +1. In your app registration, go to **Certificates & secrets** +2. Click **New client secret** +3. **Description**: `Kasal PowerBI Tool` +4. **Expires**: Choose expiration period (recommended: 90 days) +5. **Copy the secret value** immediately (you won't be able to see it again) + +#### 2.3 Configure API Permissions + +**Critical**: The Service Principal needs **Application** permissions, not **Delegated**. + +1. Go to **API permissions** in your app registration +2. 
**Remove any Delegated permissions** if present +3. Click **Add a permission** +4. Select **Power BI Service** +5. Choose **Application permissions** (NOT Delegated) +6. Check **Dataset.Read.All** +7. Click **Add permissions** +8. **Click "Grant admin consent for [Your Organization]"** (requires admin) + +**Important**: This step requires **Azure AD Admin** privileges. If you don't have admin rights, use the email template in the Appendix. + +#### 2.4 Enable Service Principal in Power BI Admin Portal + +1. Go to **Power BI Admin Portal**: https://app.powerbi.com/admin-portal/tenantSettings +2. Navigate to **Developer settings** (or **Tenant settings**) +3. Find **Service principals can use Power BI APIs** +4. **Enable** this setting +5. Add your Service Principal to the allowed list: + - Option 1: Add specific Service Principal by name + - Option 2: Add to a security group that's allowed + +#### 2.5 Grant Workspace Access + +For each Power BI workspace you want to access: + +1. Open the Power BI workspace +2. Click **Workspace settings** +3. Go to **Access** +4. Click **Add people or groups** +5. Search for your Service Principal name +6. Assign role: **Member** or **Contributor** + +--- + +### 3. Databricks Configuration + +#### 3.1 Set Environment Variables + +```bash +# Set Databricks credentials +export DATABRICKS_TOKEN="your-databricks-token" +export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com/" +``` + +#### 3.2 Configure Databricks CLI + +```bash +# Configure Databricks CLI +databricks configure --host https://your-workspace.cloud.databricks.com --token +``` + +If the prompt doesn't appear: +```bash +# Unset environment variables and retry +unset DATABRICKS_HOST +unset DATABRICKS_TOKEN +databricks configure --host https://your-workspace.cloud.databricks.com --token +``` + +#### 3.3 Verify Connection + +```bash +# Test workspace access +databricks workspace list / +``` + +#### 3.4 Create Databricks Job + +The PowerBIAnalysisTool requires a Databricks job for executing the analysis pipeline. + +1. **Navigate to Databricks Workflows**: + - Go to your Databricks workspace + - Click **Workflows** in the left sidebar + +2. **Create New Job**: + - Click **Create Job** + - **Job Name**: `pbi_e2e_pipeline` + +3. **Add Task**: + - Click **Add Task** + - **Task Name**: `pbi_e2e_pipeline` + - **Type**: Notebook + - **Notebook Path**: `/Workspace/Shared/powerbi_full_pipeline` + - **Cluster**: Select or create appropriate cluster + +4. **Note the Job ID**: + - After creating the job, copy the **Job ID** from the URL + - Example: `365257288725339` + - This will be used in PowerBIAnalysisTool configuration + +#### 3.5 Upload Pipeline Notebook + +```bash +# Upload the notebook to Databricks +# Please note that the security features were implemented +# But for the notebook to work you need to be precise with +# pre-requisites (Key-Vault setup) and PBI SVP setting (ask respective admins) +databricks workspace import \ + examples/powerbi_full_pipeline.ipynb \ + /Workspace/Shared/powerbi_full_pipeline \ + --language PYTHON \ + --format JUPYTER +``` + +Or manually upload via Databricks UI: +1. Go to **Workspace** → **Shared** +2. Click **Create** → **Import** +3. Upload `examples/powerbi_full_pipeline.ipynb` + +--- + +### 4. Kasal Configuration + +#### 4.1 Build Frontend + +```bash +# From the project root +python src/build.py +``` + +This creates a `frontend_static` folder with compiled React application. 
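If you later change frontend code, re-run the build before redeploying. A quick sanity check (a minimal sketch; the exact contents of `frontend_static` depend on the React build):

```bash
# Rebuild the frontend and confirm the compiled assets are present
python src/build.py
ls frontend_static/    # compiled React application should appear here
```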
+ +#### 4.2 Deploy to Databricks Apps + +```bash +# Deploy the application +cd src +python deploy.py \ + --app-name kasal \ + --user-name your-email@domain.com +``` + +**Note**: Replace `--app-name` and `--user-name` with your specific values. + +#### 4.3 Configure API Keys + +After deploying, configure required API Keys: + +1. **Navigate to Configuration** → **API Keys** +2. **Add the following keys**: + - `POWERBI_CLIENT_SECRET`: Service Principal secret (from section 2.2) + - `POWERBI_USERNAME`: Power BI service account email (for device code auth) + - `POWERBI_PASSWORD`: Service account password (for device code auth) + - `DATABRICKS_API_KEY` or `DATABRICKS_TOKEN`: Databricks access token + +**Important**: All values are encrypted at rest and never returned in plain text via API. + +#### 4.4 Enable PowerBIAnalysisTool + +1. Go to **Tools** section +2. Find **PowerBIAnalysisTool** +3. Review security disclaimers +4. Enable the tool for your workspace + +--- + +## Authentication Methods + +The PowerBIAnalysisTool supports two authentication methods: + +### Service Principal (Recommended for Production) + +**Best for**: Automated workflows, production deployments, unattended execution + +**Requirements**: +- `tenant_id`: Azure AD Tenant ID +- `client_id`: Service Principal Application ID +- `POWERBI_CLIENT_SECRET`: Stored in API Keys + +**Configuration**: +```json +{ + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "auth_method": "service_principal" +} +``` + +**Advantages**: +- Non-interactive, fully automated +- No MFA requirements +- Production-ready +- Supports scheduled workflows + +### Device Code Flow (Recommended for Testing) + +**Best for**: Development, testing, personal workspaces + +**Requirements**: +- `tenant_id`: Azure AD Tenant ID +- `client_id`: Can use Power BI public client `1950a258-227b-4e31-a9cf-717495945fc2` +- `POWERBI_USERNAME`: User email (stored in API Keys) +- `POWERBI_PASSWORD`: User password (stored in API Keys) + +**Configuration**: +```json +{ + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "1950a258-227b-4e31-a9cf-717495945fc2", + "auth_method": "device_code" +} +``` + +**How it works**: +1. First request prompts: "Visit microsoft.com/devicelogin" +2. Enter provided code in browser +3. Sign in with your credentials +4. Token is cached for subsequent requests (~1 hour) + +**Advantages**: +- No Service Principal setup required +- Uses your personal Power BI permissions +- Perfect for development and testing +- Supports MFA + +--- + +## API Configuration + +### Task-Level Configuration + +Configure Power BI settings at the **task level** for flexibility across different semantic models: + +1. **Create or Edit Task** +2. **Select PowerBIAnalysisTool** in tools list +3. 
**Configure Power BI settings** (fields appear automatically): + - **Tenant ID**: Azure AD tenant GUID + - **Client ID**: Service Principal or app client ID + - **Workspace ID**: Power BI workspace GUID (optional) + - **Semantic Model ID**: Power BI semantic model/dataset GUID + - **Auth Method**: `service_principal` or `device_code` + - **Databricks Job ID**: Databricks job ID for analysis pipeline + +**Example Task Configuration**: +```json +{ + "name": "Analyze Sales Data", + "description": "Analyze Q4 sales trends using Power BI", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +### Required API Keys Check + +The UI automatically checks for required API Keys when PowerBIAnalysisTool is selected: +- `POWERBI_CLIENT_SECRET` +- `POWERBI_USERNAME` +- `POWERBI_PASSWORD` +- `DATABRICKS_API_KEY` (or `DATABRICKS_TOKEN`) + +If keys are missing, an error alert is displayed with instructions. + +--- + +## PowerBI Analysis Tool + +### Tool Overview + +**PowerBIAnalysisTool** (ID: 71) enables complex Power BI analysis via Databricks job orchestration. + +**Best for**: +- Heavy computation and large datasets +- Complex multi-query analysis +- Year-over-year comparisons +- Trend detection and forecasting +- Resource-intensive business intelligence tasks + +### Tool Parameters + +**Input Parameters**: +- `question` (str): Business question to analyze +- `dashboard_id` (str): Semantic model ID (can be provided by LLM or task config) +- `workspace_id` (str): Power BI workspace ID (optional) +- `additional_params` (dict): Optional additional parameters + +**Configuration** (from tool_configs): +- `tenant_id`: Azure AD tenant +- `client_id`: Application client ID +- `semantic_model_id`: Default semantic model +- `workspace_id`: Default workspace +- `auth_method`: Authentication method +- `databricks_job_id`: Databricks job ID for pipeline + +### Agent Configuration Example + +```json +{ + "role": "Business Intelligence Analyst", + "goal": "Perform complex Power BI analysis using Databricks", + "backstory": "Expert analyst with deep understanding of business metrics", + "tools": ["PowerBIAnalysisTool"], + "llm_config": { + "model": "databricks-meta-llama-3-1-70b-instruct", + "temperature": 0.1 + } +} +``` + +### Task Configuration Example + +```json +{ + "name": "Q4 Revenue Analysis", + "description": "Analyze Q4 2024 revenue trends by product category and region, comparing year-over-year growth", + "expected_output": "Comprehensive analysis report with insights and recommendations", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +### How It Works + +1. **Agent receives task** with business question +2. **PowerBIAnalysisTool invoked** with question and semantic model ID +3. **Tool retrieves credentials** from API Keys Service +4. 
**Tool auto-detects** databricks_host from environment +5. **Databricks job triggered** with parameters: + - `question`: Business question + - `semantic_model_id`: Dataset to query + - `workspace_id`: Power BI workspace + - `tenant_id`, `client_id`: Authentication + - `client_secret`, `username`, `password`: Credentials + - `databricks_host`, `databricks_token`: For recursive auth +6. **Job executes pipeline**: + - Extracts Power BI metadata + - Generates DAX query from question + - Executes query against Power BI + - Returns structured results +7. **Agent receives results** and continues workflow + +--- + +## Testing + +### Local Development Testing + +#### 1. Start Services + +**Backend**: +```bash +cd src/backend +./run.sh sqlite +# Backend starts on http://localhost:8000 +``` + +**Frontend**: +```bash +cd src/frontend +npm start +# Frontend starts on http://localhost:3000 +``` + +#### 2. Configure via UI + +1. Open http://localhost:3000 +2. Navigate to **Configuration** → **API Keys** +3. Add required keys: + - `POWERBI_CLIENT_SECRET` + - `POWERBI_USERNAME` + - `POWERBI_PASSWORD` + - `DATABRICKS_API_KEY` +4. Navigate to **Tools** → Enable **PowerBIAnalysisTool** + +#### 3. Create Test Agent and Task + +**Agent**: +```json +{ + "role": "Sales Analyst", + "goal": "Analyze Power BI sales data", + "tools": ["PowerBIAnalysisTool"] +} +``` + +**Task**: +```json +{ + "description": "What is the total revenue for Q4 2024?", + "expected_output": "Revenue figure with analysis", + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "your-tenant-id", + "client_id": "your-client-id", + "semantic_model_id": "your-model-id", + "workspace_id": "your-workspace-id", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +#### 4. Run Workflow + +1. Click **Run Crew** +2. Monitor execution in **Runs** tab +3. Check Databricks **Workflows** for job execution +4. Verify results in execution logs + +### Production Testing (Databricks App) + +#### 1. Deploy to Databricks + +```bash +cd src +python deploy.py --app-name kasal-prod --user-name your-email@domain.com +``` + +#### 2. Configure in Deployed App + +1. Open deployed app URL +2. Navigate to **Configuration** → **API Keys** +3. Add production credentials +4. Enable **PowerBIAnalysisTool** + +#### 3. Create Production Workflow + +Create agent and task using production semantic model IDs and workspace IDs. + +#### 4. End-to-End Test + +1. Run crew execution +2. Monitor Databricks job logs +3. Verify Power BI API calls in Azure AD audit logs +4. Validate results accuracy + +### Sample Test Queries + +**Simple aggregation**: +```json +{ + "question": "What is the total revenue by region?" +} +``` + +**Year-over-year analysis**: +```json +{ + "question": "Compare Q4 2024 revenue to Q4 2023 by product category" +} +``` + +**Trend analysis**: +```json +{ + "question": "Show monthly sales trends for the last 12 months" +} +``` + +--- + +## Troubleshooting + +### Authentication Issues + +**Error**: "Provided OAuth token does not have required scopes" + +**Causes**: +- Missing OAuth scopes in Databricks App configuration +- Service Principal lacks Power BI API permissions + +**Solutions**: +1. Verify Service Principal has **Application** (not Delegated) permissions +2. Ensure admin consent was granted in Azure AD +3. Check Service Principal is enabled in Power BI Admin Portal +4. 
For Databricks Apps, configure OAuth scopes: `sql`, `all-apis` + +--- + +**Error**: "Authentication failed: 403 Forbidden" + +**Causes**: +- Service Principal doesn't have workspace access +- Incorrect workspace ID + +**Solutions**: +1. Add Service Principal to Power BI workspace with Member/Contributor role +2. Verify workspace_id matches the actual workspace GUID +3. Check Power BI audit logs for access denied events + +--- + +### Configuration Issues + +**Error**: "tenant_id showing as 'your_tenant_id'" + +**Cause**: LLM-provided placeholder values taking precedence over task config + +**Solution**: Verify task configuration priority in tool_factory.py - task config should override LLM values + +--- + +**Error**: "semantic_model_id truncated or incorrect" + +**Cause**: dashboard_id from kwargs overriding task config value + +**Solution**: Check powerbi_analysis_tool.py lines 314-316 for proper priority handling + +--- + +**Error**: "Missing databricks_host or databricks_token in job parameters" + +**Cause**: Credentials not being passed to job parameters + +**Solution**: Verify powerbi_analysis_tool.py lines 411-418 add credentials to job_params + +--- + +### Job Execution Issues + +**Error**: "Databricks job times out" + +**Causes**: +- Large dataset +- Complex DAX query +- Insufficient cluster resources + +**Solutions**: +1. Increase job timeout in tool configuration +2. Optimize DAX query for performance +3. Use more powerful cluster for the job +4. Consider breaking analysis into smaller queries + +--- + +**Error**: "Dataset.Read.All permission not found" + +**Cause**: Using Delegated permission instead of Application permission + +**Solution**: +1. Go to Azure AD → App registrations → API permissions +2. Remove Delegated permissions +3. Add **Application** permission: Dataset.Read.All +4. Grant admin consent + +--- + +**Error**: "Client secret expired" + +**Cause**: Azure client secrets expire after set period + +**Solution**: +1. Create new client secret in Azure Portal +2. Update `POWERBI_CLIENT_SECRET` in API Keys +3. Rotate secrets regularly (recommended: every 90 days) + +--- + +## Security & Best Practices + +### Credential Management + +1. **Use API Keys Service**: + - All credentials stored encrypted at rest + - Multi-tenant isolation via group_id + - Never commit credentials to source control + +2. **Rotate Credentials Regularly**: + - Rotate Service Principal secrets every 90 days + - Use Azure Key Vault for production deployments + - Monitor credential usage in audit logs + +3. **Principle of Least Privilege**: + - Only grant workspace access where needed + - Use Power BI RLS (Row-Level Security) for data filtering + - Limit Service Principal to read-only permissions + +### Production Secret Management with Key Vaults + +For production deployments, **never pass credentials directly as job parameters**. 
Instead, use key vault references: + +#### Architecture Pattern + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kasal App │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Pass secret names only + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Databricks Job │ +│ Parameters: │ +│ { │ +│ "client_secret_key": "powerbi-client-secret" ← Secret name +│ "username_key": "powerbi-username" ← Secret name +│ } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Retrieve actual values + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Key Vault Storage │ +│ (Azure Key Vault, │ +│ Databricks Secrets, │ +│ AWS Secrets Manager) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +#### Option 1: Azure Key Vault (Recommended for Azure) + +**Setup Azure Key Vault:** + +1. **Create Key Vault** in Azure Portal +2. **Add Secrets**: + - `powerbi-client-secret`: Service Principal secret + - `powerbi-username`: Service account username + - `powerbi-password`: Service account password + - `databricks-token`: Databricks PAT + +3. **Grant Access** to Databricks workspace: + - Use Managed Identity or Service Principal + - Assign "Key Vault Secrets User" role + +**Configure Databricks to Access Azure Key Vault:** + +```bash +# Create secret scope backed by Azure Key Vault +databricks secrets create-scope --scope azure-key-vault \ + --scope-backend-type AZURE_KEYVAULT \ + --resource-id /subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.KeyVault/vaults/{vault-name} \ + --dns-name https://{vault-name}.vault.azure.net/ +``` + +**Notebook Code (Secure Approach):** + +```python +import os + +# Retrieve secrets from Databricks secret scope (backed by Azure Key Vault) +client_secret = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-client-secret") +username = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-username") +password = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-password") +databricks_token = dbutils.secrets.get(scope="azure-key-vault", key="databricks-token") + +# Use credentials for authentication +powerbi_config = { + "tenant_id": dbutils.widgets.get("tenant_id"), + "client_id": dbutils.widgets.get("client_id"), + "client_secret": client_secret, # Retrieved from Key Vault + "username": username, # Retrieved from Key Vault + "password": password # Retrieved from Key Vault +} +``` + +**Job Parameters (No Sensitive Data):** + +```json +{ + "question": "Analyze Q4 revenue", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9" +} +``` + +**Set environment variables** in Databricks job cluster configuration: + +```json +{ + "spark_env_vars": { + "POWERBI_CLIENT_SECRET": "{{secrets/powerbi-secrets/client-secret}}", + "POWERBI_USERNAME": "{{secrets/powerbi-secrets/username}}", + "POWERBI_PASSWORD": "{{secrets/powerbi-secrets/password}}" + } +} +``` + +**Note**: No secrets in job parameters - just their names! All retrieved from Key Vault. 
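If Azure Key Vault is not available, a Databricks-backed secret scope can back the same `{{secrets/powerbi-secrets/...}}` references shown above. A minimal sketch using the legacy Databricks CLI syntax (the newer CLI uses `databricks secrets put-secret` instead); the scope and key names are illustrative:

```bash
# Create a Databricks-backed secret scope matching the references above
databricks secrets create-scope --scope powerbi-secrets

# Store the Power BI credentials (values shown are placeholders)
databricks secrets put --scope powerbi-secrets --key client-secret --string-value "<service-principal-secret>"
databricks secrets put --scope powerbi-secrets --key username --string-value "<service-account-email>"
databricks secrets put --scope powerbi-secrets --key password --string-value "<service-account-password>"

# Verify the keys exist (secret values are never displayed)
databricks secrets list --scope powerbi-secrets
```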
+ +--- + +#### Option 2: Environment Variables (Development Only) + +**For local development**, use environment variables: + +```python +import os + +# Retrieve from environment +client_secret = os.getenv("POWERBI_CLIENT_SECRET") +username = os.getenv("POWERBI_USERNAME") +password = os.getenv("POWERBI_PASSWORD") +databricks_token = os.getenv("DATABRICKS_TOKEN") +``` + +**Set environment variables** in Databricks job cluster configuration: + +```json +{ + "spark_env_vars": { + "POWERBI_CLIENT_SECRET": "{{secrets/powerbi-secrets/client-secret}}", + "POWERBI_USERNAME": "{{secrets/powerbi-secrets/username}}", + "POWERBI_PASSWORD": "{{secrets/powerbi-secrets/password}}" + } +} +``` diff --git a/src/frontend/src/components/Common/PowerBIConfigSelector.tsx b/src/frontend/src/components/Common/PowerBIConfigSelector.tsx new file mode 100644 index 00000000..faa7b22f --- /dev/null +++ b/src/frontend/src/components/Common/PowerBIConfigSelector.tsx @@ -0,0 +1,303 @@ +/** + * Power BI Configuration Selector Component + * + * A configuration form for customizing Power BI Analysis tool settings at the task level. + */ + +import React, { useState, useEffect } from 'react'; +import { + Box, + TextField, + FormControl, + InputLabel, + Select, + MenuItem, + Typography, + Accordion, + AccordionSummary, + AccordionDetails, + Grid, + Alert, + Tooltip, + IconButton, +} from '@mui/material'; +import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; +import InfoIcon from '@mui/icons-material/Info'; +import { useAPIKeysStore } from '../../store/apiKeys'; + +export interface PowerBIConfig { + tenant_id?: string; + client_id?: string; + workspace_id?: string; + semantic_model_id?: string; + auth_method?: string; + databricks_job_id?: number; +} + +interface PowerBIConfigSelectorProps { + value: PowerBIConfig; + onChange: (config: PowerBIConfig) => void; + label?: string; + helperText?: string; + fullWidth?: boolean; + disabled?: boolean; +} + +const AUTH_METHODS = [ + { value: 'service_principal', label: 'Service Principal' }, + { value: 'device_code', label: 'Device Code' }, +]; + +const DEFAULT_CONFIG: PowerBIConfig = { + tenant_id: '', + client_id: '', + workspace_id: '', + semantic_model_id: '', + auth_method: 'service_principal', + databricks_job_id: undefined, +}; + +export const PowerBIConfigSelector: React.FC = ({ + value, + onChange, + label = 'Power BI Configuration', + helperText, + fullWidth = true, + disabled = false +}) => { + const [config, setConfig] = useState({ ...DEFAULT_CONFIG, ...value }); + const [expanded, setExpanded] = useState(false); + const { secrets, fetchAPIKeys } = useAPIKeysStore(); + const [missingApiKeys, setMissingApiKeys] = useState([]); + + // Check for required API keys + useEffect(() => { + fetchAPIKeys(); + }, [fetchAPIKeys]); + + useEffect(() => { + const requiredKeys = [ + 'POWERBI_CLIENT_SECRET', + 'POWERBI_USERNAME', + 'POWERBI_PASSWORD', + 'DATABRICKS_API_KEY' + ]; + + const missing = requiredKeys.filter(keyName => + !secrets.find(key => key.name === keyName) + ); + + setMissingApiKeys(missing); + }, [secrets]); + + useEffect(() => { + setConfig({ ...DEFAULT_CONFIG, ...value }); + }, [value]); + + const handleChange = (field: keyof PowerBIConfig, fieldValue: string | number | undefined) => { + const newConfig = { + ...config, + [field]: fieldValue === '' ? 
undefined : fieldValue + }; + setConfig(newConfig); + onChange(newConfig); + }; + + const isConfigured = config.tenant_id && config.client_id && config.semantic_model_id; + + return ( + + {!isConfigured && ( + + Power BI configuration is required. Please configure tenant_id, client_id, and semantic_model_id. + + )} + + {missingApiKeys.length > 0 && ( + + + + āš ļø Required API Keys Missing + + + To use PowerBIAnalysisTool, you MUST configure the following API Keys in Settings → API Keys: + + + {missingApiKeys.map(key => ( +
  • + + {key} + +
  • + ))} +
    +
    +
    + )} + + setExpanded(isExpanded)} + disabled={disabled} + sx={{ + '&:before': { display: 'none' }, + boxShadow: 1, + }} + > + } + sx={{ + backgroundColor: 'rgba(0, 0, 0, 0.02)', + borderRadius: 1, + '&:hover': { + backgroundColor: 'rgba(0, 0, 0, 0.04)', + }, + }} + > + + + {label} + + {isConfigured && ( + + āœ“ Configured + + )} + + + + + + {/* Required Fields */} + + + Required Configuration + + + + + handleChange('tenant_id', e.target.value)} + disabled={disabled} + placeholder="Azure AD Tenant ID" + helperText="Your Azure AD Tenant ID" + InputProps={{ + endAdornment: ( + + + + + + ), + }} + /> + + + + handleChange('client_id', e.target.value)} + disabled={disabled} + placeholder="Azure AD Application ID" + helperText="Your Azure AD Application/Client ID" + InputProps={{ + endAdornment: ( + + + + + + ), + }} + /> + + + + handleChange('semantic_model_id', e.target.value)} + disabled={disabled} + placeholder="Power BI Semantic Model ID" + helperText="The Power BI semantic model (dataset) ID to query" + InputProps={{ + endAdornment: ( + + + + + + ), + }} + /> + + + + + Authentication Method + + + + + {/* Optional Fields */} + + + Optional Configuration + + + + + handleChange('workspace_id', e.target.value)} + disabled={disabled} + placeholder="Power BI Workspace ID (optional)" + helperText="Optional: Power BI workspace ID" + /> + + + + handleChange('databricks_job_id', e.target.value ? parseInt(e.target.value) : undefined)} + disabled={disabled} + placeholder="Databricks job ID (optional)" + helperText="Optional: Override default Databricks job ID" + /> + + + + {helperText && ( + + {helperText} + + )} + + +
    + ); +}; diff --git a/src/frontend/src/components/Configuration/APIKeys/APIKeys.tsx b/src/frontend/src/components/Configuration/APIKeys/APIKeys.tsx index 6b8b3bb4..c06a7e41 100644 --- a/src/frontend/src/components/Configuration/APIKeys/APIKeys.tsx +++ b/src/frontend/src/components/Configuration/APIKeys/APIKeys.tsx @@ -109,7 +109,10 @@ function APIKeys(): JSX.Element { 'QWEN_API_KEY', 'DEEPSEEK_API_KEY', 'GROK_API_KEY', - 'GEMINI_API_KEY' + 'GEMINI_API_KEY', + 'POWERBI_USERNAME', + 'POWERBI_PASSWORD', + 'POWERBI_CLIENT_SECRET' ]; // Map provider names to API key names with proper typing diff --git a/src/frontend/src/components/Configuration/PowerBIConfiguration.tsx b/src/frontend/src/components/Configuration/PowerBIConfiguration.tsx new file mode 100644 index 00000000..fc623893 --- /dev/null +++ b/src/frontend/src/components/Configuration/PowerBIConfiguration.tsx @@ -0,0 +1,296 @@ +import React, { useState, useEffect } from 'react'; +import { + Typography, + Box, + Alert, + TextField, + Button, + Snackbar, + CircularProgress, + Stack, + FormControlLabel, + Switch, + Divider, + Paper, + Link, +} from '@mui/material'; +import SaveIcon from '@mui/icons-material/Save'; +import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline'; +import BarChartIcon from '@mui/icons-material/BarChart'; +import InfoOutlinedIcon from '@mui/icons-material/InfoOutlined'; +import apiClient from '../../config/api/ApiConfig'; + +interface PowerBIConfigurationProps { + onSaved?: () => void; +} + +interface PowerBIConfig { + tenant_id: string; + client_id: string; + workspace_id: string; + semantic_model_id: string; + enabled: boolean; +} + +interface PowerBIStatus { + configured: boolean; + enabled: boolean; + message: string; +} + +const PowerBIConfiguration: React.FC = ({ onSaved }) => { + const [config, setConfig] = useState({ + tenant_id: '', + client_id: '', + workspace_id: '', + semantic_model_id: '', + enabled: false, + }); + const [loading, setLoading] = useState(false); + const [status, setStatus] = useState(null); + const [notification, setNotification] = useState({ + open: false, + message: '', + severity: 'success' as 'success' | 'error', + }); + + useEffect(() => { + const loadConfig = async () => { + try { + const response = await apiClient.get('/powerbi/config'); + setConfig(response.data); + + // Load status + const statusResponse = await apiClient.get('/powerbi/status'); + setStatus(statusResponse.data); + } catch (error) { + console.error('Error loading Power BI configuration:', error); + } + }; + + loadConfig(); + }, []); + + const handleSaveConfig = async () => { + if (config.enabled && (!config.tenant_id || !config.client_id)) { + setNotification({ + open: true, + message: 'Tenant ID and Client ID are required when Power BI is enabled', + severity: 'error', + }); + return; + } + + setLoading(true); + try { + await apiClient.post('/powerbi/config', config); + + setNotification({ + open: true, + message: 'Power BI configuration saved successfully', + severity: 'success', + }); + + // Reload status + const statusResponse = await apiClient.get('/powerbi/status'); + setStatus(statusResponse.data); + + if (onSaved) { + onSaved(); + } + } catch (error: any) { + setNotification({ + open: true, + message: error.response?.data?.detail || 'Error saving Power BI configuration', + severity: 'error', + }); + } finally { + setLoading(false); + } + }; + + const handleFieldChange = (field: keyof PowerBIConfig) => ( + event: React.ChangeEvent + ) => { + setConfig({ + ...config, + [field]: 
event.target.value, + }); + }; + + const handleEnabledToggle = (event: React.ChangeEvent) => { + setConfig({ + ...config, + enabled: event.target.checked, + }); + }; + + return ( + + + + + + Power BI Integration + + + Configure Power BI DAX query integration for your workspace + + + + + + + {/* Status Display */} + {status && ( + : undefined} + > + {status.message} + + )} + + {/* Configuration Help */} + }> + + Power BI Configuration Requirements: + + +
      +
    1. + Azure AD Application: Register an app in Azure AD and note the Tenant ID and Client ID +
    2. +
    3. + Credentials: Set the following in API Keys (Configuration → API Keys): +
        +
      • POWERBI_USERNAME - Service account username
      • +
      • POWERBI_PASSWORD - Service account password
      • +
      • POWERBI_CLIENT_SECRET - Azure AD app client secret (optional)
      • +
      +
    4. +
    5. + Workspace Access: Grant the service account access to your Power BI workspace +
    6. +
    +
    + + View full documentation → + +
    + + + {/* Enable Toggle */} + + } + label={ + + Enable Power BI Integration + + Allow agents to execute DAX queries against Power BI semantic models + + + } + /> + + + + {/* Azure AD Configuration */} + + Azure AD Configuration + + + + + + + + + {/* Power BI Configuration */} + + Power BI Workspace Configuration + + + + + + + {/* Save Button */} + + + + + + {/* Notification Snackbar */} + setNotification({ ...notification, open: false })} + anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }} + > + setNotification({ ...notification, open: false })} + severity={notification.severity} + sx={{ width: '100%' }} + > + {notification.message} + + +
    + ); +}; + +export default PowerBIConfiguration; diff --git a/src/frontend/src/components/Documentation/Documentation.tsx b/src/frontend/src/components/Documentation/Documentation.tsx index 33d8779b..a2efb2f2 100644 --- a/src/frontend/src/components/Documentation/Documentation.tsx +++ b/src/frontend/src/components/Documentation/Documentation.tsx @@ -32,7 +32,7 @@ interface DocSection { } const docSections: DocSection[] = [ - + { label: 'Architecture', items: [ @@ -44,10 +44,21 @@ const docSections: DocSection[] = [ items: [ { label: 'Developer Guide', file: 'DEVELOPER_GUIDE' }, { label: 'Code structure', file: 'CODE_STRUCTURE_GUIDE' }, - { label: 'API Reference', file: 'API_REFERENCE' }, ], }, - + { + label: 'API', + items: [ + { label: 'API Endpoints', file: 'api_endpoints' }, + ], + }, + { + label: 'Integrations', + items: [ + { label: 'Power BI Integration', file: 'powerbi_integration' }, + ], + }, + ]; const Documentation: React.FC = () => { diff --git a/src/frontend/src/components/Tasks/TaskForm.tsx b/src/frontend/src/components/Tasks/TaskForm.tsx index ed4e55cc..9ef7309b 100644 --- a/src/frontend/src/components/Tasks/TaskForm.tsx +++ b/src/frontend/src/components/Tasks/TaskForm.tsx @@ -41,6 +41,7 @@ import { GenieSpaceSelector } from '../Common/GenieSpaceSelector'; import { PerplexityConfigSelector } from '../Common/PerplexityConfigSelector'; import { SerperConfigSelector } from '../Common/SerperConfigSelector'; import { MCPServerSelector } from '../Common/MCPServerSelector'; +import { PowerBIConfigSelector, PowerBIConfig } from '../Common/PowerBIConfigSelector'; import { PerplexityConfig, SerperConfig } from '../../types/config'; import TaskBestPractices from '../BestPractices/TaskBestPractices'; @@ -121,6 +122,7 @@ const TaskForm: React.FC = ({ initialData, onCancel, onTaskSaved, const [selectedGenieSpace, setSelectedGenieSpace] = useState<{ id: string; name: string } | null>(null); const [perplexityConfig, setPerplexityConfig] = useState({}); const [serperConfig, setSerperConfig] = useState({}); + const [powerBIConfig, setPowerBIConfig] = useState({}); const [selectedMcpServers, setSelectedMcpServers] = useState([]); const [toolConfigs, setToolConfigs] = useState>(initialData?.tool_configs || {}); const [showBestPractices, setShowBestPractices] = useState(false); @@ -158,6 +160,11 @@ const TaskForm: React.FC = ({ initialData, onCancel, onTaskSaved, setSerperConfig(initialData.tool_configs.SerperDevTool as SerperConfig); } + // Check for PowerBIAnalysisTool config + if (initialData.tool_configs.PowerBIAnalysisTool) { + setPowerBIConfig(initialData.tool_configs.PowerBIAnalysisTool as PowerBIConfig); + } + // Check for MCP_SERVERS config if (initialData.tool_configs.MCP_SERVERS) { const mcpConfig = initialData.tool_configs.MCP_SERVERS as Record; @@ -856,6 +863,33 @@ const TaskForm: React.FC = ({ initialData, onCancel, onTaskSaved, )} + {/* Power BI Configuration - Show only when PowerBIAnalysisTool is selected */} + {formData.tools.some(toolId => { + const tool = tools.find(t => + String(t.id) === String(toolId) || + t.id === Number(toolId) || + t.title === toolId + ); + return tool?.title === 'PowerBIAnalysisTool'; + }) && ( + + { + setPowerBIConfig(config); + // Update tool configs when configuration changes + setToolConfigs(prev => ({ + ...prev, + PowerBIAnalysisTool: config + })); + }} + label="Power BI Configuration" + helperText="Configure Power BI authentication and semantic model for this task" + fullWidth + /> + + )} + {/* MCP Server Configuration - Always show as it's 
independent of regular tools */} {/* Show selected MCP servers visually */} diff --git a/src/frontend/src/components/Tools/SecurityDisclaimer.tsx b/src/frontend/src/components/Tools/SecurityDisclaimer.tsx index b85d65e3..de9ed771 100644 --- a/src/frontend/src/components/Tools/SecurityDisclaimer.tsx +++ b/src/frontend/src/components/Tools/SecurityDisclaimer.tsx @@ -608,6 +608,36 @@ export const TOOL_SECURITY_INFO: Record { diff --git a/src/frontend_static/docs/API_REFERENCE.md b/src/frontend_static/docs/API_REFERENCE.md deleted file mode 100644 index 7b9a6b4f..00000000 --- a/src/frontend_static/docs/API_REFERENCE.md +++ /dev/null @@ -1,479 +0,0 @@ -# Kasal API Reference - -> **RESTful API v1** - Complete endpoint documentation with examples - ---- - -## Getting Started -Base URLs, authentication, and rate limits you need before calling endpoints. - -### Base URL -``` -Production: https://api.example.com/v1 -Staging: https://staging-api.example.com/v1 -Local: http://localhost:8000/api/v1 -``` - -### Authentication -```bash -# Get access token -curl -X POST https://api.example.com/v1/auth/login \ - -H "Content-Type: application/json" \ - -d '{"email": "user@example.com", "password": "secure_pass"}' - -# Use token in requests -curl -X GET https://api.example.com/v1/crews \ - -H "Authorization: Bearer YOUR_ACCESS_TOKEN" -``` - -### Rate Limits -| Tier | Requests/Hour | Burst | -|------|---------------|-------| -| **Free** | 1,000 | 100/min | -| **Pro** | 10,000 | 1,000/min | -| **Enterprise** | Unlimited | Custom | - ---- - -## Authentication Endpoints -Login, refresh, and logout flows to manage tokens. - -### POST /auth/login -**Login with credentials** -```json -Request: -{ - "email": "user@example.com", - "password": "secure_password" -} - -Response: 200 OK -{ - "access_token": "eyJ0eXAi...", - "token_type": "bearer", - "expires_in": 86400 -} -``` - -### POST /auth/refresh -**Refresh access token** -```json -Request: -{ - "refresh_token": "eyJ0eXAi..." -} - -Response: 200 OK -{ - "access_token": "eyJ0eXAi...", - "expires_in": 86400 -} -``` - -### POST /auth/logout -**Invalidate tokens** -```json -Response: 204 No Content -``` - ---- - -## Crew Management -Create and manage multi-agent crews and their configurations. - -### GET /crews -**List all crews** -```json -Response: 200 OK -{ - "crews": [ - { - "id": "crew_abc123", - "name": "Customer Support Crew", - "status": "active", - "agents_count": 3, - "created_at": "2024-01-15T10:30:00Z" - } - ], - "total": 15, - "page": 1 -} -``` - -### POST /crews -**Create new crew** -```json -Request: -{ - "name": "Marketing Crew", - "description": "Content generation team", - "process": "hierarchical", - "agents": [ - { - "role": "Content Writer", - "goal": "Create engaging content", - "model": "gpt-4" - } - ] -} - -Response: 201 Created -{ - "id": "crew_xyz789", - "name": "Marketing Crew", - "status": "configuring" -} -``` - -### GET /crews/{crew_id} -**Get crew details** -```json -Response: 200 OK -{ - "id": "crew_abc123", - "name": "Customer Support Crew", - "agents": [...], - "tasks": [...], - "configuration": {...} -} -``` - -### PUT /crews/{crew_id} -**Update crew configuration** -```json -Request: -{ - "name": "Updated Crew Name", - "process": "sequential" -} - -Response: 200 OK -{ - "id": "crew_abc123", - "updated": true -} -``` - -### DELETE /crews/{crew_id} -**Delete crew** -```json -Response: 204 No Content -``` - ---- - -## Agent Management -Create and list individual agents with roles, models, and tools. 
- -### GET /agents -**List all agents** -```json -Response: 200 OK -{ - "agents": [ - { - "id": "agent_001", - "name": "Research Agent", - "crew_id": "crew_abc123", - "model": "gpt-4", - "status": "ready" - } - ] -} -``` - -### POST /agents -**Create new agent** -```json -Request: -{ - "crew_id": "crew_abc123", - "role": "Data Analyst", - "goal": "Analyze metrics", - "backstory": "Expert analyst with 10 years experience", - "model": "claude-3-opus", - "tools": ["web_search", "calculator"] -} - -Response: 201 Created -{ - "id": "agent_002", - "status": "created" -} -``` - ---- - -## Execution Management -Start executions, get status, retrieve traces, and stop runs. - -### POST /executions -**Start crew execution** -```json -Request: -{ - "crew_id": "crew_abc123", - "inputs": { - "topic": "Q4 Marketing Strategy", - "deadline": "2024-12-31" - } -} - -Response: 202 Accepted -{ - "job_id": "job_qwerty123", - "status": "queued", - "estimated_duration": 300 -} -``` - -### GET /executions/{job_id} -**Get execution status** -```json -Response: 200 OK -{ - "job_id": "job_qwerty123", - "status": "running", - "progress": 65, - "current_task": "Analyzing data", - "started_at": "2024-01-15T14:00:00Z" -} -``` - -### GET /executions/{job_id}/traces -**Get execution trace** -```json -Response: 200 OK -{ - "traces": [ - { - "timestamp": "2024-01-15T14:00:05Z", - "agent": "Research Agent", - "action": "web_search", - "result": "Found 15 relevant articles" - } - ] -} -``` - -### POST /executions/{job_id}/stop -**Stop execution** -```json -Response: 200 OK -{ - "job_id": "job_qwerty123", - "status": "stopped" -} -``` - ---- - -## Task Management -Create and list tasks assigned to agents. - -### GET /tasks -**List tasks** -```json -Response: 200 OK -{ - "tasks": [ - { - "id": "task_001", - "description": "Generate report", - "agent_id": "agent_001", - "status": "completed" - } - ] -} -``` - -### POST /tasks -**Create task** -```json -Request: -{ - "agent_id": "agent_001", - "description": "Analyze competitor pricing", - "expected_output": "Markdown report", - "context": ["Previous analysis from Q3"] -} - -Response: 201 Created -{ - "id": "task_002", - "status": "created" -} -``` - ---- - -## Tool Management -Discover built-in tools and register custom tools. - -### GET /tools -**List available tools** -```json -Response: 200 OK -{ - "tools": [ - { - "name": "web_search", - "description": "Search the web", - "category": "research" - }, - { - "name": "file_reader", - "description": "Read files", - "category": "data" - } - ] -} -``` - -### POST /tools/custom -**Register custom tool** -```json -Request: -{ - "name": "salesforce_api", - "description": "Query Salesforce data", - "endpoint": "https://api.example.com/salesforce", - "auth_type": "bearer" -} - -Response: 201 Created -{ - "tool_id": "tool_custom_001", - "status": "registered" -} -``` - ---- - -## Memory Management -Fetch and clear short/long-term memory for a crew. - -### GET /memory/{crew_id} -**Get crew memory** -```json -Response: 200 OK -{ - "short_term": [ - { - "timestamp": "2024-01-15T10:00:00Z", - "content": "Customer prefers email communication" - } - ], - "long_term": [ - { - "category": "preferences", - "insights": ["Email preferred", "Weekly reports"] - } - ] -} -``` - -### POST /memory/{crew_id}/clear -**Clear memory** -```json -Request: -{ - "type": "short_term" // or "long_term" or "all" -} - -Response: 204 No Content -``` - ---- - - - -## šŸ”µ WebSocket Events -Real-time updates for task lifecycle, errors, and progress. 
- -### Connection -```javascript -const ws = new WebSocket('wss://api.kasal.ai/v1/ws'); - -ws.onopen = () => { - ws.send(JSON.stringify({ - type: 'subscribe', - job_id: 'job_qwerty123' - })); -}; -``` - -### Event Types -```javascript -// Task started -{ - "type": "task_start", - "job_id": "job_qwerty123", - "task_id": "task_001", - "agent": "Research Agent" -} - -// Task completed -{ - "type": "task_complete", - "job_id": "job_qwerty123", - "task_id": "task_001", - "result": "Analysis complete" -} - -// Error -{ - "type": "error", - "job_id": "job_qwerty123", - "message": "Rate limit exceeded", - "code": "RATE_LIMIT" -} -``` - ---- - -## šŸ”· Error Codes -Standardized error responses and meanings. - -| Code | Message | Description | -|------|---------|-------------| -| 400 | Bad Request | Invalid parameters | -| 401 | Unauthorized | Invalid/expired token | -| 403 | Forbidden | Insufficient permissions | -| 404 | Not Found | Resource doesn't exist | -| 429 | Too Many Requests | Rate limit exceeded | -| 500 | Internal Error | Server error | -| 503 | Service Unavailable | Maintenance mode | - -### Error Response Format -```json -{ - "error": { - "code": "VALIDATION_ERROR", - "message": "Invalid crew configuration", - "details": { - "field": "agents", - "reason": "At least one agent required" - } - } -} -``` - ---- - -## šŸ”¹ Testing -Sandbox, Postman collection, and OpenAPI spec. - -### Sandbox Environment -```bash -# Use sandbox for testing -curl -X POST https://sandbox-api.kasal.ai/v1/crews \ - -H "Authorization: Bearer SANDBOX_TOKEN" \ - -H "Content-Type: application/json" \ - -d @crew.json -``` - - ---- - -*Build powerful integrations with Kasal API* \ No newline at end of file diff --git a/src/frontend_static/docs/api_endpoints.md b/src/frontend_static/docs/api_endpoints.md new file mode 100644 index 00000000..01e3abfa --- /dev/null +++ b/src/frontend_static/docs/api_endpoints.md @@ -0,0 +1,524 @@ +# Kasal API Endpoints Reference + +Complete reference for all available API endpoints in the Kasal platform. 
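All endpoints share the same calling convention. A minimal example against a local development backend (the token is a placeholder obtained via the authentication endpoints described below):

```bash
# List crews using a JWT bearer token
curl -X GET http://localhost:8000/api/v1/crews \
  -H "Authorization: Bearer $TOKEN"
```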
+ +--- + +## Base URL + +All API endpoints use the following base URL structure: + +``` +https://.databricksapps.com/api/v1 +``` + +**Example:** +``` +https://kasal-dev-1444828305810485.aws.databricksapps.com/api/v1/executions +``` + +**Local Development:** +``` +http://localhost:8000/api/v1 +``` + +--- + +## Table of Contents + +- [Authentication](#authentication) +- [Crews (Workflows)](#crews-workflows) +- [Agents](#agents) +- [Tasks](#tasks) +- [Tools](#tools) +- [Executions](#executions) +- [Models](#models) +- [API Keys](#api-keys) +- [Power BI Integration](#power-bi-integration) +- [Health & Status](#health--status) + +--- + +## Authentication + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/auth/login` | User login with credentials | +| `POST` | `/auth/logout` | User logout | +| `GET` | `/auth/me` | Get current user information | +| `POST` | `/auth/refresh` | Refresh JWT token | + +**Authentication Header:** +``` +Authorization: Bearer +``` + +--- + +## Crews (Workflows) + +### Crew Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/crews` | List all crews in workspace | +| `POST` | `/crews` | Create a new crew | +| `GET` | `/crews/{id}` | Get crew details by ID | +| `PUT` | `/crews/{id}` | Update crew configuration | +| `DELETE` | `/crews/{id}` | Delete crew | +| `POST` | `/crews/{id}/duplicate` | Duplicate crew with new name | + +### Crew Execution + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/crews/{id}/kickoff` | Start crew execution | +| `POST` | `/crews/{id}/kickoff-async` | Start async crew execution | +| `GET` | `/crews/{id}/status` | Get crew execution status | +| `POST` | `/crews/{id}/stop` | Stop running crew | + +### Crew Export/Import + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/crews/{id}/export` | Export crew configuration as JSON | +| `POST` | `/crews/import` | Import crew from JSON | + +--- + +## Agents + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/agents` | List all agents | +| `POST` | `/agents` | Create a new agent | +| `GET` | `/agents/{id}` | Get agent details by ID | +| `PUT` | `/agents/{id}` | Update agent configuration | +| `DELETE` | `/agents/{id}` | Delete agent | + +**Agent Configuration Fields:** +- `name`: Agent name +- `role`: Agent role description +- `goal`: Agent's objective +- `backstory`: Agent's background context +- `tools`: Array of tool IDs +- `tool_configs`: Tool-specific configurations +- `llm_config`: LLM model and parameters + +--- + +## Tasks + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/tasks` | List all tasks | +| `POST` | `/tasks` | Create a new task | +| `GET` | `/tasks/{id}` | Get task details by ID | +| `PUT` | `/tasks/{id}` | Update task configuration | +| `DELETE` | `/tasks/{id}` | Delete task | + +**Task Configuration Fields:** +- `name`: Task name +- `description`: Task description +- `expected_output`: Expected output format +- `agent_id`: Assigned agent ID +- `context`: Context task IDs (dependencies) +- `tool_configs`: Task-level tool configurations + +--- + +## Tools + +### Tool Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/tools` | List all available tools | +| `GET` | `/tools/{id}` | Get tool details by ID | +| `PUT` | `/tools/{id}` | Update tool configuration | +| `POST` | `/tools/{id}/enable` | Enable tool 
for workspace | +| `POST` | `/tools/{id}/disable` | Disable tool for workspace | + +### Tool Categories + +**Available Tool Types:** +- `ai`: AI-powered tools (Dall-E, Perplexity) +- `database`: Database tools (Genie, Databricks, Power BI) +- `search`: Search tools (Serper, Knowledge Search) +- `web`: Web tools (Scrape Website) +- `integration`: Integration tools (MCP) +- `development`: Development tools + +--- + +## Executions + +### Execution Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/executions` | List all executions | +| `GET` | `/executions/{id}` | Get execution details | +| `GET` | `/executions/{id}/status` | Get execution status | +| `GET` | `/executions/{id}/logs` | Get execution logs | +| `POST` | `/executions/{id}/stop` | Stop running execution | +| `DELETE` | `/executions/{id}` | Delete execution record | + +### Execution Status Values + +- `pending`: Execution queued +- `running`: Execution in progress +- `completed`: Execution finished successfully +- `failed`: Execution failed with error +- `stopped`: Execution manually stopped + +--- + +## Models + +### Model Configuration + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/models` | List available LLM models | +| `GET` | `/models/{id}` | Get model configuration | +| `PUT` | `/models/{id}` | Update model parameters | +| `POST` | `/models/test` | Test model connection | + +**Supported Model Providers:** +- Databricks (Foundation Models) +- OpenAI (GPT-3.5, GPT-4) +- Anthropic (Claude) +- Google (Gemini) +- Azure OpenAI +- Ollama (Local models) + +--- + +## API Keys + +### API Key Management + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/api-keys` | List all API keys (encrypted) | +| `POST` | `/api-keys` | Create new API key | +| `GET` | `/api-keys/{id}` | Get API key details | +| `PUT` | `/api-keys/{id}` | Update API key value | +| `DELETE` | `/api-keys/{id}` | Delete API key | + +**Common API Keys:** +- `OPENAI_API_KEY`: OpenAI authentication +- `ANTHROPIC_API_KEY`: Anthropic Claude authentication +- `SERPER_API_KEY`: Serper search tool +- `PERPLEXITY_API_KEY`: Perplexity AI tool +- `DATABRICKS_TOKEN`: Databricks API access +- `POWERBI_CLIENT_SECRET`: Power BI service principal +- `POWERBI_USERNAME`: Power BI device code auth +- `POWERBI_PASSWORD`: Power BI device code auth + +**Security:** +- All API keys are encrypted at rest +- Keys are never returned in plain text via API +- Multi-tenant isolation by group_id + +--- + +## Power BI Integration + +### Power BI Configuration + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/powerbi/config` | Configure Power BI connection | +| `GET` | `/powerbi/config` | Get Power BI configuration | + +**Power BI Tool Configuration (Task-Level):** +```json +{ + "tenant_id": "Azure AD Tenant ID", + "client_id": "Azure AD Application ID", + "semantic_model_id": "Power BI Dataset ID", + "workspace_id": "Power BI Workspace ID (optional)", + "auth_method": "service_principal or device_code", + "databricks_job_id": "Databricks Job ID (optional)" +} +``` + +**Required API Keys:** +- `POWERBI_CLIENT_SECRET` +- `POWERBI_USERNAME` (for device_code) +- `POWERBI_PASSWORD` (for device_code) +- `DATABRICKS_API_KEY` or `DATABRICKS_TOKEN` + +--- + +## Health & Status + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/health` | API health check | +| `GET` | `/health/db` | Database connection status 
| +| `GET` | `/health/services` | External services status | +| `GET` | `/version` | API version information | + +--- + +## Common Response Formats + +### Success Response + +```json +{ + "status": "success", + "data": { ... }, + "message": "Operation completed successfully" +} +``` + +### Error Response + +```json +{ + "status": "error", + "error": { + "code": "ERROR_CODE", + "message": "Human-readable error message", + "details": { ... } + } +} +``` + +### Pagination + +For list endpoints that support pagination: + +``` +GET /crews?page=1&limit=50&sort=created_at&order=desc +``` + +**Query Parameters:** +- `page`: Page number (default: 1) +- `limit`: Items per page (default: 50, max: 100) +- `sort`: Sort field +- `order`: Sort order (`asc` or `desc`) + +--- + +## Rate Limiting + +**Default Limits:** +- Anonymous: 100 requests/hour +- Authenticated: 1000 requests/hour +- Enterprise: 10,000 requests/hour + +**Rate Limit Headers:** +``` +X-RateLimit-Limit: 1000 +X-RateLimit-Remaining: 999 +X-RateLimit-Reset: 1609459200 +``` + +--- + +## Memory Management + +### GET /api/v1/memory/{crew_id} +**Get crew memory (short-term and long-term)** + +```json +Response: 200 OK +{ + "short_term": [ + { + "timestamp": "2024-01-15T10:00:00Z", + "content": "Customer prefers email communication" + } + ], + "long_term": [ + { + "category": "preferences", + "insights": ["Email preferred", "Weekly reports"] + } + ] +} +``` + +### POST /api/v1/memory/{crew_id}/clear +**Clear crew memory** + +```json +Request: +{ + "type": "short_term" // Options: "short_term", "long_term", or "all" +} + +Response: 204 No Content +``` + +--- + +## WebSocket Endpoints + +### Real-Time Execution Updates + +``` +ws://localhost:8000/ws/executions/{execution_id} +``` + +**Message Format:** +```json +{ + "type": "status_update", + "execution_id": "abc123", + "status": "running", + "progress": 45, + "message": "Processing task 2 of 5..." +} +``` + +--- + +## Examples + +### Create and Execute a Crew + +```bash +# 1. Create a crew +curl -X POST http://localhost:8000/api/v1/crews \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Sales Analysis Crew", + "agents": [...], + "tasks": [...] + }' + +# Response: {"id": "crew_123", ...} + +# 2. Start execution +curl -X POST http://localhost:8000/api/v1/crews/crew_123/kickoff \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"inputs": {"query": "Analyze Q4 sales"}}' + +# Response: {"execution_id": "exec_456", ...} + +# 3. 
Monitor execution +curl -X GET http://localhost:8000/api/v1/executions/exec_456/status \ + -H "Authorization: Bearer $TOKEN" +``` + +### Configure Power BI Tool in Task + +```bash +# Create task with PowerBI configuration +curl -X POST http://localhost:8000/api/v1/tasks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Analyze Sales Data", + "description": "Analyze sales trends using Power BI", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } + }' +``` + +--- + +## Error Codes + +| Code | Description | +|------|-------------| +| `AUTH_001` | Invalid or expired token | +| `AUTH_002` | Insufficient permissions | +| `CREW_001` | Crew not found | +| `CREW_002` | Invalid crew configuration | +| `EXEC_001` | Execution failed | +| `EXEC_002` | Execution timeout | +| `TOOL_001` | Tool not available | +| `TOOL_002` | Tool configuration error | +| `DB_001` | Database connection error | +| `EXT_001` | External service unavailable | + +--- + +## SDK Examples + +### Python SDK + +```python +from kasal import KasalClient + +# Initialize client +client = KasalClient( + base_url="http://localhost:8000", + token="your-jwt-token" +) + +# Create and execute crew +crew = client.crews.create( + name="Data Analysis Crew", + agents=[...], + tasks=[...] +) + +execution = crew.kickoff(inputs={"query": "Analyze data"}) +result = execution.wait() # Blocks until complete + +print(result.output) +``` + +### JavaScript/TypeScript SDK + +```typescript +import { KasalClient } from '@kasal/sdk'; + +const client = new KasalClient({ + baseUrl: 'http://localhost:8000', + token: 'your-jwt-token' +}); + +// Create and execute crew +const crew = await client.crews.create({ + name: 'Data Analysis Crew', + agents: [...], + tasks: [...] +}); + +const execution = await crew.kickoff({ + inputs: { query: 'Analyze data' } +}); + +// Stream results +execution.on('status', (status) => { + console.log('Status:', status); +}); + +const result = await execution.wait(); +console.log('Result:', result.output); +``` + +--- + +## Additional Resources + +- **API Playground**: `/api/playground` +- **OpenAPI Schema**: `/api/openapi.json` +- **Swagger UI**: `/api/docs` +- **ReDoc**: `/api/redoc` + +For more information, see: +- [Power BI Integration Guide](powerbi_integration.md) +- [Tool Configuration Guide](powerbi_analysis_tool_setup.md) +- [Crew Deployment Guide](crew_export_deployment.md) diff --git a/src/frontend_static/docs/powerbi_integration.md b/src/frontend_static/docs/powerbi_integration.md new file mode 100644 index 00000000..931f821b --- /dev/null +++ b/src/frontend_static/docs/powerbi_integration.md @@ -0,0 +1,1195 @@ +# Power BI Integration Guide + +Complete guide for integrating Power BI with Kasal AI agents for advanced business intelligence analysis. 
+ +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Setup Guide](#setup-guide) + - [Development Environment](#1-development-environment-setup) + - [Azure Service Principal](#2-azure-service-principal-setup) + - [Databricks Configuration](#3-databricks-configuration) + - [Kasal Configuration](#4-kasal-configuration) +- [Authentication Methods](#authentication-methods) +- [API Configuration](#api-configuration) +- [PowerBI Analysis Tool](#powerbi-analysis-tool) +- [Testing](#testing) +- [Troubleshooting](#troubleshooting) +- [Security & Best Practices](#security--best-practices) + +--- + +## Overview + +The Power BI integration enables Kasal AI agents to execute complex analysis against Power BI semantic models using Databricks compute resources. This provides a production-ready, API-driven connector for Power BI analytics within AI workflows based on a preconfigured template notebook for tracability. + +**Key Features:** +- DAX query execution against Power BI semantic models +- Complex analysis using Databricks job orchestration +- Multiple authentication methods (Service Principal, Device Code Flow) +- Task-level configuration for workspace and semantic model selection +- Multi-tenant isolation with encrypted credential storage + +**Use Cases:** +- Year-over-year growth analysis +- Trend detection and forecasting +- Complex financial reporting +- Multi-dimensional business analysis +- Automated business intelligence reporting + +--- + +## Architecture + +### System Components + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kasal AI │ +│ Agent │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ + └─ PowerBIAnalysisTool + └─> Databricks Job + ā”œā”€ Step 1: Extract Power BI metadata + ā”œā”€ Step 2: Generate DAX query from business question + └─ Step 3: Execute query + └─> Power BI REST API + └─> Returns: JSON result data +``` + +### Backend Components + +1. **API Keys Service** (`services/api_keys_service.py`) + - Stores encrypted Power BI credentials + - Multi-tenant isolation via `group_id` + - Handles: `POWERBI_CLIENT_SECRET`, `POWERBI_USERNAME`, `POWERBI_PASSWORD` + +2. **Databricks Auth Context** (`utils/databricks_auth.py`) + - Auto-detects `databricks_host` from environment + - Retrieves `databricks_token` from API Keys or environment + +3. **PowerBIAnalysisTool** (`engines/crewai/tools/custom/powerbi_analysis_tool.py`) + - CrewAI tool for Power BI analysis + - Wraps Databricks job execution + - Handles credential retrieval and job parameter passing + +4. **Tool Factory** (`engines/crewai/tools/tool_factory.py`) + - Instantiates tools with task-level configuration + - Merges base tool config with task-specific overrides + +### Frontend Components + +1. **PowerBIConfigSelector** (`components/Common/PowerBIConfigSelector.tsx`) + - Task-level Power BI configuration UI + - Appears when PowerBIAnalysisTool is selected + - Validates required API Keys + +2. **TaskForm** (`components/Tasks/TaskForm.tsx`) + - Integrates PowerBIConfigSelector + - Stores configuration in `tool_configs.PowerBIAnalysisTool` + +### Authentication Flow + +1. Kasal retrieves credentials from API Keys Service +2. Auto-detects Databricks host from unified auth context +3. Passes credentials to Databricks job as parameters +4. Databricks job authenticates with Azure AD +5. Azure AD issues Power BI access token +6. 
Access token used to call Power BI REST API + +--- + +## Prerequisites + +### Required Accounts & Access + +- **Azure Tenant**: Admin access for Service Principal or Service Account (if PBI with RLS enforcement) setup +- **Power BI**: Workspace access and semantic model permissions +- **Databricks Workspace**: Access with token for job creation +- **Operating System**: Linux/macOS (Ubuntu on VDI for production) +- **Key vault connect to Databricks**: Connection of centrally managed secrets as KV variables within Databricks +- **Python**: 3.11+ +- **Node.js**: LTS version + +### Power BI Requirements + +- Power BI workspace with semantic models +- Workspace ID and Semantic Model ID +- Admin permissions to grant Service Principal access + +### Azure AD Requirements + +- Permission to create App Registrations +- Admin consent capability for API permissions +- Ability to create and manage Client Secrets + +--- + +## Setup Guide + +### 1. Development Environment Setup + +#### 1.1 Install Python 3.11 + +```bash +# Add the deadsnakes PPA (Ubuntu) +sudo add-apt-repository ppa:deadsnakes/ppa -y +sudo apt update + +# Install Python 3.11 +sudo apt install python3.11 python3.11-venv python3.11-dev -y + +# Verify installation +python3.11 --version +``` + +#### 1.2 Clone Repository + +```bash +# Clone the Kasal repository +git clone https://github.com/databrickslabs/kasal.git +cd kasal + +# Checkout the feature branch +git checkout feature/pbi-tool +``` + +#### 1.3 Create Virtual Environment + +```bash +# Create virtual environment with Python 3.11 +python3.11 -m venv venv + +# Activate the environment +source venv/bin/activate + +# Upgrade pip +pip install --upgrade pip +``` + +#### 1.4 Install Dependencies + +```bash +# Navigate to src directory +cd src + +# Install Python dependencies +pip install -r requirements.txt + +# Verify installations +pip freeze | grep -E "crewai|litellm|databricks" +``` + +#### 1.5 Install Node.js (if needed) + +```bash +# Install Node Version Manager (nvm) +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash + +# Load nvm +source ~/.bashrc + +# Install Node.js LTS +nvm install --lts +nvm use --lts + +# Verify installations +node --version +npm --version +``` + +--- + +### 2. Azure Service Principal Setup + +To enable non-interactive authentication, create an Azure Service Principal with Power BI read permissions. + +#### 2.1 Create Service Principal in Azure Portal + +1. **Navigate to Azure Portal**: https://portal.azure.com +2. **Go to Azure Active Directory** → **App registrations** +3. **Click "New registration"**: + - **Name**: `Kasal-PowerBI-Connector` (or your preferred name) + - **Supported account types**: Single tenant + - **Redirect URI**: Leave blank +4. **Note the Application (client) ID** and **Directory (tenant) ID** + +Please consider that for some PowerBI reports a service principal might not be enough, but a service account might be needed. This will be especially the case for PowerBIs that enfore RLS within the PowerBI. + +#### 2.2 Create Client Secret + +1. In your app registration, go to **Certificates & secrets** +2. Click **New client secret** +3. **Description**: `Kasal PowerBI Tool` +4. **Expires**: Choose expiration period (recommended: 90 days) +5. **Copy the secret value** immediately (you won't be able to see it again) + +#### 2.3 Configure API Permissions + +**Critical**: The Service Principal needs **Application** permissions, not **Delegated**. + +1. Go to **API permissions** in your app registration +2. 
**Remove any Delegated permissions** if present +3. Click **Add a permission** +4. Select **Power BI Service** +5. Choose **Application permissions** (NOT Delegated) +6. Check **Dataset.Read.All** +7. Click **Add permissions** +8. **Click "Grant admin consent for [Your Organization]"** (requires admin) + +**Important**: This step requires **Azure AD Admin** privileges. If you don't have admin rights, use the email template in the Appendix. + +#### 2.4 Enable Service Principal in Power BI Admin Portal + +1. Go to **Power BI Admin Portal**: https://app.powerbi.com/admin-portal/tenantSettings +2. Navigate to **Developer settings** (or **Tenant settings**) +3. Find **Service principals can use Power BI APIs** +4. **Enable** this setting +5. Add your Service Principal to the allowed list: + - Option 1: Add specific Service Principal by name + - Option 2: Add to a security group that's allowed + +#### 2.5 Grant Workspace Access + +For each Power BI workspace you want to access: + +1. Open the Power BI workspace +2. Click **Workspace settings** +3. Go to **Access** +4. Click **Add people or groups** +5. Search for your Service Principal name +6. Assign role: **Member** or **Contributor** + +--- + +### 3. Databricks Configuration + +#### 3.1 Set Environment Variables + +```bash +# Set Databricks credentials +export DATABRICKS_TOKEN="your-databricks-token" +export DATABRICKS_HOST="https://your-workspace.cloud.databricks.com/" +``` + +#### 3.2 Configure Databricks CLI + +```bash +# Configure Databricks CLI +databricks configure --host https://your-workspace.cloud.databricks.com --token +``` + +If the prompt doesn't appear: +```bash +# Unset environment variables and retry +unset DATABRICKS_HOST +unset DATABRICKS_TOKEN +databricks configure --host https://your-workspace.cloud.databricks.com --token +``` + +#### 3.3 Verify Connection + +```bash +# Test workspace access +databricks workspace list / +``` + +#### 3.4 Create Databricks Job + +The PowerBIAnalysisTool requires a Databricks job for executing the analysis pipeline. + +1. **Navigate to Databricks Workflows**: + - Go to your Databricks workspace + - Click **Workflows** in the left sidebar + +2. **Create New Job**: + - Click **Create Job** + - **Job Name**: `pbi_e2e_pipeline` + +3. **Add Task**: + - Click **Add Task** + - **Task Name**: `pbi_e2e_pipeline` + - **Type**: Notebook + - **Notebook Path**: `/Workspace/Shared/powerbi_full_pipeline` + - **Cluster**: Select or create appropriate cluster + +4. **Note the Job ID**: + - After creating the job, copy the **Job ID** from the URL + - Example: `365257288725339` + - This will be used in PowerBIAnalysisTool configuration + +#### 3.5 Upload Pipeline Notebook + +```bash +# Upload the notebook to Databricks +databricks workspace import \ + examples/powerbi_full_pipeline.ipynb \ + /Workspace/Shared/powerbi_full_pipeline \ + --language PYTHON \ + --format JUPYTER +``` + +Or manually upload via Databricks UI: +1. Go to **Workspace** → **Shared** +2. Click **Create** → **Import** +3. Upload `examples/powerbi_full_pipeline.ipynb` + +--- + +### 4. Kasal Configuration + +#### 4.1 Build Frontend + +```bash +# From the project root +python src/build.py +``` + +This creates a `frontend_static` folder with compiled React application. + +#### 4.2 Deploy to Databricks Apps + +```bash +# Deploy the application +cd src +python deploy.py \ + --app-name kasal \ + --user-name your-email@domain.com +``` + +**Note**: Replace `--app-name` and `--user-name` with your specific values. 
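Once the deployment finishes, it can help to confirm the app is reachable before adding keys. Below is a minimal check against the health endpoint from the API reference, assuming a placeholder app URL and the same bearer-token header used in the other API examples:

```bash
# Sanity-check the deployed app (placeholder URL and token)
curl -s "https://<your-app>.databricksapps.com/api/v1/health" \
  -H "Authorization: Bearer $TOKEN"
```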
+ +#### 4.3 Configure API Keys + +After deploying, configure required API Keys: + +1. **Navigate to Configuration** → **API Keys** +2. **Add the following keys**: + - `POWERBI_CLIENT_SECRET`: Service Principal secret (from section 2.2) + - `POWERBI_USERNAME`: Power BI service account email (for device code auth) + - `POWERBI_PASSWORD`: Service account password (for device code auth) + - `DATABRICKS_API_KEY` or `DATABRICKS_TOKEN`: Databricks access token + +**Important**: All values are encrypted at rest and never returned in plain text via API. + +#### 4.4 Enable PowerBIAnalysisTool + +1. Go to **Tools** section +2. Find **PowerBIAnalysisTool** +3. Review security disclaimers +4. Enable the tool for your workspace + +--- + +## Authentication Methods + +The PowerBIAnalysisTool supports two authentication methods: + +### Service Principal (Recommended for Production) + +**Best for**: Automated workflows, production deployments, unattended execution + +**Requirements**: +- `tenant_id`: Azure AD Tenant ID +- `client_id`: Service Principal Application ID +- `POWERBI_CLIENT_SECRET`: Stored in API Keys + +**Configuration**: +```json +{ + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "auth_method": "service_principal" +} +``` + +**Advantages**: +- Non-interactive, fully automated +- No MFA requirements +- Production-ready +- Supports scheduled workflows + +### Device Code Flow (Recommended for Testing) + +**Best for**: Development, testing, personal workspaces + +**Requirements**: +- `tenant_id`: Azure AD Tenant ID +- `client_id`: Can use Power BI public client `1950a258-227b-4e31-a9cf-717495945fc2` +- `POWERBI_USERNAME`: User email (stored in API Keys) +- `POWERBI_PASSWORD`: User password (stored in API Keys) + +**Configuration**: +```json +{ + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "1950a258-227b-4e31-a9cf-717495945fc2", + "auth_method": "device_code" +} +``` + +**How it works**: +1. First request prompts: "Visit microsoft.com/devicelogin" +2. Enter provided code in browser +3. Sign in with your credentials +4. Token is cached for subsequent requests (~1 hour) + +**Advantages**: +- No Service Principal setup required +- Uses your personal Power BI permissions +- Perfect for development and testing +- Supports MFA + +--- + +## API Configuration + +### Task-Level Configuration + +Configure Power BI settings at the **task level** for flexibility across different semantic models: + +1. **Create or Edit Task** +2. **Select PowerBIAnalysisTool** in tools list +3. 
**Configure Power BI settings** (fields appear automatically): + - **Tenant ID**: Azure AD tenant GUID + - **Client ID**: Service Principal or app client ID + - **Workspace ID**: Power BI workspace GUID (optional) + - **Semantic Model ID**: Power BI semantic model/dataset GUID + - **Auth Method**: `service_principal` or `device_code` + - **Databricks Job ID**: Databricks job ID for analysis pipeline + +**Example Task Configuration**: +```json +{ + "name": "Analyze Sales Data", + "description": "Analyze Q4 sales trends using Power BI", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +### Required API Keys Check + +The UI automatically checks for required API Keys when PowerBIAnalysisTool is selected: +- `POWERBI_CLIENT_SECRET` +- `POWERBI_USERNAME` +- `POWERBI_PASSWORD` +- `DATABRICKS_API_KEY` (or `DATABRICKS_TOKEN`) + +If keys are missing, an error alert is displayed with instructions. + +--- + +## PowerBI Analysis Tool + +### Tool Overview + +**PowerBIAnalysisTool** (ID: 71) enables complex Power BI analysis via Databricks job orchestration. + +**Best for**: +- Heavy computation and large datasets +- Complex multi-query analysis +- Year-over-year comparisons +- Trend detection and forecasting +- Resource-intensive business intelligence tasks + +### Tool Parameters + +**Input Parameters**: +- `question` (str): Business question to analyze +- `dashboard_id` (str): Semantic model ID (can be provided by LLM or task config) +- `workspace_id` (str): Power BI workspace ID (optional) +- `additional_params` (dict): Optional additional parameters + +**Configuration** (from tool_configs): +- `tenant_id`: Azure AD tenant +- `client_id`: Application client ID +- `semantic_model_id`: Default semantic model +- `workspace_id`: Default workspace +- `auth_method`: Authentication method +- `databricks_job_id`: Databricks job ID for pipeline + +### Agent Configuration Example + +```json +{ + "role": "Business Intelligence Analyst", + "goal": "Perform complex Power BI analysis using Databricks", + "backstory": "Expert analyst with deep understanding of business metrics", + "tools": ["PowerBIAnalysisTool"], + "llm_config": { + "model": "databricks-meta-llama-3-1-70b-instruct", + "temperature": 0.1 + } +} +``` + +### Task Configuration Example + +```json +{ + "name": "Q4 Revenue Analysis", + "description": "Analyze Q4 2024 revenue trends by product category and region, comparing year-over-year growth", + "expected_output": "Comprehensive analysis report with insights and recommendations", + "agent_id": "agent_123", + "tools": [71], + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +### How It Works + +1. **Agent receives task** with business question +2. **PowerBIAnalysisTool invoked** with question and semantic model ID +3. **Tool retrieves credentials** from API Keys Service +4. 
**Tool auto-detects** databricks_host from environment +5. **Databricks job triggered** with parameters: + - `question`: Business question + - `semantic_model_id`: Dataset to query + - `workspace_id`: Power BI workspace + - `tenant_id`, `client_id`: Authentication + - `client_secret`, `username`, `password`: Credentials + - `databricks_host`, `databricks_token`: For recursive auth +6. **Job executes pipeline**: + - Extracts Power BI metadata + - Generates DAX query from question + - Executes query against Power BI + - Returns structured results +7. **Agent receives results** and continues workflow + +--- + +## Testing + +### Local Development Testing + +#### 1. Start Services + +**Backend**: +```bash +cd src/backend +./run.sh sqlite +# Backend starts on http://localhost:8000 +``` + +**Frontend**: +```bash +cd src/frontend +npm start +# Frontend starts on http://localhost:3000 +``` + +#### 2. Configure via UI + +1. Open http://localhost:3000 +2. Navigate to **Configuration** → **API Keys** +3. Add required keys: + - `POWERBI_CLIENT_SECRET` + - `POWERBI_USERNAME` + - `POWERBI_PASSWORD` + - `DATABRICKS_API_KEY` +4. Navigate to **Tools** → Enable **PowerBIAnalysisTool** + +#### 3. Create Test Agent and Task + +**Agent**: +```json +{ + "role": "Sales Analyst", + "goal": "Analyze Power BI sales data", + "tools": ["PowerBIAnalysisTool"] +} +``` + +**Task**: +```json +{ + "description": "What is the total revenue for Q4 2024?", + "expected_output": "Revenue figure with analysis", + "tool_configs": { + "PowerBIAnalysisTool": { + "tenant_id": "your-tenant-id", + "client_id": "your-client-id", + "semantic_model_id": "your-model-id", + "workspace_id": "your-workspace-id", + "auth_method": "service_principal", + "databricks_job_id": 365257288725339 + } + } +} +``` + +#### 4. Run Workflow + +1. Click **Run Crew** +2. Monitor execution in **Runs** tab +3. Check Databricks **Workflows** for job execution +4. Verify results in execution logs + +### Production Testing (Databricks App) + +#### 1. Deploy to Databricks + +```bash +cd src +python deploy.py --app-name kasal-prod --user-name your-email@domain.com +``` + +#### 2. Configure in Deployed App + +1. Open deployed app URL +2. Navigate to **Configuration** → **API Keys** +3. Add production credentials +4. Enable **PowerBIAnalysisTool** + +#### 3. Create Production Workflow + +Create agent and task using production semantic model IDs and workspace IDs. + +#### 4. End-to-End Test + +1. Run crew execution +2. Monitor Databricks job logs +3. Verify Power BI API calls in Azure AD audit logs +4. Validate results accuracy + +### Sample Test Queries + +**Simple aggregation**: +```json +{ + "question": "What is the total revenue by region?" +} +``` + +**Year-over-year analysis**: +```json +{ + "question": "Compare Q4 2024 revenue to Q4 2023 by product category" +} +``` + +**Trend analysis**: +```json +{ + "question": "Show monthly sales trends for the last 12 months" +} +``` + +--- + +## Troubleshooting + +### Authentication Issues + +**Error**: "Provided OAuth token does not have required scopes" + +**Causes**: +- Missing OAuth scopes in Databricks App configuration +- Service Principal lacks Power BI API permissions + +**Solutions**: +1. Verify Service Principal has **Application** (not Delegated) permissions +2. Ensure admin consent was granted in Azure AD +3. Check Service Principal is enabled in Power BI Admin Portal +4. 
For Databricks Apps, configure OAuth scopes: `sql`, `all-apis` + +--- + +**Error**: "Authentication failed: 403 Forbidden" + +**Causes**: +- Service Principal doesn't have workspace access +- Incorrect workspace ID + +**Solutions**: +1. Add Service Principal to Power BI workspace with Member/Contributor role +2. Verify workspace_id matches the actual workspace GUID +3. Check Power BI audit logs for access denied events + +--- + +### Configuration Issues + +**Error**: "tenant_id showing as 'your_tenant_id'" + +**Cause**: LLM-provided placeholder values taking precedence over task config + +**Solution**: Verify task configuration priority in tool_factory.py - task config should override LLM values + +--- + +**Error**: "semantic_model_id truncated or incorrect" + +**Cause**: dashboard_id from kwargs overriding task config value + +**Solution**: Check powerbi_analysis_tool.py lines 314-316 for proper priority handling + +--- + +**Error**: "Missing databricks_host or databricks_token in job parameters" + +**Cause**: Credentials not being passed to job parameters + +**Solution**: Verify powerbi_analysis_tool.py lines 411-418 add credentials to job_params + +--- + +### Job Execution Issues + +**Error**: "Databricks job times out" + +**Causes**: +- Large dataset +- Complex DAX query +- Insufficient cluster resources + +**Solutions**: +1. Increase job timeout in tool configuration +2. Optimize DAX query for performance +3. Use more powerful cluster for the job +4. Consider breaking analysis into smaller queries + +--- + +**Error**: "Dataset.Read.All permission not found" + +**Cause**: Using Delegated permission instead of Application permission + +**Solution**: +1. Go to Azure AD → App registrations → API permissions +2. Remove Delegated permissions +3. Add **Application** permission: Dataset.Read.All +4. Grant admin consent + +--- + +**Error**: "Client secret expired" + +**Cause**: Azure client secrets expire after set period + +**Solution**: +1. Create new client secret in Azure Portal +2. Update `POWERBI_CLIENT_SECRET` in API Keys +3. Rotate secrets regularly (recommended: every 90 days) + +--- + +## Security & Best Practices + +### Credential Management + +1. **Use API Keys Service**: + - All credentials stored encrypted at rest + - Multi-tenant isolation via group_id + - Never commit credentials to source control + +2. **Rotate Credentials Regularly**: + - Rotate Service Principal secrets every 90 days + - Use Azure Key Vault for production deployments + - Monitor credential usage in audit logs + +3. **Principle of Least Privilege**: + - Only grant workspace access where needed + - Use Power BI RLS (Row-Level Security) for data filtering + - Limit Service Principal to read-only permissions + +### Production Secret Management with Key Vaults + +For production deployments, **never pass credentials directly as job parameters**. 
Instead, use key vault references: + +#### Architecture Pattern + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Kasal App │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Pass secret names only + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Databricks Job │ +│ Parameters: │ +│ { │ +│ "client_secret_key": "powerbi-client-secret" ← Secret name +│ "username_key": "powerbi-username" ← Secret name +│ } │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ + │ Retrieve actual values + ā–¼ +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ Key Vault Storage │ +│ (Azure Key Vault, │ +│ Databricks Secrets, │ +│ AWS Secrets Manager) │ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +#### Option 1: Azure Key Vault (Recommended for Azure) + +**Setup Azure Key Vault:** + +1. **Create Key Vault** in Azure Portal +2. **Add Secrets**: + - `powerbi-client-secret`: Service Principal secret + - `powerbi-username`: Service account username + - `powerbi-password`: Service account password + - `databricks-token`: Databricks PAT + +3. **Grant Access** to Databricks workspace: + - Use Managed Identity or Service Principal + - Assign "Key Vault Secrets User" role + +**Configure Databricks to Access Azure Key Vault:** + +```bash +# Create secret scope backed by Azure Key Vault +databricks secrets create-scope --scope azure-key-vault \ + --scope-backend-type AZURE_KEYVAULT \ + --resource-id /subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.KeyVault/vaults/{vault-name} \ + --dns-name https://{vault-name}.vault.azure.net/ +``` + +**Notebook Code (Secure Approach):** + +```python +import os + +# Retrieve secrets from Databricks secret scope (backed by Azure Key Vault) +client_secret = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-client-secret") +username = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-username") +password = dbutils.secrets.get(scope="azure-key-vault", key="powerbi-password") +databricks_token = dbutils.secrets.get(scope="azure-key-vault", key="databricks-token") + +# Use credentials for authentication +powerbi_config = { + "tenant_id": dbutils.widgets.get("tenant_id"), + "client_id": dbutils.widgets.get("client_id"), + "client_secret": client_secret, # Retrieved from Key Vault + "username": username, # Retrieved from Key Vault + "password": password # Retrieved from Key Vault +} +``` + +**Job Parameters (No Sensitive Data):** + +```json +{ + "question": "Analyze Q4 revenue", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9" +} +``` + +**Note**: No secrets in job parameters! All retrieved from Key Vault. 
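With this pattern, whatever triggers the job only ever sends non-sensitive values. As a rough sketch, assuming the standard Databricks Jobs 2.1 `run-now` endpoint and the job ID from section 3.4 (adapt the parameter names to your notebook's widgets):

```bash
# Trigger the analysis job with non-sensitive notebook parameters only;
# the notebook resolves secrets itself via dbutils.secrets.get()
curl -X POST "$DATABRICKS_HOST/api/2.1/jobs/run-now" \
  -H "Authorization: Bearer $DATABRICKS_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "job_id": 365257288725339,
    "notebook_params": {
      "question": "Analyze Q4 revenue",
      "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75",
      "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227",
      "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc",
      "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9",
      "auth_method": "service_principal"
    }
  }'
```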
+ +--- + +#### Option 2: Databricks Secrets (Recommended for Multi-Cloud) + +**Setup Databricks Secrets:** + +```bash +# Create secret scope +databricks secrets create-scope --scope powerbi-secrets + +# Add secrets +databricks secrets put --scope powerbi-secrets --key client-secret +databricks secrets put --scope powerbi-secrets --key username +databricks secrets put --scope powerbi-secrets --key password +databricks secrets put --scope powerbi-secrets --key databricks-token +``` + +**Notebook Code:** + +```python +# Retrieve secrets from Databricks secret scope +client_secret = dbutils.secrets.get(scope="powerbi-secrets", key="client-secret") +username = dbutils.secrets.get(scope="powerbi-secrets", key="username") +password = dbutils.secrets.get(scope="powerbi-secrets", key="password") +databricks_token = dbutils.secrets.get(scope="powerbi-secrets", key="databricks-token") + +# No secrets in widgets or parameters +``` + +--- + +#### Option 3: Environment Variables (Development Only) + +**For local development**, use environment variables: + +```python +import os + +# Retrieve from environment +client_secret = os.getenv("POWERBI_CLIENT_SECRET") +username = os.getenv("POWERBI_USERNAME") +password = os.getenv("POWERBI_PASSWORD") +databricks_token = os.getenv("DATABRICKS_TOKEN") +``` + +**Set environment variables** in Databricks job cluster configuration: + +```json +{ + "spark_env_vars": { + "POWERBI_CLIENT_SECRET": "{{secrets/powerbi-secrets/client-secret}}", + "POWERBI_USERNAME": "{{secrets/powerbi-secrets/username}}", + "POWERBI_PASSWORD": "{{secrets/powerbi-secrets/password}}" + } +} +``` + +--- + +#### Security Benefits + +**Why This Approach is More Secure:** + +1. **No Credentials in Logs**: + - Job parameters logged in Databricks job history + - Secret values never appear in logs + - Only secret names/references visible + +2. **Centralized Secret Management**: + - Single source of truth for all secrets + - Consistent access control policies + - Easier audit and compliance + +3. **Secret Rotation Without Code Changes**: + - Update secret in Key Vault + - No need to update job parameters + - No application redeployment required + +4. **Audit Trail**: + - Key Vault logs all secret access + - Track who/what accessed secrets and when + - Compliance reporting built-in + +5. **Principle of Least Privilege**: + - Grant secret access only to specific jobs/clusters + - Scope-based access control + - Temporary access tokens + +--- + +#### Migration from Direct Credentials + +**Current Approach** (Less Secure): + +```python +# Job parameters include secrets +{ + "client_secret": "xxxx-YOUR-SECRET-HERE-xxxx", # āŒ INSECURE - Never do this! + "password": "YourPassword123" # āŒ INSECURE - Never do this! 
+} +``` + +**Secure Approach** (Recommended): + +```python +# Job parameters reference secret names only +{ + "secret_scope": "azure-key-vault", # āœ… SECURE + "client_secret_key": "powerbi-client-secret" # āœ… SECURE +} + +# In notebook +client_secret = dbutils.secrets.get( + scope=dbutils.widgets.get("secret_scope"), + key=dbutils.widgets.get("client_secret_key") +) +``` + +--- + +#### Implementation Checklist + +**For Production Deployment:** + +- [ ] Create Azure Key Vault or Databricks secret scope +- [ ] Migrate all credentials to Key Vault +- [ ] Update notebook to retrieve secrets via `dbutils.secrets.get()` +- [ ] Remove credentials from job parameters +- [ ] Grant Key Vault access to Databricks workspace +- [ ] Test secret retrieval in notebook +- [ ] Verify credentials not visible in job logs +- [ ] Set up Key Vault access audit logging +- [ ] Document secret rotation procedures +- [ ] Configure alerts for unauthorized access attempts + +--- + +#### Example: Secure Notebook Implementation + +```python +# Power BI Analysis Notebook - Secure Implementation + +import os +from azure.identity import ClientSecretCredential + +# Widget parameters (non-sensitive) +question = dbutils.widgets.get("question") +semantic_model_id = dbutils.widgets.get("semantic_model_id") +workspace_id = dbutils.widgets.get("workspace_id") +tenant_id = dbutils.widgets.get("tenant_id") +client_id = dbutils.widgets.get("client_id") +auth_method = dbutils.widgets.get("auth_method") + +# Retrieve secrets from Key Vault (secure) +secret_scope = "azure-key-vault" # or from widget parameter +client_secret = dbutils.secrets.get(scope=secret_scope, key="powerbi-client-secret") +username = dbutils.secrets.get(scope=secret_scope, key="powerbi-username") +password = dbutils.secrets.get(scope=secret_scope, key="powerbi-password") +databricks_token = dbutils.secrets.get(scope=secret_scope, key="databricks-token") + +# Authenticate based on method +if auth_method == "service_principal": + credential = ClientSecretCredential( + tenant_id=tenant_id, + client_id=client_id, + client_secret=client_secret # From Key Vault + ) +elif auth_method == "device_code": + # Use username/password from Key Vault + credential = authenticate_device_code(username, password) + +# Execute Power BI analysis +result = execute_powerbi_query( + question=question, + semantic_model_id=semantic_model_id, + workspace_id=workspace_id, + credential=credential +) + +print(result) +``` + +**Job Parameters** (All Non-Sensitive): + +```json +{ + "question": "What is the total revenue for Q4 2024?", + "semantic_model_id": "a17de62e-8dc0-4a8a-acaa-2a9954de8c75", + "workspace_id": "bcb084ed-f8c9-422c-b148-29839c0f9227", + "tenant_id": "9f37a392-f0ae-4280-9796-f1864a10effc", + "client_id": "7b597aac-de00-44c9-8e2a-3d2c345c36a9", + "auth_method": "service_principal" +} +``` + +**Note**: Zero credentials in job parameters or logs! + +--- + +### Monitoring + +1. **Power BI Audit Logs**: + - Review regularly for unusual API activity + - Set up alerts for failed authentication attempts + - Track query patterns and data access + +2. **Databricks Job Monitoring**: + - Monitor job execution times + - Track failure rates and error patterns + - Set up alerts for job failures + +3. 
**Application Logging**: + - Log all Power BI API calls + - Track authentication events + - Monitor tool usage patterns + +### Production Deployment Checklist + +- [ ] Service Principal created with Application permissions +- [ ] Admin consent granted in Azure AD +- [ ] Service Principal enabled in Power BI Admin Portal +- [ ] Workspace access granted to Service Principal +- [ ] API Keys configured in Kasal +- [ ] Databricks job created and tested +- [ ] Tool enabled and configured +- [ ] End-to-end testing completed +- [ ] Monitoring and alerts configured +- [ ] Credential rotation policy established + +--- + +## Appendix + +### Email Template for Azure Admin + +Use this template when requesting Azure admin assistance: + +``` +Subject: Azure AD Admin Consent Required for Power BI Service Principal + +Hi [Admin Name], + +I need admin consent for a Service Principal to enable automated Power BI data access for our Kasal AI platform. + +**Service Principal Details:** +- Name: Kasal-PowerBI-Connector +- App ID: [Your Application ID] +- Requested Permission: Power BI Service → Dataset.Read.All (Application) + +**Steps Required:** +1. Go to Azure Portal → Azure AD → App registrations → Kasal-PowerBI-Connector +2. Go to "API permissions" +3. Remove any Delegated Dataset.Read.All permission +4. Add Application permission: Power BI Service → Dataset.Read.All (Application, not Delegated) +5. Click "Grant admin consent for [Organization]" + +**Additionally:** +- Enable Service Principal in Power BI Admin Portal under "Developer settings" +- Allow service principals to use Power BI APIs + +**Test Plan:** +After setup, I will test by running the Kasal app: [Your App URL] + +Let me know if you have any questions! + +Best regards, +[Your Name] +``` + +--- + +## References + +- **Power BI REST API**: https://learn.microsoft.com/en-us/rest/api/power-bi/ +- **DAX Query Language**: https://learn.microsoft.com/en-us/dax/ +- **Azure Identity SDK**: https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme +- **Databricks Jobs API**: https://docs.databricks.com/dev-tools/api/latest/jobs.html +- **CrewAI Documentation**: https://docs.crewai.com/ +- **Kasal API Documentation**: [api_endpoints.md](api_endpoints.md) + +--- + +**Document Version**: 2.0 +**Last Updated**: 2025-12-04 +**Maintained By**: Kasal Development Team diff --git a/src/requirements.txt b/src/requirements.txt index 7d66b0c4..962f380e 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -39,6 +39,7 @@ rich>=13.7.0,<14.0.0 # Required by embedchain 0.1.128 pytest-cov psutil # For process management and cleanup in ProcessCrewExecutor mlflow==3.4.0 # For LLM observability and tracking in Databricks +azure-identity>=1.15.0 # For Power BI authentication (UsernamePasswordCredential) protobuf>=5.29.0,<6.0.0 # mem0ai compatibility databricks-agents>=1.4.0 grpcio-status>=1.62.0,<1.71.0 # Compatible with protobuf <6.0.0 diff --git a/test_powerbi_connection_interactive.ipynb b/test_powerbi_connection_interactive.ipynb new file mode 100644 index 00000000..d55a844d --- /dev/null +++ b/test_powerbi_connection_interactive.ipynb @@ -0,0 +1,1295 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "a74d3571-a9fb-49aa-b68b-b482749a56c1", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "# Power BI Connection Test - Interactive Authentication\n", + "Uses browser-based authentication to 
support MFA.\n", + "\n", + "**Dataset Details:**\n", + "- Workspace ID: `bcb084ed-f8c9-422c-b148-29839c0f9227`\n", + "- Semantic Model ID: `a17de62e-8dc0-4a8a-acaa-2a9954de8c75`" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "cd36b664-d50d-4139-9406-2ff5999576f5", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting azure-identity\n Downloading azure_identity-1.25.1-py3-none-any.whl (191 kB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 191.3/191.3 kB 2.7 MB/s eta 0:00:00\nRequirement already satisfied: requests in /databricks/python3/lib/python3.10/site-packages (2.28.1)\nRequirement already satisfied: pandas in /databricks/python3/lib/python3.10/site-packages (1.5.3)\nCollecting azure-core>=1.31.0\n Downloading azure_core-1.35.1-py3-none-any.whl (211 kB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 211.8/211.8 kB 6.8 MB/s eta 0:00:00\nCollecting msal-extensions>=1.2.0\n Downloading msal_extensions-1.3.1-py3-none-any.whl (20 kB)\nCollecting msal>=1.30.0\n Downloading msal-1.34.0-py3-none-any.whl (116 kB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 117.0/117.0 kB 16.5 MB/s eta 0:00:00\nRequirement already satisfied: typing-extensions>=4.0.0 in /databricks/python3/lib/python3.10/site-packages (from azure-identity) (4.4.0)\nRequirement already satisfied: cryptography>=2.5 in /databricks/python3/lib/python3.10/site-packages (from azure-identity) (39.0.1)\nRequirement already satisfied: certifi>=2017.4.17 in /databricks/python3/lib/python3.10/site-packages (from requests) (2022.12.7)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /databricks/python3/lib/python3.10/site-packages (from requests) (1.26.14)\nRequirement already satisfied: idna<4,>=2.5 in /databricks/python3/lib/python3.10/site-packages (from requests) (3.4)\nRequirement already satisfied: charset-normalizer<3,>=2 in /databricks/python3/lib/python3.10/site-packages (from requests) (2.0.4)\nRequirement already satisfied: python-dateutil>=2.8.1 in /databricks/python3/lib/python3.10/site-packages (from pandas) (2.8.2)\nRequirement already satisfied: pytz>=2020.1 in /databricks/python3/lib/python3.10/site-packages (from pandas) (2022.7)\nRequirement already satisfied: numpy>=1.21.0 in /databricks/python3/lib/python3.10/site-packages (from pandas) (1.23.5)\nCollecting typing-extensions>=4.0.0\n Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.6/44.6 kB 4.2 MB/s eta 0:00:00\nRequirement already satisfied: six>=1.11.0 in /usr/lib/python3/dist-packages (from azure-core>=1.31.0->azure-identity) (1.16.0)\nRequirement already satisfied: cffi>=1.12 in /databricks/python3/lib/python3.10/site-packages (from cryptography>=2.5->azure-identity) (1.15.1)\nRequirement already satisfied: PyJWT[crypto]<3,>=1.0.0 in /usr/lib/python3/dist-packages (from msal>=1.30.0->azure-identity) (2.3.0)\nRequirement already satisfied: pycparser in /databricks/python3/lib/python3.10/site-packages (from cffi>=1.12->cryptography>=2.5->azure-identity) (2.21)\nInstalling collected packages: typing-extensions, azure-core, msal, msal-extensions, azure-identity\n Attempting uninstall: typing-extensions\n Found existing installation: typing_extensions 4.4.0\n Not uninstalling typing-extensions at 
/databricks/python3/lib/python3.10/site-packages, outside environment /local_disk0/.ephemeral_nfs/envs/pythonEnv-94f3a408-c949-41c7-a53f-be5af906856c\n Can't uninstall 'typing_extensions'. No files were found to uninstall.\nSuccessfully installed azure-core-1.35.1 azure-identity-1.25.1 msal-1.34.0 msal-extensions-1.3.1 typing-extensions-4.15.0\n\u001B[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.\u001B[0m\n" + ] + } + ], + "source": [ + "%pip install azure-identity requests pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "3d76428c-bd99-4e68-87f5-9e727ea988ca", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Configuration loaded\n" + ] + } + ], + "source": [ + "from azure.identity import InteractiveBrowserCredential, DeviceCodeCredential\n", + "import requests\n", + "import pandas as pd\n", + "\n", + "# Configuration\n", + "TENANT_ID = \"9f37a392-f0ae-4280-9796-f1864a10effc\" # Your tenant ID\n", + "CLIENT_ID = \"1950a258-227b-4e31-a9cf-717495945fc2\" # Power BI public client\n", + "\n", + "# Dataset information\n", + "WORKSPACE_ID = \"bcb084ed-f8c9-422c-b148-29839c0f9227\"\n", + "SEMANTIC_MODEL_ID = \"a17de62e-8dc0-4a8a-acaa-2a9954de8c75\"\n", + "\n", + "print(\"Configuration loaded\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "f0e33a80-9c41-4d30-b2b8-c66f841e3331", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Method 2: Device Code Flow (Best for Databricks/remote environments)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "77344e9e-ef8e-47ed-b5c4-b502dcc62c66", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "\nāš ļø Follow the instructions above to authenticate\nTo sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code BHFEVBK7L to authenticate.\nāœ“ Token generated successfully\nToken length: 2048 characters\n" + ] + } + ], + "source": [ + "def generate_token_device_code(tenant_id: str, client_id: str) -> str:\n", + " \"\"\"\n", + " Generate token using device code flow.\n", + " You'll get a code to enter at microsoft.com/devicelogin\n", + " \"\"\"\n", + " try:\n", + " credential = DeviceCodeCredential(\n", + " client_id=client_id,\n", + " tenant_id=tenant_id,\n", + " )\n", + " \n", + " # Get token for Power BI API\n", + " print(\"\\nāš ļø Follow the instructions above to authenticate\")\n", + " token = credential.get_token(\"https://analysis.windows.net/powerbi/api/.default\")\n", + " print(\"āœ“ Token generated successfully\")\n", + " return token.token\n", + " except Exception as e:\n", + " print(f\"āœ— Token generation failed: {str(e)}\")\n", + " raise\n", + "\n", + "# Uncomment to use device code flow instead\n", + "access_token = generate_token_device_code(TENANT_ID, CLIENT_ID)\n", + 
"print(f\"Token length: {len(access_token)} characters\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3cf2e913-baff-42f7-ae12-0b3a92556f58", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Test Connection - Get Dataset Metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "884deb1e-1182-4f30-a913-9b5137aad440", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Response status: 200\nāœ“ Successfully connected to dataset\n\nDataset Name: test_pbi\nDataset ID: a17de62e-8dc0-4a8a-acaa-2a9954de8c75\nIs Refreshable: True\n" + ] + } + ], + "source": [ + "def get_dataset_info(token: str, dataset_id: str) -> dict:\n", + " \"\"\"\n", + " Get metadata about the dataset to verify connection.\n", + " \"\"\"\n", + " url = f\"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}\"\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {token}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + " \n", + " response = requests.get(url, headers=headers)\n", + " print(f\"Response status: {response.status_code}\")\n", + " \n", + " if response.status_code == 200:\n", + " print(\"āœ“ Successfully connected to dataset\")\n", + " return response.json()\n", + " else:\n", + " print(f\"āœ— Failed to connect: {response.text}\")\n", + " return {}\n", + "\n", + "# Get dataset info\n", + "dataset_info = get_dataset_info(access_token, SEMANTIC_MODEL_ID)\n", + "if dataset_info:\n", + " print(f\"\\nDataset Name: {dataset_info.get('name', 'N/A')}\")\n", + " print(f\"Dataset ID: {dataset_info.get('id', 'N/A')}\")\n", + " print(f\"Is Refreshable: {dataset_info.get('isRefreshable', 'N/A')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "2b8dfe2d-8ebc-4e71-939b-93876e1adccd", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Execute DAX Query" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f3320ba2-f1ce-4214-8a24-df5a741ba5a3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "def execute_dax_query(token: str, dataset_id: str, dax_query: str) -> pd.DataFrame:\n", + " \"\"\"\n", + " Execute a DAX query against the Power BI dataset.\n", + " \"\"\"\n", + " url = f\"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/executeQueries\"\n", + " headers = {\n", + " \"Authorization\": f\"Bearer {token}\",\n", + " \"Content-Type\": \"application/json\"\n", + " }\n", + " \n", + " body = {\n", + " \"queries\": [\n", + " {\n", + " \"query\": dax_query\n", + " }\n", + " ],\n", + " \"serializerSettings\": {\n", + " \"includeNulls\": True\n", + " }\n", + " }\n", + " \n", + " print(f\"Executing DAX query...\")\n", + " print(f\"Query: {dax_query[:100]}...\")\n", + " response = requests.post(url, headers=headers, json=body, timeout=30)\n", + " print(f\"Response status: 
{response.status_code}\")\n", + " \n", + " if response.status_code == 200:\n", + " results = response.json().get(\"results\", [])\n", + " if results and results[0].get(\"tables\"):\n", + " rows = results[0][\"tables\"][0].get(\"rows\", [])\n", + " if rows:\n", + " df = pd.DataFrame(rows)\n", + " print(f\"āœ“ Query successful: {len(df)} rows returned\")\n", + " return df\n", + " else:\n", + " print(\"⚠ Query returned no rows\")\n", + " return pd.DataFrame()\n", + " else:\n", + " print(\"⚠ No tables in response\")\n", + " return pd.DataFrame()\n", + " else:\n", + " print(f\"āœ— Query failed: {response.text}\")\n", + " return pd.DataFrame()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "caad1ca4-d2cc-4950-82f9-9c2c011f57f0", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Query Your Test Data\n", + "Replace `test_data` with the actual table name from the results above" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "1faf9422-0ac5-4557-87db-c76e2c8bd025", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing DAX query...\nQuery: \nEVALUATE\nTOPN(\n 100,\n TestData\n)\n...\nResponse status: 200\nāœ“ Query successful: 100 rows returned\n\nQuery Results:\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "
    [HTML table output condensed: 100 rows returned by EVALUATE TOPN(100, TestData), with columns TestData[fiscper], TestData[country], TestData[product], TestData[nsr], TestData[cogs], TestData[net_income]; the same rows appear in the structured output data below.]
    " + ] + }, + "metadata": { + "application/vnd.databricks.v1+output": { + "addedWidgets": {}, + "aggData": [], + "aggError": "", + "aggOverflow": false, + "aggSchema": [], + "aggSeriesLimitReached": false, + "aggType": "", + "arguments": {}, + "columnCustomDisplayInfos": {}, + "data": [ + [ + 2025001, + "US", + "product_a", + 948098.35, + 550339.99, + 284132.16 + ], + [ + 2025001, + "US", + "product_b", + 814986.7, + 484678.3, + 221491.96 + ], + [ + 2025001, + "US", + "product_c", + 140653.39, + 64144.59, + 58286.37 + ], + [ + 2025001, + "CH", + "product_a", + 750605.98, + 348458.51, + 318911.28 + ], + [ + 2025001, + "CH", + "product_b", + 362438.24, + 146104.42, + 172259.95 + ], + [ + 2025001, + "CH", + "product_c", + 511223.14, + 229828.67, + 209732.01 + ], + [ + 2025001, + "JP", + "product_a", + 828814.79, + 341783.11, + 329301.18 + ], + [ + 2025001, + "JP", + "product_b", + 297355.43, + 159337.8, + 84615.56 + ], + [ + 2025001, + "JP", + "product_c", + 347502.0, + 148410.79, + 132671.56 + ], + [ + 2025002, + "US", + "product_a", + 529548.09, + 245267.82, + 205204.95 + ], + [ + 2025002, + "US", + "product_b", + 838237.26, + 345192.75, + 357337.11 + ], + [ + 2025002, + "US", + "product_c", + 565527.11, + 264761.07, + 189725.91 + ], + [ + 2025002, + "CH", + "product_a", + 247105.76, + 98942.41, + 105444.47 + ], + [ + 2025002, + "CH", + "product_b", + 259029.1, + 131215.73, + 84001.9 + ], + [ + 2025002, + "CH", + "product_c", + 363869.32, + 190673.07, + 119986.33 + ], + [ + 2025002, + "JP", + "product_a", + 837276.11, + 393878.8, + 359277.27 + ], + [ + 2025002, + "JP", + "product_b", + 834042.08, + 361367.97, + 353984.66 + ], + [ + 2025002, + "JP", + "product_c", + 865819.92, + 348139.38, + 354761.06 + ], + [ + 2025003, + "US", + "product_a", + 866423.2, + 377260.22, + 318189.9 + ], + [ + 2025003, + "US", + "product_b", + 631029.56, + 287242.75, + 271411.25 + ], + [ + 2025003, + "US", + "product_c", + 732655.42, + 318781.35, + 329515.18 + ], + [ + 2025003, + "CH", + "product_a", + 370891.6, + 186642.92, + 138023.76 + ], + [ + 2025003, + "CH", + "product_b", + 472839.91, + 274611.34, + 137700.57 + ], + [ + 2025003, + "CH", + "product_c", + 503415.04, + 261022.8, + 146146.67 + ], + [ + 2025003, + "JP", + "product_a", + 400256.44, + 194567.62, + 150389.96 + ], + [ + 2025003, + "JP", + "product_b", + 592364.93, + 312406.59, + 191022.75 + ], + [ + 2025003, + "JP", + "product_c", + 392035.2, + 205594.91, + 139755.02 + ], + [ + 2025004, + "US", + "product_a", + 856057.56, + 380036.43, + 387620.31 + ], + [ + 2025004, + "US", + "product_b", + 354994.92, + 197527.49, + 106098.15 + ], + [ + 2025004, + "US", + "product_c", + 132814.07, + 55577.45, + 60671.25 + ], + [ + 2025004, + "CH", + "product_a", + 171007.91, + 85657.5, + 57447.69 + ], + [ + 2025004, + "CH", + "product_b", + 723828.94, + 367105.04, + 233784.42 + ], + [ + 2025004, + "CH", + "product_c", + 703144.12, + 339293.39, + 287776.1 + ], + [ + 2025004, + "JP", + "product_a", + 879589.72, + 469879.43, + 287573.35 + ], + [ + 2025004, + "JP", + "product_b", + 423806.24, + 209891.23, + 140790.61 + ], + [ + 2025004, + "JP", + "product_c", + 123921.43, + 56114.8, + 54744.15 + ], + [ + 2025005, + "US", + "product_a", + 287101.36, + 128088.06, + 121769.78 + ], + [ + 2025005, + "US", + "product_b", + 120840.4, + 63250.21, + 34025.58 + ], + [ + 2025005, + "US", + "product_c", + 859822.82, + 438638.52, + 319823.53 + ], + [ + 2025005, + "CH", + "product_a", + 918616.51, + 476375.02, + 345263.69 + ], + [ + 2025005, + "CH", + "product_b", + 463836.57, + 
225411.91, + 164363.35 + ], + [ + 2025005, + "CH", + "product_c", + 150400.06, + 76830.1, + 57734.0 + ], + [ + 2025005, + "JP", + "product_a", + 130533.94, + 74303.41, + 42639.87 + ], + [ + 2025005, + "JP", + "product_b", + 276648.51, + 149847.76, + 77974.71 + ], + [ + 2025005, + "JP", + "product_c", + 966021.36, + 549117.68, + 226348.08 + ], + [ + 2025006, + "US", + "product_a", + 697734.41, + 321209.98, + 269287.17 + ], + [ + 2025006, + "US", + "product_b", + 509537.69, + 216742.39, + 196649.56 + ], + [ + 2025006, + "US", + "product_c", + 599681.96, + 338606.69, + 152776.68 + ], + [ + 2025006, + "CH", + "product_a", + 691502.97, + 306733.92, + 305841.85 + ], + [ + 2025006, + "CH", + "product_b", + 977070.61, + 555779.77, + 323475.14 + ], + [ + 2025006, + "CH", + "product_c", + 446652.57, + 257604.52, + 124964.86 + ], + [ + 2025006, + "JP", + "product_a", + 300708.41, + 138909.84, + 125055.35 + ], + [ + 2025006, + "JP", + "product_b", + 641637.91, + 266513.99, + 299388.57 + ], + [ + 2025006, + "JP", + "product_c", + 894812.82, + 374904.8, + 382273.77 + ], + [ + 2025007, + "US", + "product_a", + 106321.26, + 60493.21, + 34829.92 + ], + [ + 2025007, + "US", + "product_b", + 204861.35, + 114522.52, + 54867.92 + ], + [ + 2025007, + "US", + "product_c", + 933831.51, + 381047.87, + 367784.23 + ], + [ + 2025007, + "CH", + "product_a", + 383839.98, + 161336.15, + 182516.17 + ], + [ + 2025007, + "CH", + "product_b", + 707722.5, + 323192.11, + 291995.74 + ], + [ + 2025007, + "CH", + "product_c", + 544842.77, + 262207.99, + 224891.86 + ], + [ + 2025007, + "JP", + "product_a", + 308222.24, + 137888.67, + 122091.67 + ], + [ + 2025007, + "JP", + "product_b", + 728837.53, + 426422.53, + 213908.35 + ], + [ + 2025007, + "JP", + "product_c", + 712283.42, + 422025.72, + 150220.72 + ], + [ + 2025008, + "US", + "product_a", + 979021.74, + 556814.66, + 229460.27 + ], + [ + 2025008, + "US", + "product_b", + 178367.18, + 100991.79, + 58554.17 + ], + [ + 2025008, + "US", + "product_c", + 181876.63, + 90186.16, + 69962.44 + ], + [ + 2025008, + "CH", + "product_a", + 836401.83, + 376402.19, + 368927.21 + ], + [ + 2025008, + "CH", + "product_b", + 563077.39, + 295250.4, + 175508.79 + ], + [ + 2025008, + "CH", + "product_c", + 952572.58, + 443937.78, + 329456.24 + ], + [ + 2025008, + "JP", + "product_a", + 922470.7, + 463955.03, + 323906.98 + ], + [ + 2025008, + "JP", + "product_b", + 539467.23, + 245846.09, + 231790.61 + ], + [ + 2025008, + "JP", + "product_c", + 378505.15, + 159260.4, + 179402.68 + ], + [ + 2025009, + "US", + "product_a", + 960707.21, + 515890.38, + 334324.46 + ], + [ + 2025009, + "US", + "product_b", + 777976.74, + 400134.77, + 241917.79 + ], + [ + 2025009, + "US", + "product_c", + 110103.6, + 51263.48, + 39458.96 + ], + [ + 2025009, + "CH", + "product_a", + 235029.18, + 129361.09, + 71326.23 + ], + [ + 2025009, + "CH", + "product_b", + 482250.81, + 207154.3, + 178737.23 + ], + [ + 2025009, + "CH", + "product_c", + 396707.9, + 205878.24, + 145895.5 + ], + [ + 2025009, + "JP", + "product_a", + 112621.69, + 57853.21, + 42786.64 + ], + [ + 2025009, + "JP", + "product_b", + 658192.23, + 357318.77, + 190031.57 + ], + [ + 2025009, + "JP", + "product_c", + 720227.45, + 301253.92, + 296465.66 + ], + [ + 2025010, + "US", + "product_a", + 392562.89, + 227032.43, + 108339.83 + ], + [ + 2025010, + "US", + "product_b", + 719366.66, + 325648.44, + 292073.28 + ], + [ + 2025010, + "US", + "product_c", + 186753.64, + 111147.1, + 44941.28 + ], + [ + 2025010, + "CH", + "product_a", + 226464.09, + 94389.44, + 
106109.91 + ], + [ + 2025010, + "CH", + "product_b", + 631766.81, + 296887.51, + 243311.91 + ], + [ + 2025010, + "CH", + "product_c", + 600591.27, + 255431.5, + 283562.52 + ], + [ + 2025010, + "JP", + "product_a", + 956333.99, + 437594.14, + 407157.02 + ], + [ + 2025010, + "JP", + "product_b", + 970552.32, + 508488.75, + 362467.57 + ], + [ + 2025010, + "JP", + "product_c", + 505769.26, + 299983.96, + 148422.92 + ], + [ + 2025011, + "US", + "product_a", + 732979.6, + 374060.51, + 232885.1 + ], + [ + 2025011, + "US", + "product_b", + 870887.45, + 392455.89, + 341730.08 + ], + [ + 2025011, + "US", + "product_c", + 439435.11, + 193609.34, + 165449.12 + ], + [ + 2025011, + "CH", + "product_a", + 779961.29, + 404085.06, + 231809.21 + ], + [ + 2025011, + "CH", + "product_b", + 810429.69, + 411255.96, + 294956.47 + ], + [ + 2025011, + "CH", + "product_c", + 165180.24, + 84361.47, + 47919.3 + ], + [ + 2025011, + "JP", + "product_a", + 769879.43, + 433069.36, + 198758.52 + ], + [ + 2025011, + "JP", + "product_b", + 926353.07, + 506305.7, + 236142.17 + ], + [ + 2025011, + "JP", + "product_c", + 809579.22, + 435692.65, + 276765.56 + ], + [ + 2025012, + "US", + "product_a", + 768095.96, + 409728.92, + 275510.8 + ] + ], + "datasetInfos": [], + "dbfsResultPath": null, + "isJsonSchema": true, + "metadata": {}, + "overflow": false, + "plotOptions": { + "customPlotOptions": {}, + "displayType": "table", + "pivotAggregation": null, + "pivotColumns": null, + "xColumns": null, + "yColumns": null + }, + "removedWidgets": [], + "schema": [ + { + "metadata": "{}", + "name": "TestData[fiscper]", + "type": "\"long\"" + }, + { + "metadata": "{}", + "name": "TestData[country]", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "TestData[product]", + "type": "\"string\"" + }, + { + "metadata": "{}", + "name": "TestData[nsr]", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "TestData[cogs]", + "type": "\"double\"" + }, + { + "metadata": "{}", + "name": "TestData[net_income]", + "type": "\"double\"" + } + ], + "type": "table" + } + }, + "output_type": "display_data" + }, + { + "output_type": "stream", + "name": "stdout", + "output_type": "stream", + "text": [ + "\nTotal rows: 100\nColumns: ['TestData[fiscper]', 'TestData[country]', 'TestData[product]', 'TestData[nsr]', 'TestData[cogs]', 'TestData[net_income]']\n" + ] + } + ], + "source": [ + "# Simple query to get all data\n", + "# Replace 'test_data' with your actual table name\n", + "query = \"\"\"\n", + "EVALUATE\n", + "TOPN(\n", + " 100,\n", + " TestData\n", + ")\n", + "\"\"\"\n", + "\n", + "df_result = execute_dax_query(access_token, SEMANTIC_MODEL_ID, query)\n", + "if not df_result.empty:\n", + " print(\"\\nQuery Results:\")\n", + " display(df_result.head(100))\n", + " print(f\"\\nTotal rows: {len(df_result)}\")\n", + " print(f\"Columns: {list(df_result.columns)}\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "test_powerbi_connection_interactive", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file
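
For reference, the two helpers defined in this notebook (get_dataset_info and execute_dax_query) reduce to a short standalone script. The sketch below is illustrative only: it assumes service-principal authentication via msal's client-credentials flow, and the environment variable names (POWERBI_TENANT_ID, POWERBI_CLIENT_ID, POWERBI_CLIENT_SECRET, POWERBI_DATASET_ID) are hypothetical placeholders, not names used elsewhere in these examples.

# Standalone sketch: token acquisition + DAX execution against the Power BI REST API.
# Assumptions: msal client-credentials flow; the env var names below are hypothetical placeholders.
import os

import msal
import pandas as pd
import requests

TENANT_ID = os.environ["POWERBI_TENANT_ID"]
CLIENT_ID = os.environ["POWERBI_CLIENT_ID"]
CLIENT_SECRET = os.environ["POWERBI_CLIENT_SECRET"]
DATASET_ID = os.environ["POWERBI_DATASET_ID"]

# Acquire a service-principal token scoped to the Power BI REST API.
app = msal.ConfidentialClientApplication(
    client_id=CLIENT_ID,
    authority=f"https://login.microsoftonline.com/{TENANT_ID}",
    client_credential=CLIENT_SECRET,
)
token_result = app.acquire_token_for_client(
    scopes=["https://analysis.windows.net/powerbi/api/.default"]
)
access_token = token_result["access_token"]
headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json"}

# Verify the dataset is reachable (same endpoint as get_dataset_info above).
info = requests.get(
    f"https://api.powerbi.com/v1.0/myorg/datasets/{DATASET_ID}",
    headers=headers,
    timeout=30,
).json()
print(f"Connected to dataset: {info.get('name')}")

# Run the same TOPN query shown in the notebook and load the rows into pandas.
dax = "EVALUATE TOPN(100, TestData)"  # table name taken from the example above
body = {"queries": [{"query": dax}], "serializerSettings": {"includeNulls": True}}
resp = requests.post(
    f"https://api.powerbi.com/v1.0/myorg/datasets/{DATASET_ID}/executeQueries",
    headers=headers,
    json=body,
    timeout=30,
)
resp.raise_for_status()
rows = resp.json()["results"][0]["tables"][0].get("rows", [])
df = pd.DataFrame(rows)
print(f"{len(df)} rows, columns: {list(df.columns)}")

Note that calling executeQueries with a service principal typically also requires the principal to have access to the workspace and the tenant setting allowing service principals to use Power BI APIs to be enabled.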