diff --git a/src/intugle/mcp/adapter/router.py b/src/intugle/mcp/adapter/router.py
index 126d761..7aad0cc 100644
--- a/src/intugle/mcp/adapter/router.py
+++ b/src/intugle/mcp/adapter/router.py
@@ -1,16 +1,16 @@
-from mcp.server.fastmcp import FastMCP
+# from mcp.server.fastmcp import FastMCP
 
-from intugle.core.settings import settings
-from intugle.mcp.adapter.service import adapter_service
+# from intugle.core.settings import settings
+# from intugle.mcp.adapter.service import adapter_service
 
-adapter_mcp = FastMCP(
-    name=settings.MCP_SERVER_NAME,
-    stateless_http=settings.MCP_SERVER_STATELESS_HTTP,
-)
+# adapter_mcp = FastMCP(
+#     name=settings.MCP_SERVER_NAME,
+#     stateless_http=settings.MCP_SERVER_STATELESS_HTTP,
+# )
 
 
-@adapter_mcp.tool(name="execute_query", description="Return the result of a query execution")
-async def execute_query(sql_query: str) -> list[dict]:
-    data = adapter_service.execute_query(sql_query)
-    print(data)
-    return data
+# @adapter_mcp.tool(name="execute_query", description="Return the result of a query execution")
+# async def execute_query(sql_query: str) -> list[dict]:
+#     data = adapter_service.execute_query(sql_query)
+#     print(data)
+#     return data
diff --git a/src/intugle/mcp/adapter/service.py b/src/intugle/mcp/adapter/service.py
index 55e5646..a3ca77c 100644
--- a/src/intugle/mcp/adapter/service.py
+++ b/src/intugle/mcp/adapter/service.py
@@ -1,44 +1,44 @@
-# from intugle.adapters.factory import AdapterFactory
-from intugle.adapters.types.duckdb.duckdb import DuckdbAdapter
-from intugle.analysis.models import DataSet
-from intugle.mcp.manifest import manifest_loader
+# # from intugle.adapters.factory import AdapterFactory
+# from intugle.adapters.types.duckdb.duckdb import DuckdbAdapter
+# from intugle.analysis.models import DataSet
+# from intugle.mcp.manifest import manifest_loader
 
 
-class AdapterService:
-    """
-    Adapter service for executing queries.
-    """
+# class AdapterService:
+#     """
+#     Adapter service for executing queries.
+#     """
 
-    # Not good way to do it Need to create extandable and properly couple with adapter
-    def __init__(self, adapter: str = "duckdb"):
-        self.manifest = manifest_loader.manifest
-        self.adapter = DuckdbAdapter()
-        self.load_all()
+#     # Not good way to do it Need to create extandable and properly couple with adapter
+#     def __init__(self, adapter: str = "duckdb"):
+#         self.manifest = manifest_loader.manifest
+#         self.adapter = DuckdbAdapter()
+#         self.load_all()
 
-    def load_all(self):
-        sources = self.manifest.sources
-        for source in sources.values():
-            table_name = source.table.name
-            details = source.table.details
+#     def load_all(self):
+#         sources = self.manifest.sources
+#         for source in sources.values():
+#             table_name = source.table.name
+#             details = source.table.details
 
-            DataSet(data=details, name=table_name)
+#             DataSet(data=details, name=table_name)
 
-    async def execute_query(self, sql_query: str) -> list[dict]:
-        """
-        Execute a SQL query and return the result.
+#     async def execute_query(self, sql_query: str) -> list[dict]:
+#         """
+#         Execute a SQL query and return the result.
 
-        Args:
-            sql_query (str): The SQL query to execute.
+#         Args:
+#             sql_query (str): The SQL query to execute.
 
-        Returns:
-            list[dict]: The result of the query execution.
-        """
+#         Returns:
+#             list[dict]: The result of the query execution.
+#         """
 
-        data = self.adapter.execute(sql_query)
+#         data = self.adapter.execute(sql_query)
 
-        data = [dict(record) for record in data] if data else []
+#         data = [dict(record) for record in data] if data else []
 
-        return data
+#         return data
 
 
-adapter_service = AdapterService()
+# adapter_service = AdapterService()
diff --git a/src/intugle/mcp/semantic_layer/router.py b/src/intugle/mcp/semantic_layer/router.py
index dd2330f..04fd017 100644
--- a/src/intugle/mcp/semantic_layer/router.py
+++ b/src/intugle/mcp/semantic_layer/router.py
@@ -1,6 +1,8 @@
 from mcp.server.fastmcp import FastMCP
 
 from intugle.core.settings import settings
+
+# from intugle.mcp.adapter.service import adapter_service
 from intugle.mcp.docs_search.service import docs_search_service
 from intugle.mcp.semantic_layer.prompt import Prompts
 from intugle.mcp.semantic_layer.service import semantic_layer_service
diff --git a/src/intugle/mcp/server.py b/src/intugle/mcp/server.py
index 668024f..25005ae 100644
--- a/src/intugle/mcp/server.py
+++ b/src/intugle/mcp/server.py
@@ -8,7 +8,8 @@ from starlette.routing import Mount
 
 
 from intugle.core.settings import settings
-from intugle.mcp.adapter.router import adapter_mcp
+
+# from intugle.mcp.adapter.router import adapter_mcp
 from intugle.mcp.semantic_layer.router import semantic_layer_mcp
 
 log = getLogger(__name__)
@@ -30,7 +31,7 @@ async def lifespan(app: Starlette):
     routes=[
         # Using settings-based configuration
         Mount("/semantic_layer", app=semantic_layer_mcp.streamable_http_app()),
-        Mount("/adapter", app=adapter_mcp.streamable_http_app()),
+        # Mount("/adapter", app=adapter_mcp.streamable_http_app()),
     ],
     lifespan=lifespan
 )
diff --git a/src/intugle/parser/security.py b/src/intugle/parser/security.py
new file mode 100644
index 0000000..4fd4bef
--- /dev/null
+++ b/src/intugle/parser/security.py
@@ -0,0 +1,47 @@
+import re
+
+from typing import Optional
+
+# ---------------------------------------------------------------------
+# SECURITY HELPERS
+# ---------------------------------------------------------------------
+
+SAFE_IDENTIFIER_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
+
+
+def safe_identifier(name: str) -> str:
+    """Validate a SQL identifier and return it wrapped in double quotes.
+
+    Args:
+        name: Candidate identifier. Only ASCII letters, digits and underscores
+            are accepted, and the first character must not be a digit.
+
+    Returns:
+        The identifier wrapped in double quotes.
+
+    Raises:
+        ValueError: If ``name`` is empty or contains any other character.
+    """
+    if not name:
+        raise ValueError("Identifier cannot be empty.")
+    # fullmatch() rather than match(): "$" also matches just before a trailing
+    # newline, so match() would let a name such as "users\n" through.
+    if not SAFE_IDENTIFIER_PATTERN.fullmatch(name):
+        raise ValueError(f"Unsafe identifier: {name!r}")
+    return f"\"{name}\""
+
+
+def escape_literal(value: Optional[str]) -> str:
+    """Flatten a value for use in a single-line SQL ``--`` comment.
+
+    Args:
+        value: Comment text, or None.
+
+    Returns:
+        "NULL" when ``value`` is None; otherwise ``str(value)`` with each line
+        feed and carriage return replaced by a space. No quotes are added.
+    """
+    if value is None:
+        return "NULL"
+    # For comments, we just need to remove newlines, not wrap in quotes.
+    return str(value).replace("\n", " ").replace("\r", " ")
diff --git a/src/intugle/parser/table_schema.py b/src/intugle/parser/table_schema.py
index 9929a33..b8183c7 100644
--- a/src/intugle/parser/table_schema.py
+++ b/src/intugle/parser/table_schema.py
@@ -1,5 +1,6 @@
 from intugle.common.exception import errors
 from intugle.models.manifest import Manifest
+from intugle.parser.security import escape_literal, safe_identifier
 
 
 class TableSchema:
@@ -28,21 +29,32 @@ def generate_table_schema(self, table_name: str) -> str:
         if not table_detail:
             raise errors.NotFoundError(f"Table {table_name} not found in manifest.")
 
-        # Start with the CREATE TABLE statement
-        schema = f"CREATE TABLE {table_detail.table.name} -- {table_detail.table.description}"
-
-        # Iterate through the columns of the table and create the column definitions
-        columns_statements = [
-            f"\"{column.name}\" {column.type}, -- {column.description}" for column in table_detail.table.columns
-        ]
-
-        # join the column definitions into a single string
-        column_schema = "\n".join(columns_statements)
-
-        # Add the column definitions to the schema
-        schema += "\n(" + column_schema + "\n);"
-
-        return schema
+        # 1. Define the SQL template with placeholders
+        schema_template = "CREATE TABLE {table_name} -- {table_comment}\n(\n{column_definitions}\n);"
+
+        # 2. Sanitize all dynamic parts that will go into the template
+        params = {
+            "table_name": safe_identifier(table_detail.table.name),
+            "table_comment": escape_literal(table_detail.table.description),
+        }
+
+        # Sanitize each column definition separately
+        column_statements = []
+        for column in table_detail.table.columns:
+            # Here we assume column.type is safe and doesn't come from user input.
+            # If it can be user-defined, it needs its own validation.
+            column_template = " {column_name} {column_type}, -- {column_comment}"
+            column_params = {
+                "column_name": safe_identifier(column.name),
+                "column_type": column.type,
+                "column_comment": escape_literal(column.description),
+            }
+            column_statements.append(column_template.format(**column_params))
+
+        params["column_definitions"] = "\n".join(column_statements)
+
+        # 3. Format the final schema string with the sanitized parameters
+        return schema_template.format(**params)
 
     def get_table_schema(self, table_name: str):
         """Get the SQL schema for a specified table, generating it if not already cached.