diff --git a/README.md b/README.md
index 70a7a9b..4dfcc99 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 SQL-first semantic layer for consistent metrics across your data stack.
 
-- **Formats:** Sidemantic, Cube, MetricFlow (dbt), LookML, Hex, Rill, Superset, Omni, BSL, Snowflake Cortex, Malloy
+- **Formats:** Sidemantic, Cube, MetricFlow (dbt), LookML, Hex, Rill, Superset, Omni, BSL, Snowflake Cortex, Malloy, ThoughtSpot TML
 - **Databases:** DuckDB, MotherDuck, PostgreSQL, BigQuery, Snowflake, ClickHouse, Databricks, Spark SQL
 
 [Documentation](https://sidemantic.com) | [GitHub](https://github.com/sidequery/sidemantic) | [Discord](https://discord.com/invite/7MZ4UgSVvF)
@@ -169,7 +169,7 @@ See `examples/` for more.
 
 - SQL query interface with automatic rewriting
 - Automatic joins across models
-- Multi-format adapters (Cube, MetricFlow, LookML, Hex, Rill, Superset, Omni, BSL)
+- Multi-format adapters (Cube, MetricFlow, LookML, Hex, Rill, Superset, Omni, BSL, ThoughtSpot TML)
 - SQLGlot-based SQL generation and transpilation
 - Pydantic validation and type safety
 - Pre-aggregations with automatic routing
@@ -180,7 +180,7 @@ See `examples/` for more.
 
 ## Multi-Format Support
 
-Auto-detects: Sidemantic (SQL/YAML), Cube, MetricFlow (dbt), LookML, Hex, Rill, Superset, Omni, BSL
+Auto-detects: Sidemantic (SQL/YAML), Cube, MetricFlow (dbt), LookML, Hex, Rill, Superset, Omni, BSL, ThoughtSpot TML
 
 ```bash
 sidemantic query "SELECT revenue FROM orders" --models ./my_models
diff --git a/sidemantic/adapters/thoughtspot.py b/sidemantic/adapters/thoughtspot.py
new file mode 100644
index 0000000..56d40eb
--- /dev/null
+++ b/sidemantic/adapters/thoughtspot.py
@@ -0,0 +1,919 @@
+"""ThoughtSpot TML adapter for importing/exporting semantic models."""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from sidemantic.adapters.base import BaseAdapter
+from sidemantic.core.dimension import Dimension
+from sidemantic.core.metric import Metric
+from sidemantic.core.model import Model
+from sidemantic.core.relationship import Relationship
+from sidemantic.core.semantic_graph import SemanticGraph
+
+_BUCKET_MAP = {
+    "HOURLY": "hour",
+    "DAILY": "day",
+    "WEEKLY": "week",
+    "MONTHLY": "month",
+    "QUARTERLY": "quarter",
+    "YEARLY": "year",
+}
+
+_NUMERIC_TYPES = {"DOUBLE", "FLOAT", "INT32", "INT64", "DECIMAL", "NUMBER"}
+_TIME_TYPES = {"DATE", "TIME", "DATETIME", "TIMESTAMP"}
+_BOOL_TYPES = {"BOOL", "BOOLEAN"}
+
+_AGGREGATION_MAP = {
+    "SUM": "sum",
+    "COUNT": "count",
+    "COUNT_DISTINCT": "count_distinct",
+    "AVERAGE": "avg",
+    "AVG": "avg",
+    "MIN": "min",
+    "MAX": "max",
+    "MEDIAN": "median",
+}
+
+_UNSUPPORTED_AGG_FUNCS = {
+    "STD_DEVIATION": "STDDEV",
+    "VARIANCE": "VARIANCE",
+}
+
+_SIMPLE_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)*$")
+_TML_REF = re.compile(r"\[([^\]]+)\]")
+_TML_DOT_REF = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)\b")
+
+
+def _normalize(value: Any) -> str | None:
+    if value is None:
+        return None
+    return str(value).strip().upper()
+
+
+def _map_bucket(bucket: str | None) -> str | None:
+    return _BUCKET_MAP.get(_normalize(bucket) or "")
+
+
+def _map_dimension_type(data_type: str | None, bucket: str | None) -> tuple[str, str | None]:
+    if bucket:
+        return "time", bucket
+
+    dtype = _normalize(data_type)
+    if dtype in _TIME_TYPES:
+        granularity = "day" if dtype == "DATE" else "hour"
+        return "time", granularity
+    if dtype in _BOOL_TYPES:
+        return "boolean", None
+    if dtype in _NUMERIC_TYPES:
+        return "numeric", None
+    return "categorical", None
+
+
+def _map_aggregation(aggregation: str | None) -> tuple[str | None, str | None]:
+    if not aggregation:
+        return None, None
+    agg = _normalize(aggregation)
+    if agg in ("NONE", "NO_AGGREGATION"):
+        return None, None
+    if agg in _UNSUPPORTED_AGG_FUNCS:
+        return None, _UNSUPPORTED_AGG_FUNCS[agg]
+    return _AGGREGATION_MAP.get(agg), None
+
+
+def _convert_tml_expr(expr: str | None, table_path_lookup: dict[str, str] | None = None) -> str | None:
+    if not expr:
+        return expr
+
+    def _replace(match: re.Match[str]) -> str:
+        token = match.group(1)
+        if "::" in token:
+            table, column = token.split("::", 1)
+            if table_path_lookup and table in table_path_lookup:
+                table = table_path_lookup[table]
+            return f"{table}.{column}"
+        return token.replace("::", ".")
+
+    return _TML_REF.sub(_replace, expr)
+
+
+def _parse_ref_token(token: str) -> tuple[str | None, str]:
+    if "::" in token:
+        table, column = token.split("::", 1)
+        return table, column
+    if "." in token:
+        table, column = token.split(".", 1)
+        return table, column
+    return None, token
+
+
+def _extract_join_refs(
+    expr: str | None, table_path_lookup: dict[str, str] | None = None
+) -> tuple[tuple[str | None, str] | None, tuple[str | None, str] | None]:
+    if not expr:
+        return None, None
+
+    tokens = _TML_REF.findall(expr)
+    if len(tokens) < 2:
+        tokens = [f"{t[0]}.{t[1]}" for t in _TML_DOT_REF.findall(expr)]
+
+    if len(tokens) < 2:
+        return None, None
+
+    left = _parse_ref_token(tokens[0])
+    right = _parse_ref_token(tokens[1])
+
+    if table_path_lookup:
+        if left[0] in table_path_lookup:
+            left = (table_path_lookup[left[0]], left[1])
+        if right[0] in table_path_lookup:
+            right = (table_path_lookup[right[0]], right[1])
+    return left, right
+
+
+def _split_sql_identifier(sql: str | None) -> tuple[str | None, str | None]:
+    if not sql or not _SIMPLE_IDENTIFIER.match(sql):
+        return None, None
+    if "." in sql:
+        table, column = sql.split(".", 1)
+        return table, column
+    return None, sql
+
+
+def _sql_to_tml_expr(expr: str | None, base_table: str, tables: set[str]) -> str | None:
+    if not expr:
+        return expr
+
+    expr = expr.replace("{model}.", f"{base_table}.")
+
+    def _replace(match: re.Match[str]) -> str:
+        table = match.group(1)
+        column = match.group(2)
+        if table in tables:
+            return f"[{table}::{column}]"
+        return match.group(0)
+
+    return _TML_DOT_REF.sub(_replace, expr)
+
+
+def _split_table_name(table: str | None) -> tuple[str | None, str | None, str | None]:
+    if not table:
+        return None, None, None
+
+    parts = table.split(".")
+    if len(parts) == 3:
+        return parts[0], parts[1], parts[2]
+    if len(parts) == 2:
+        return None, parts[0], parts[1]
+    return None, None, parts[0]
+
+
+def _simple_column(sql: str | None, fallback: str | None) -> str | None:
+    if not sql:
+        return fallback
+    if _SIMPLE_IDENTIFIER.match(sql):
+        return sql
+    return fallback
+
+
+class ThoughtSpotAdapter(BaseAdapter):
+    """Adapter for ThoughtSpot TML (YAML) tables and worksheets."""
+
+    def parse(self, source: str | Path) -> SemanticGraph:
+        """Parse ThoughtSpot TML files into a semantic graph."""
+        source_path = Path(source)
+        if not source_path.exists():
+            raise FileNotFoundError(f"Path does not exist: {source_path}")
+
+        graph = SemanticGraph()
+        tml_files: list[Path] = []
+        if source_path.is_dir():
+            tml_files = (
+                list(source_path.rglob("*.tml")) + list(source_path.rglob("*.yml")) + list(source_path.rglob("*.yaml"))
+            )
+        else:
+            tml_files = [source_path]
+
+        for tml_file in tml_files:
+            model = self._parse_file(tml_file)
+            if model:
+                graph.add_model(model)
+
+        return graph
+
+    def _parse_file(self, file_path: Path) -> Model | None:
+        with open(file_path) as f:
+            data = yaml.safe_load(f)
+
+        if not isinstance(data, dict):
+            return None
+
+        if "table" in data:
+            return self._parse_table(data.get("table"), data)
+        if "worksheet" in data:
+            return self._parse_worksheet(data.get("worksheet"), data)
+        if "model" in data:
+            return self._parse_worksheet(data.get("model"), data)
+
+        return None
+
+    def _parse_table(self, table_def: dict[str, Any] | None, full_def: dict[str, Any]) -> Model | None:
+        if not table_def:
+            return None
+
+        name = table_def.get("name") or table_def.get("id")
+        if not name:
+            return None
+
+        db = table_def.get("db")
+        schema = table_def.get("schema")
+        db_table = table_def.get("db_table") or name
+
+        table_name = ".".join([part for part in [db, schema, db_table] if part]) if db_table else None
+
+        dimensions: list[Dimension] = []
+        metrics: list[Metric] = []
+
+        for col_def in table_def.get("columns") or []:
+            col_name = col_def.get("name")
+            if not col_name:
+                continue
+
+            properties = col_def.get("properties") or {}
+            column_type = _normalize(properties.get("column_type")) or "ATTRIBUTE"
+            bucket = _map_bucket(properties.get("default_date_bucket"))
+            data_type = col_def.get("data_type") or (col_def.get("db_column_properties") or {}).get("data_type")
+            label = col_def.get("custom_name") or col_def.get("display_name")
+            description = col_def.get("description")
+            format_pattern = properties.get("format_pattern")
+            sql = col_def.get("db_column_name") or col_name
+
+            if column_type == "MEASURE":
+                agg, unsupported_func = _map_aggregation(properties.get("aggregation"))
+                metric_sql = _convert_tml_expr(sql)
+                if agg:
+                    metric = Metric(
+                        name=col_name,
+                        agg=agg,
+                        sql=metric_sql,
+                        label=label,
+                        description=description,
+                        format=format_pattern,
+                    )
+                else:
+                    if unsupported_func:
+                        metric_sql = f"{unsupported_func}({metric_sql})" if metric_sql else unsupported_func
+                    metric = Metric(
+                        name=col_name,
+                        type="derived",
+                        sql=metric_sql,
+                        label=label,
+                        description=description,
+                        format=format_pattern,
+                    )
+                metrics.append(metric)
+            else:
+                dim_type, granularity = _map_dimension_type(data_type, bucket)
+                dim = Dimension(
+                    name=col_name,
+                    type=dim_type,
+                    sql=_convert_tml_expr(sql),
+                    granularity=granularity,
+                    label=label,
+                    description=description,
+                    format=format_pattern,
+                )
+                dimensions.append(dim)
+
+        default_time_dimension = None
+        default_grain = None
+        for dim in dimensions:
+            if dim.type == "time":
+                default_time_dimension = dim.name
+                default_grain = dim.granularity
+                break
+
+        primary_key = "id"
+        if any(d.name.lower() == "id" for d in dimensions):
+            primary_key = next(d.name for d in dimensions if d.name.lower() == "id")
+
+        relationships = self._parse_table_relationships(table_def.get("joins_with") or [])
+
+        model = Model(
+            name=name,
+            table=table_name,
+            description=table_def.get("description"),
+            primary_key=primary_key,
+            dimensions=dimensions,
+            metrics=metrics,
+            relationships=relationships,
+            default_time_dimension=default_time_dimension,
+            default_grain=default_grain,
+        )
+        setattr(model, "_source_tml_type", "table")
+        return model
+
+    def _parse_worksheet(self, worksheet_def: dict[str, Any] | None, full_def: dict[str, Any]) -> Model | None:
+        if not worksheet_def:
+            return None
+
+        name = worksheet_def.get("name")
+        if not name:
+            return None
+
+        description = worksheet_def.get("description")
+        tables = worksheet_def.get("tables") or []
+        joins = worksheet_def.get("joins") or []
+        table_paths = worksheet_def.get("table_paths") or []
+
+        table_name_lookup = self._table_name_lookup(tables)
+        table_path_lookup = {
+            tp.get("id"): table_name_lookup.get(tp.get("table"), tp.get("table")) for tp in table_paths if tp.get("id")
+        }
+
+        sql, base_table = self._build_join_sql(tables, joins, table_path_lookup, table_name_lookup)
+        relationships = self._parse_join_relationships(joins, table_path_lookup, table_name_lookup)
+
+        formulas = worksheet_def.get("formulas") or []
+        formula_by_id = {f.get("id"): f for f in formulas if f.get("id")}
+        formula_by_name = {f.get("name"): f for f in formulas if f.get("name")}
+
+        dimensions: list[Dimension] = []
+        metrics: list[Metric] = []
+
+        for col_def in worksheet_def.get("worksheet_columns") or []:
+            col_name = col_def.get("name")
+            column_id = col_def.get("column_id")
+            formula_id = col_def.get("formula_id")
+            if not col_name:
+                if formula_id and formula_id in formula_by_id:
+                    col_name = formula_by_id[formula_id].get("name")
+                elif column_id:
+                    col_name = column_id.split("::")[-1]
+
+            if not col_name:
+                continue
+
+            properties = col_def.get("properties") or {}
+            column_type = _normalize(properties.get("column_type")) or "ATTRIBUTE"
+            bucket = _map_bucket(properties.get("default_date_bucket"))
+            label = col_def.get("custom_name") or col_def.get("display_name")
+            col_description = col_def.get("description")
+            format_pattern = properties.get("format_pattern")
+
+            sql_expr = None
+            if formula_id and formula_id in formula_by_id:
+                sql_expr = formula_by_id[formula_id].get("expr")
+            elif formula_id and formula_id in formula_by_name:
+                sql_expr = formula_by_name[formula_id].get("expr")
+            elif col_name in formula_by_name:
+                sql_expr = formula_by_name[col_name].get("expr")
+
+            if not sql_expr and column_id:
+                if "::" in column_id:
+                    path_id, col_ref = column_id.split("::", 1)
+                    table_name = table_path_lookup.get(path_id) or table_name_lookup.get(path_id)
+                    if table_name:
+                        sql_expr = f"{table_name}.{col_ref}"
+                    else:
+                        sql_expr = col_ref
+                else:
+                    sql_expr = column_id
+
+            sql_expr = _convert_tml_expr(sql_expr, table_path_lookup)
+
+            if column_type == "MEASURE":
+                agg, unsupported_func = _map_aggregation(properties.get("aggregation"))
+                metric_sql = sql_expr
+                if agg:
+                    metric = Metric(
+                        name=col_name,
+                        agg=agg,
+                        sql=metric_sql,
+                        label=label,
+                        description=col_description,
+                        format=format_pattern,
+                    )
+                else:
+                    if unsupported_func:
+                        metric_sql = f"{unsupported_func}({metric_sql})" if metric_sql else unsupported_func
+                    metric = Metric(
+                        name=col_name,
+                        type="derived",
+                        sql=metric_sql,
+                        label=label,
+                        description=col_description,
+                        format=format_pattern,
+                    )
+                metrics.append(metric)
+            else:
+                data_type = col_def.get("data_type") or (col_def.get("db_column_properties") or {}).get("data_type")
+                dim_type, granularity = _map_dimension_type(data_type, bucket)
+                dim = Dimension(
+                    name=col_name,
+                    type=dim_type,
+                    sql=sql_expr,
+                    granularity=granularity,
+                    label=label,
+                    description=col_description,
+                    format=format_pattern,
+                )
+                dimensions.append(dim)
+
+        default_time_dimension = None
+        default_grain = None
+        for dim in dimensions:
+            if dim.type == "time":
+                default_time_dimension = dim.name
+                default_grain = dim.granularity
+                break
+
+        primary_key = "id"
+        if any(d.name.lower() == "id" for d in dimensions):
+            primary_key = next(d.name for d in dimensions if d.name.lower() == "id")
+
+        model = Model(
+            name=name,
+            table=base_table if not sql else None,
+            sql=sql,
+            description=description,
+            primary_key=primary_key,
+            dimensions=dimensions,
+            metrics=metrics,
+            relationships=relationships,
+            default_time_dimension=default_time_dimension,
+            default_grain=default_grain,
+        )
+
+        if base_table:
+            setattr(model, "_worksheet_base_table", base_table)
+        setattr(model, "_source_tml_type", "worksheet")
+
+        return model
+
+    def _build_join_sql(
+        self,
+        tables: list[dict[str, Any]],
+        joins: list[dict[str, Any]],
+        table_path_lookup: dict[str, str] | None = None,
+        table_name_lookup: dict[str, str] | None = None,
+    ) -> tuple[str | None, str | None]:
+        base_table = None
+        if tables:
+            base_table = tables[0].get("name") or tables[0].get("id")
+
+        joined: set[str] = set()
+        if base_table:
+            joined.add(base_table)
+
+        clauses: list[str] = []
+        if base_table:
+            clauses.append(base_table)
+
+        for join_def in joins:
+            source = join_def.get("source")
+            destination = join_def.get("destination")
+            join_type = _normalize(join_def.get("type")) or "INNER"
+            on_value = join_def.get("on")
+            if on_value is None and True in join_def:
+                on_value = join_def.get(True)
+            on_expr = _convert_tml_expr(on_value, table_path_lookup or table_name_lookup)
+
+            if not source or not destination or not on_expr:
+                continue
+
+            if table_name_lookup:
+                source = table_name_lookup.get(source, source)
+                destination = table_name_lookup.get(destination, destination)
+
+            if not base_table:
+                base_table = source
+                clauses.append(base_table)
+                joined.add(base_table)
+
+            if source in joined and destination not in joined:
+                right = destination
+            elif destination in joined and source not in joined:
+                right = source
+            else:
+                right = destination
+
+            join_keyword = {
+                "LEFT_OUTER": "LEFT",
+                "RIGHT_OUTER": "RIGHT",
+                "OUTER": "FULL OUTER",
+                "FULL_OUTER": "FULL OUTER",
+                "INNER": "INNER",
+            }.get(join_type, "INNER")
+
+            clauses.append(f"{join_keyword} JOIN {right} ON {on_expr}")
+            joined.add(right)
+
+        if not clauses:
+            return None, None
+
+        if len(clauses) == 1:
+            return None, clauses[0]
+
+        sql = "SELECT * FROM " + clauses[0]
+        for clause in clauses[1:]:
+            sql += f"\n{clause}"
+
+        return sql, base_table
+
+    def _parse_join_relationships(
+        self,
+        joins: list[dict[str, Any]],
+        table_path_lookup: dict[str, str] | None = None,
+        table_name_lookup: dict[str, str] | None = None,
+    ) -> list[Relationship]:
+        relationships: list[Relationship] = []
+
+        for join_def in joins:
+            source = join_def.get("source")
+            destination = join_def.get("destination")
+            if not source or not destination:
+                continue
+
+            join_type = _normalize(join_def.get("type")) or "INNER"
+            on_value = join_def.get("on")
+            if on_value is None and True in join_def:
+                on_value = join_def.get(True)
+            lookup = table_path_lookup or table_name_lookup
+            left, right = _extract_join_refs(on_value, lookup)
+
+            if table_name_lookup:
+                source = table_name_lookup.get(source, source)
+                destination = table_name_lookup.get(destination, destination)
+
+            foreign_key = None
+            primary_key = None
+
+            if left and right:
+                left_table, left_col = left
+                right_table, right_col = right
+
+                if left_table == source and right_table == destination:
+                    foreign_key = left_col
+                    primary_key = right_col
+                elif left_table == destination and right_table == source:
+                    foreign_key = right_col
+                    primary_key = left_col
+                else:
+                    if left_table == source:
+                        foreign_key = left_col
+                    if right_table == destination:
+                        primary_key = right_col
+
+            rel_type = "one_to_one" if join_def.get("is_one_to_one") else "many_to_one"
+            if join_type in {"RIGHT_OUTER", "FULL_OUTER", "OUTER"}:
+                rel_type = "many_to_many"
+
+            relationships.append(
+                Relationship(
+                    name=destination,
+                    type=rel_type,
+                    foreign_key=foreign_key,
+                    primary_key=primary_key,
+                )
+            )
+
+        return relationships
+
+    def _parse_table_relationships(self, joins_with: list[dict[str, Any]]) -> list[Relationship]:
+        relationships: list[Relationship] = []
+        for join_def in joins_with:
+            destination_def = join_def.get("destination") or {}
+            destination = destination_def.get("name") if isinstance(destination_def, dict) else destination_def
+            if not destination:
+                continue
+
+            join_type = _normalize(join_def.get("type")) or "INNER"
+            on_value = join_def.get("on")
+            if on_value is None and True in join_def:
+                on_value = join_def.get(True)
+            left, right = _extract_join_refs(on_value)
+
+            foreign_key = None
+            primary_key = None
+            if left and right:
+                foreign_key = left[1]
+                primary_key = right[1]
+
+            rel_type = "many_to_one"
+            if join_def.get("is_one_to_one"):
+                rel_type = "one_to_one"
+            if join_type in {"RIGHT_OUTER", "FULL_OUTER", "OUTER"}:
+                rel_type = "many_to_many"
+
+            relationships.append(
+                Relationship(
+                    name=destination,
+                    type=rel_type,
+                    foreign_key=foreign_key,
+                    primary_key=primary_key,
+                )
+            )
+
+        return relationships
+
+    def _table_name_lookup(self, tables: list[dict[str, Any]]) -> dict[str, str]:
+        lookup: dict[str, str] = {}
+        for table in tables:
+            table_id = table.get("id") or table.get("name")
+            table_name = table.get("name") or table.get("id")
+            if table_id and table_name:
+                lookup[table_id] = table_name
+        return lookup
+
+    def export(self, graph: SemanticGraph, output_path: str | Path) -> None:
+        """Export semantic graph to ThoughtSpot table TML files."""
+        output_path = Path(output_path)
+
+        from sidemantic.core.inheritance import resolve_model_inheritance
+
+        resolved_models = resolve_model_inheritance(graph.models)
+
+        if output_path.is_dir() or not output_path.suffix:
+            output_path.mkdir(parents=True, exist_ok=True)
+            for model in resolved_models.values():
+                tml = self._export_model(model)
+                file_path = output_path / f"{model.name}.{tml['__type']}.tml"
+                with open(file_path, "w") as f:
+                    yaml.safe_dump(tml["data"], f, sort_keys=False)
+        else:
+            if resolved_models:
+                model = next(iter(resolved_models.values()))
+                tml = self._export_model(model)
+                output_path.parent.mkdir(parents=True, exist_ok=True)
+                with open(output_path, "w") as f:
+                    yaml.safe_dump(tml["data"], f, sort_keys=False)
+
+    def _export_model(self, model: Model) -> dict[str, Any]:
+        if self._should_export_worksheet(model):
+            return {"__type": "worksheet", "data": self._export_worksheet(model)}
+        return {"__type": "table", "data": self._export_table(model)}
+
+    def _should_export_worksheet(self, model: Model) -> bool:
+        if getattr(model, "_source_tml_type", None) == "worksheet":
+            return True
+        if model.sql is not None:
+            return True
+        if model.relationships:
+            return True
+        return False
+
+    def _export_table(self, model: Model) -> dict[str, Any]:
+        db, schema, table = _split_table_name(model.table)
+
+        table_def: dict[str, Any] = {
+            "name": model.name,
+            "description": model.description,
+        }
+
+        if db:
+            table_def["db"] = db
+        if schema:
+            table_def["schema"] = schema
+        if table:
+            table_def["db_table"] = table
+
+        columns: list[dict[str, Any]] = []
+
+        for dim in model.dimensions:
+            data_type = "VARCHAR"
+            if dim.type == "numeric":
+                data_type = "DOUBLE"
+            elif dim.type == "boolean":
+                data_type = "BOOL"
+            elif dim.type == "time":
+                data_type = "DATETIME" if dim.granularity in {"hour", "minute", "second"} else "DATE"
+
+            col_def: dict[str, Any] = {
+                "name": dim.name,
+                "db_column_name": _simple_column(dim.sql, dim.name),
+                "data_type": data_type,
+                "properties": {
+                    "column_type": "ATTRIBUTE",
+                },
+            }
+
+            if dim.description:
+                col_def["description"] = dim.description
+            if dim.label:
+                col_def["custom_name"] = dim.label
+            if dim.format:
+                col_def["properties"]["format_pattern"] = dim.format
+            if dim.type == "time" and dim.granularity:
+                bucket = {v: k for k, v in _BUCKET_MAP.items()}.get(dim.granularity)
+                if bucket:
+                    col_def["properties"]["default_date_bucket"] = bucket
+
+            columns.append(col_def)
+
+        for metric in model.metrics:
+            data_type = "DOUBLE"
+            col_def: dict[str, Any] = {
+                "name": metric.name,
+                "db_column_name": _simple_column(metric.sql, metric.name),
+                "data_type": data_type,
+                "properties": {
+                    "column_type": "MEASURE",
+                },
+            }
+
+            if metric.description:
+                col_def["description"] = metric.description
+            if metric.label:
+                col_def["custom_name"] = metric.label
+            if metric.format:
+                col_def["properties"]["format_pattern"] = metric.format
+
+            if metric.agg:
+                agg_map = {
+                    "sum": "SUM",
+                    "count": "COUNT",
+                    "count_distinct": "COUNT_DISTINCT",
+                    "avg": "AVERAGE",
+                    "min": "MIN",
+                    "max": "MAX",
+                    "median": "MEDIAN",
+                }
+                col_def["properties"]["aggregation"] = agg_map.get(metric.agg, "NONE")
+            else:
+                col_def["properties"]["aggregation"] = "NONE"
+
+            columns.append(col_def)
+
+        if columns:
+            table_def["columns"] = columns
+
+        return {
+            "table": table_def,
+        }
+
+    def _export_worksheet(self, model: Model) -> dict[str, Any]:
+        base_table = getattr(model, "_worksheet_base_table", None)
+        if not base_table:
+            base_table = model.table or model.name
+
+        tables = [{"name": base_table}]
+        joins: list[dict[str, Any]] = []
+        table_paths: list[dict[str, Any]] = [{"id": base_table, "table": base_table}]
+
+        for rel in model.relationships:
+            tables.append({"name": rel.name})
+            join_name = f"{base_table}_{rel.name}"
+            join_type = "LEFT_OUTER" if rel.type in {"many_to_one", "one_to_one"} else "OUTER"
+            if rel.type in {"one_to_many", "one_to_one"}:
+                left_table = rel.name
+                left_key = rel.sql_expr
+                right_table = base_table
+                right_key = model.primary_key
+            else:
+                left_table = base_table
+                left_key = rel.sql_expr
+                right_table = rel.name
+                right_key = rel.related_key
+            on_expr = f"[{left_table}::{left_key}] = [{right_table}::{right_key}]"
+            joins.append(
+                {
+                    "name": join_name,
+                    "source": base_table,
+                    "destination": rel.name,
+                    "type": join_type,
+                    "on": on_expr,
+                    "is_one_to_one": rel.type == "one_to_one",
+                }
+            )
+            table_paths.append(
+                {
+                    "id": rel.name,
+                    "table": rel.name,
+                    "join_path": [{"join": [join_name]}],
+                }
+            )
+
+        tables_set = {t["name"] for t in tables}
+
+        formulas: list[dict[str, Any]] = []
+        worksheet_columns: list[dict[str, Any]] = []
+        formula_counter = 0
+
+        def add_formula(name: str, expr: str | None) -> str:
+            nonlocal formula_counter
+            formula_counter += 1
+            formula_id = f"formula_{formula_counter}"
+            formulas.append(
+                {
+                    "name": name,
+                    "expr": expr,
+                    "id": formula_id,
+                }
+            )
+            return formula_id
+
+        for dim in model.dimensions:
+            dim_sql = dim.sql or dim.name
+            table_ref, col_ref = _split_sql_identifier(dim_sql)
+            if not table_ref and col_ref:
+                table_ref = base_table
+            if table_ref in tables_set and col_ref:
+                column_id = f"{table_ref}::{col_ref}"
+                formula_id = None
+            else:
+                formula_id = add_formula(dim.name, _sql_to_tml_expr(dim_sql, base_table, tables_set))
+                column_id = None
+
+            props: dict[str, Any] = {"column_type": "ATTRIBUTE"}
+            if dim.type == "time" and dim.granularity:
+                bucket = {v: k for k, v in _BUCKET_MAP.items()}.get(dim.granularity)
+                if bucket:
+                    props["default_date_bucket"] = bucket
+            if dim.format:
+                props["format_pattern"] = dim.format
+
+            col_def: dict[str, Any] = {
+                "name": dim.name,
+                "properties": props,
+            }
+
+            if dim.label:
+                col_def["custom_name"] = dim.label
+            if dim.description:
+                col_def["description"] = dim.description
+            if column_id:
+                col_def["column_id"] = column_id
+            if formula_id:
+                col_def["formula_id"] = formula_id
+
+            worksheet_columns.append(col_def)
+
+        for metric in model.metrics:
+            metric_sql = metric.sql or metric.name
+            table_ref, col_ref = _split_sql_identifier(metric_sql)
+            if not table_ref and col_ref:
+                table_ref = base_table
+            if table_ref in tables_set and col_ref:
+                column_id = f"{table_ref}::{col_ref}"
+                formula_id = None
+            else:
+                formula_id = add_formula(metric.name, _sql_to_tml_expr(metric_sql, base_table, tables_set))
+                column_id = None
+
+            props: dict[str, Any] = {"column_type": "MEASURE"}
+            if metric.format:
+                props["format_pattern"] = metric.format
+
+            if metric.agg:
+                agg_map = {
+                    "sum": "SUM",
+                    "count": "COUNT",
+                    "count_distinct": "COUNT_DISTINCT",
+                    "avg": "AVERAGE",
+                    "min": "MIN",
+                    "max": "MAX",
+                    "median": "MEDIAN",
+                }
+                props["aggregation"] = agg_map.get(metric.agg, "NONE")
+            else:
+                props["aggregation"] = "NONE"
+
+            col_def: dict[str, Any] = {
+                "name": metric.name,
+                "properties": props,
+            }
+
+            if metric.label:
+                col_def["custom_name"] = metric.label
+            if metric.description:
+                col_def["description"] = metric.description
+            if column_id:
+                col_def["column_id"] = column_id
+            if formula_id:
+                col_def["formula_id"] = formula_id
+
+            worksheet_columns.append(col_def)
+
+        worksheet_def: dict[str, Any] = {
+            "name": model.name,
+            "description": model.description,
+            "tables": tables,
+        }
+
+        if joins:
+            worksheet_def["joins"] = joins
+        if table_paths:
+            worksheet_def["table_paths"] = table_paths
+        if formulas:
+            worksheet_def["formulas"] = formulas
+        if worksheet_columns:
+            worksheet_def["worksheet_columns"] = worksheet_columns
+
+        return {
+            "worksheet": worksheet_def,
+        }
diff --git a/sidemantic/loaders.py b/sidemantic/loaders.py
index 928f8b6..72fed79 100644
--- a/sidemantic/loaders.py
+++ b/sidemantic/loaders.py
@@ -30,6 +30,7 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None:
     from sidemantic.adapters.metricflow import MetricFlowAdapter
     from sidemantic.adapters.sidemantic import SidemanticAdapter
     from sidemantic.adapters.snowflake import SnowflakeAdapter
+    from sidemantic.adapters.thoughtspot import ThoughtSpotAdapter
 
     directory = Path(directory)
     if not directory.exists():
@@ -54,6 +55,8 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None:
         elif suffix == ".sql":
             # Sidemantic SQL files (pure SQL or with YAML frontmatter)
             adapter = SidemanticAdapter()
+        elif suffix == ".tml":
+            adapter = ThoughtSpotAdapter()
         elif suffix in (".yml", ".yaml"):
             # Try to detect which format by reading the file
             content = file_path.read_text()
@@ -66,6 +69,10 @@ def load_from_directory(layer: "SemanticLayer", directory: str | Path) -> None:
                 adapter = MetricFlowAdapter()
             elif "base_sql_table:" in content and "measures:" in content:
                 adapter = HexAdapter()
+            elif "table:" in content and "db_table:" in content and "columns:" in content:
+                adapter = ThoughtSpotAdapter()
+            elif "worksheet:" in content and "worksheet_columns:" in content:
+                adapter = ThoughtSpotAdapter()
             elif "tables:" in content and "base_table:" in content:
                 # Snowflake Cortex Semantic Model format
                 adapter = SnowflakeAdapter()
diff --git a/tests/adapters/thoughtspot/__init__.py b/tests/adapters/thoughtspot/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/adapters/thoughtspot/test_conversion.py b/tests/adapters/thoughtspot/test_conversion.py
new file mode 100644
index 0000000..4072c79
--- /dev/null
+++ b/tests/adapters/thoughtspot/test_conversion.py
@@ -0,0 +1,23 @@
+"""Tests for ThoughtSpot adapter - cross-format conversion."""
+
+import tempfile
+from pathlib import Path
+
+from sidemantic.adapters.sidemantic import SidemanticAdapter
+from sidemantic.adapters.thoughtspot import ThoughtSpotAdapter
+
+
+def test_thoughtspot_to_sidemantic_conversion():
+    """Test converting ThoughtSpot TML to Sidemantic YAML."""
+    ts_adapter = ThoughtSpotAdapter()
+    sidemantic_adapter = SidemanticAdapter()
+
+    graph = ts_adapter.parse("tests/fixtures/thoughtspot/orders.table.tml")
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        output_path = Path(tmpdir) / "orders.yml"
+        sidemantic_adapter.export(graph, output_path)
+
+        assert output_path.exists()
+        converted = sidemantic_adapter.parse(output_path)
+        assert "orders" in converted.models
diff --git a/tests/adapters/thoughtspot/test_parsing.py b/tests/adapters/thoughtspot/test_parsing.py
new file mode 100644
index 0000000..ac3d559
--- /dev/null
+++ b/tests/adapters/thoughtspot/test_parsing.py
@@ -0,0 +1,444 @@
+"""Tests for ThoughtSpot adapter - parsing."""
+
+import tempfile
+from pathlib import Path
+
+import yaml
+
+from sidemantic import SemanticLayer
+from sidemantic.adapters.thoughtspot import ThoughtSpotAdapter
+from sidemantic.loaders import load_from_directory
+
+# =============================================================================
+# BASIC PARSING TESTS
+# =============================================================================
+
+
+def test_import_real_thoughtspot_examples():
+    """Test importing ThoughtSpot TML examples."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/orders.table.tml")
+
+    assert "orders" in graph.models
+
+    orders = graph.models["orders"]
+    assert orders.table == "analytics.public.orders"
+    assert orders.primary_key == "id"
+
+    order_date = orders.get_dimension("order_date")
+    assert order_date is not None
+    assert order_date.type == "time"
+    assert order_date.granularity == "day"
+
+    amount = orders.get_metric("amount")
+    assert amount is not None
+    assert amount.agg == "sum"
+    assert amount.format == "$#,##0.00"
+
+    order_count = orders.get_metric("order_count")
+    assert order_count is not None
+    assert order_count.agg == "count"
+
+
+def test_import_thoughtspot_kitchen_sink_table():
+    """Test parsing a kitchen sink table TML fixture."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/kitchen_sink.table.tml")
+
+    assert "sales" in graph.models
+    model = graph.models["sales"]
+
+    order_date = model.get_dimension("order_date")
+    assert order_date is not None
+    assert order_date.type == "time"
+    assert order_date.granularity == "day"
+
+    order_week = model.get_dimension("order_week")
+    assert order_week is not None
+    assert order_week.type == "time"
+    assert order_week.granularity == "week"
+
+    order_hour = model.get_dimension("order_hour")
+    assert order_hour is not None
+    assert order_hour.type == "time"
+    assert order_hour.granularity == "hour"
+
+    order_month = model.get_dimension("order_month")
+    assert order_month is not None
+    assert order_month.type == "time"
+    assert order_month.granularity == "month"
+
+    order_quarter = model.get_dimension("order_quarter")
+    assert order_quarter is not None
+    assert order_quarter.type == "time"
+    assert order_quarter.granularity == "quarter"
+
+    order_year = model.get_dimension("order_year")
+    assert order_year is not None
+    assert order_year.type == "time"
+    assert order_year.granularity == "year"
+
+    is_active = model.get_dimension("is_active")
+    assert is_active is not None
+    assert is_active.type == "boolean"
+
+    status = model.get_dimension("status")
+    assert status is not None
+    assert status.label == "Order Status"
+    assert status.description == "Current order state"
+
+    gross_revenue = model.get_metric("gross_revenue")
+    assert gross_revenue is not None
+    assert gross_revenue.agg == "sum"
+    assert gross_revenue.format == "$#,##0.00"
+    assert gross_revenue.label == "Gross Revenue"
+    assert gross_revenue.description == "Total revenue before discounts"
+
+    avg_order_value = model.get_metric("avg_order_value")
+    assert avg_order_value is not None
+    assert avg_order_value.agg == "avg"
+
+    min_order_value = model.get_metric("min_order_value")
+    assert min_order_value is not None
+    assert min_order_value.agg == "min"
+
+    max_order_value = model.get_metric("max_order_value")
+    assert max_order_value is not None
+    assert max_order_value.agg == "max"
+
+    median_order_value = model.get_metric("median_order_value")
+    assert median_order_value is not None
+    assert median_order_value.agg == "median"
+
+    distinct_customers = model.get_metric("distinct_customers")
+    assert distinct_customers is not None
+    assert distinct_customers.agg == "count_distinct"
+
+    revenue_stddev = model.get_metric("revenue_stddev")
+    assert revenue_stddev is not None
+    assert revenue_stddev.type == "derived"
+    assert "STDDEV" in (revenue_stddev.sql or "")
+
+    revenue_raw = model.get_metric("revenue_raw")
+    assert revenue_raw is not None
+    assert revenue_raw.type == "derived"
+    assert revenue_raw.sql == "gross_revenue"
+
+
+def test_import_thoughtspot_kitchen_sink_worksheet():
+    """Test parsing a kitchen sink worksheet TML fixture."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/kitchen_sink.worksheet.tml")
+
+    assert "sales_worksheet_full" in graph.models
+    model = graph.models["sales_worksheet_full"]
+
+    assert model.sql is not None
+    assert "JOIN customers" in model.sql
+
+    relationships = {rel.name: rel for rel in model.relationships}
+    assert relationships["customers"].type == "many_to_one"
+    assert relationships["customers"].foreign_key == "customer_id"
+    assert relationships["customers"].primary_key == "id"
+    assert relationships["regions"].type == "one_to_one"
+
+    order_date = model.get_dimension("order_date")
+    assert order_date is not None
+    assert order_date.type == "time"
+    assert order_date.label == "Order Date"
+
+    is_active = model.get_dimension("is_active")
+    assert is_active is not None
+    assert is_active.type == "boolean"
+
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert net_revenue.agg == "sum"
+    assert "gross_revenue" in (net_revenue.sql or "")
+
+    avg_order_value = model.get_metric("avg_order_value")
+    assert avg_order_value is not None
+    assert avg_order_value.agg == "avg"
+    assert "/" in (avg_order_value.sql or "")
+
+    revenue_stddev = model.get_metric("revenue_stddev")
+    assert revenue_stddev is not None
+    assert revenue_stddev.type == "derived"
+    assert "STDDEV" in (revenue_stddev.sql or "")
+
+    distinct_customers = model.get_metric("distinct_customers")
+    assert distinct_customers is not None
+    assert distinct_customers.agg == "count_distinct"
+
+    min_order_value = model.get_metric("min_order_value")
+    assert min_order_value is not None
+    assert min_order_value.agg == "min"
+
+    max_order_value = model.get_metric("max_order_value")
+    assert max_order_value is not None
+    assert max_order_value.agg == "max"
+
+    median_order_value = model.get_metric("median_order_value")
+    assert median_order_value is not None
+    assert median_order_value.agg == "median"
+
+
+def test_import_thoughtspot_basic_worksheet():
+    """Test importing a basic worksheet example."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/sales.worksheet.tml")
+
+    assert "sales_worksheet" in graph.models
+    worksheet = graph.models["sales_worksheet"]
+    assert worksheet.sql is not None
+    assert "JOIN customers" in worksheet.sql
+
+    total_revenue = worksheet.get_metric("total_revenue")
+    assert total_revenue is not None
+    assert total_revenue.agg == "sum"
+    assert "amount" in (total_revenue.sql or "")
+
+    relationships = worksheet.relationships
+    assert len(relationships) == 1
+    rel = relationships[0]
+    assert rel.name == "customers"
+    assert rel.type == "many_to_one"
+    assert rel.foreign_key == "customer_id"
+    assert rel.primary_key == "id"
+
+
+def test_import_thoughtspot_table_joins():
+    """Test table-level joins_with relationships."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/table_joins.table.tml")
+
+    assert "orders" in graph.models
+    model = graph.models["orders"]
+
+    assert len(model.relationships) == 1
+    rel = model.relationships[0]
+    assert rel.name == "customers"
+    assert rel.type == "many_to_one"
+    assert rel.foreign_key == "customer_id"
+    assert rel.primary_key == "id"
+
+
+def test_import_thoughtspot_worksheet_ids():
+    """Test worksheet parsing when tables use ids and fqn."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/worksheet_ids.worksheet.tml")
+
+    assert "sales_ids" in graph.models
+    model = graph.models["sales_ids"]
+
+    assert model.sql is not None
+    assert "JOIN customers_table" in model.sql
+
+    customer_name = model.get_dimension("customer_name")
+    assert customer_name is not None
+    assert customer_name.sql == "customers_table.name"
+
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert "gross_revenue" in (net_revenue.sql or "")
+
+
+def test_import_thoughtspot_multi_join_worksheet():
+    """Test multi-join worksheet with chained formulas and mixed join predicates."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/worksheet_multi_join.worksheet.tml")
+
+    assert "sales_multi_join" in graph.models
+    model = graph.models["sales_multi_join"]
+    assert model.sql is not None
+    assert "JOIN dim_product_main" in model.sql
+    assert "JOIN dim_product_alt" in model.sql
+    assert "country_code" in model.sql
+
+    relationships = {rel.name: rel for rel in model.relationships}
+    assert relationships["dim_product_main"].type == "many_to_one"
+    assert relationships["dim_product_alt"].type == "one_to_one"
+
+    gross_revenue = model.get_metric("gross_revenue")
+    assert gross_revenue is not None
+    assert gross_revenue.agg == "sum"
+    assert "price" in (gross_revenue.sql or "")
+
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert net_revenue.agg == "sum"
+    assert "gross_revenue" in (net_revenue.sql or "")
+
+
+def test_import_thoughtspot_formula_name_id():
+    """Test formula_id matching when formula has no id field."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/worksheet_formula_name.worksheet.tml")
+
+    assert "sales_formula_name" in graph.models
+    model = graph.models["sales_formula_name"]
+    metric = model.get_metric("net_revenue")
+    assert metric is not None
+    assert "gross_revenue" in (metric.sql or "")
+
+
+def test_import_thoughtspot_db_column_properties():
+    """Test parsing data types from db_column_properties."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/table_db_column_properties.table.tml")
+
+    assert "inventory" in graph.models
+    model = graph.models["inventory"]
+
+    sku = model.get_dimension("sku")
+    assert sku is not None
+    assert sku.type == "categorical"
+
+    in_stock = model.get_dimension("in_stock")
+    assert in_stock is not None
+    assert in_stock.type == "boolean"
+
+    last_updated = model.get_dimension("last_updated")
+    assert last_updated is not None
+    assert last_updated.type == "time"
+    assert last_updated.granularity == "hour"
+
+    quantity = model.get_metric("quantity")
+    assert quantity is not None
+    assert quantity.agg == "sum"
+
+
+def test_import_thoughtspot_unbracketed_expr():
+    """Test parsing unbracketed table.column expressions."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/worksheet_unbracketed_expr.worksheet.tml")
+
+    assert "sales_unbracketed" in graph.models
+    model = graph.models["sales_unbracketed"]
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert "sales.gross_revenue" in (net_revenue.sql or "")
+
+
+# =============================================================================
+# DATA TYPE MAPPING TESTS
+# =============================================================================
+
+
+def test_thoughtspot_table_data_type_mapping():
+    """Test ThoughtSpot data type mappings."""
+    tml_def = {
+        "table": {
+            "name": "test",
+            "db_table": "test",
+            "columns": [
+                {
+                    "name": "flag",
+                    "db_column_name": "flag",
+                    "data_type": "BOOL",
+                    "properties": {"column_type": "ATTRIBUTE"},
+                },
+                {
+                    "name": "amount",
+                    "db_column_name": "amount",
+                    "data_type": "DOUBLE",
+                    "properties": {"column_type": "ATTRIBUTE"},
+                },
+                {
+                    "name": "event_date",
+                    "db_column_name": "event_date",
+                    "data_type": "DATE",
+                    "properties": {"column_type": "ATTRIBUTE", "default_date_bucket": "DAILY"},
+                },
+            ],
+        }
+    }
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".tml", delete=False) as f:
+        yaml.safe_dump(tml_def, f, sort_keys=False)
+        temp_path = Path(f.name)
+
+    try:
+        adapter = ThoughtSpotAdapter()
+        graph = adapter.parse(temp_path)
+        model = graph.models["test"]
+
+        assert model.get_dimension("flag").type == "boolean"
+        assert model.get_dimension("amount").type == "numeric"
+        assert model.get_dimension("event_date").type == "time"
+    finally:
+        temp_path.unlink(missing_ok=True)
+
+
+# =============================================================================
+# AGGREGATION MAPPING TESTS
+# =============================================================================
+
+
+def test_thoughtspot_aggregation_mapping():
+    """Test ThoughtSpot aggregation mappings."""
+    tml_def = {
+        "table": {
+            "name": "metrics",
+            "db_table": "metrics",
+            "columns": [
+                {
+                    "name": "total",
+                    "db_column_name": "amount",
+                    "data_type": "DOUBLE",
+                    "properties": {"column_type": "MEASURE", "aggregation": "SUM"},
+                },
+                {
+                    "name": "unique_users",
+                    "db_column_name": "user_id",
+                    "data_type": "INT64",
+                    "properties": {"column_type": "MEASURE", "aggregation": "COUNT_DISTINCT"},
+                },
+                {
+                    "name": "avg_amount",
+                    "db_column_name": "amount",
+                    "data_type": "DOUBLE",
+                    "properties": {"column_type": "MEASURE", "aggregation": "AVERAGE"},
+                },
+            ],
+        }
+    }
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".tml", delete=False) as f:
+        yaml.safe_dump(tml_def, f, sort_keys=False)
+        temp_path = Path(f.name)
+
+    try:
+        adapter = ThoughtSpotAdapter()
+        graph = adapter.parse(temp_path)
+        model = graph.models["metrics"]
+
+        assert model.get_metric("total").agg == "sum"
+        assert model.get_metric("unique_users").agg == "count_distinct"
+        assert model.get_metric("avg_amount").agg == "avg"
+    finally:
+        temp_path.unlink(missing_ok=True)
+
+
+# =============================================================================
+# LOADER AUTO-DETECTION
+# =============================================================================
+
+
+def test_thoughtspot_auto_detect_loader():
+    """Test ThoughtSpot TML auto-detection in loaders.py."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tml_file = Path(tmpdir) / "orders.table.tml"
+        tml_file.write_text(
+            """
+table:
+  name: orders
+  db_table: orders
+  columns: []
+"""
+        )
+
+        layer = SemanticLayer()
+        load_from_directory(layer, tmpdir)
+
+        assert "orders" in layer.graph.models
diff --git a/tests/adapters/thoughtspot/test_roundtrip.py b/tests/adapters/thoughtspot/test_roundtrip.py
new file mode 100644
index 0000000..f2d82da
--- /dev/null
+++ b/tests/adapters/thoughtspot/test_roundtrip.py
@@ -0,0 +1,84 @@
+"""Tests for ThoughtSpot adapter roundtrip (parse -> export -> parse)."""
+
+from pathlib import Path
+
+import yaml
+
+from sidemantic.adapters.thoughtspot import ThoughtSpotAdapter
+from sidemantic.core.model import Model
+from sidemantic.core.relationship import Relationship
+from sidemantic.core.semantic_graph import SemanticGraph
+
+
+def test_thoughtspot_roundtrip_table(tmp_path: Path):
+    """Test roundtrip of a ThoughtSpot table TML file."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/orders.table.tml")
+
+    output_file = tmp_path / "roundtrip.tml"
+    adapter.export(graph, output_file)
+
+    graph2 = adapter.parse(output_file)
+    assert "orders" in graph2.models
+
+    model = graph2.models["orders"]
+    amount = model.get_metric("amount")
+    assert amount is not None
+    assert amount.agg == "sum"
+
+    order_date = model.get_dimension("order_date")
+    assert order_date is not None
+    assert order_date.type == "time"
+
+
+def test_thoughtspot_export_worksheet(tmp_path: Path):
+    """Test exporting a ThoughtSpot worksheet TML file."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/sales.worksheet.tml")
+
+    output_file = tmp_path / "worksheet.tml"
+    adapter.export(graph, output_file)
+
+    data = output_file.read_text()
+    assert "worksheet:" in data
+    assert "joins:" in data
+
+
+def test_thoughtspot_roundtrip_worksheet(tmp_path: Path):
+    """Test roundtrip of worksheet export preserves joins and formulas."""
+    adapter = ThoughtSpotAdapter()
+    graph = adapter.parse("tests/fixtures/thoughtspot/worksheet_multi_join.worksheet.tml")
+
+    output_file = tmp_path / "worksheet_roundtrip.tml"
+    adapter.export(graph, output_file)
+
+    data = yaml.safe_load(output_file.read_text())
+    assert data["worksheet"]["joins"][0]["source"] == "fact_sales"
+
+    graph2 = adapter.parse(output_file)
+    assert "sales_multi_join" in graph2.models
+    model = graph2.models["sales_multi_join"]
+    assert model.sql is not None
+
+    net_revenue = model.get_metric("net_revenue")
+    assert net_revenue is not None
+    assert "gross_revenue" in (net_revenue.sql or "")
+
+
+def test_thoughtspot_export_one_to_many_join_direction(tmp_path: Path):
+    """Ensure one_to_many joins point to foreign key on related model."""
+    adapter = ThoughtSpotAdapter()
+    model = Model(
+        name="customers",
+        table="customers",
+        relationships=[Relationship(name="orders", type="one_to_many", foreign_key="customer_id")],
+    )
+    graph = SemanticGraph()
+    graph.add_model(model)
+
+    output_file = tmp_path / "customers_worksheet.tml"
+    adapter.export(graph, output_file)
+
+    data = yaml.safe_load(output_file.read_text())
+    join_on = data["worksheet"]["joins"][0]["on"]
+    assert join_on == "[orders::customer_id] = [customers::id]"
diff --git a/tests/fixtures/thoughtspot/kitchen_sink.table.tml b/tests/fixtures/thoughtspot/kitchen_sink.table.tml
new file mode 100644
index 0000000..86df919
--- /dev/null
+++ b/tests/fixtures/thoughtspot/kitchen_sink.table.tml
@@ -0,0 +1,115 @@
+guid: "table-kitchen-sink"
+table:
+  name: sales
+  description: Full table example
+  db: ANALYTICS
+  schema: PUBLIC
+  db_table: sales
+  connection:
+    name: Warehouse
+  columns:
+    - name: id
+      db_column_name: id
+      data_type: INT64
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      db_column_name: order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+        format_pattern: "YYYY-MM-DD"
+    - name: order_week
+      db_column_name: order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: WEEKLY
+    - name: order_hour
+      db_column_name: order_ts
+      data_type: TIMESTAMP
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: HOURLY
+    - name: order_month
+      db_column_name: order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: MONTHLY
+    - name: order_quarter
+      db_column_name: order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: QUARTERLY
+    - name: order_year
+      db_column_name: order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: YEARLY
+    - name: is_active
+      db_column_name: is_active
+      data_type: BOOLEAN
+      properties:
+        column_type: ATTRIBUTE
+    - name: status
+      db_column_name: status
+      data_type: VARCHAR
+      custom_name: Order Status
+      description: Current order state
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      custom_name: Gross Revenue
+      description: Total revenue before discounts
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+        format_pattern: "$#,##0.00"
+    - name: avg_order_value
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: AVERAGE
+    - name: min_order_value
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: MIN
+    - name: max_order_value
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: MAX
+    - name: median_order_value
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: MEDIAN
+    - name: distinct_customers
+      db_column_name: customer_id
+      data_type: INT64
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT_DISTINCT
+    - name: revenue_stddev
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: STD_DEVIATION
+    - name: revenue_raw
+      db_column_name: gross_revenue
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: NONE
diff --git a/tests/fixtures/thoughtspot/kitchen_sink.worksheet.tml b/tests/fixtures/thoughtspot/kitchen_sink.worksheet.tml
new file mode 100644
index 0000000..ee45e51
--- /dev/null
+++ b/tests/fixtures/thoughtspot/kitchen_sink.worksheet.tml
@@ -0,0 +1,112 @@
+guid: "worksheet-kitchen-sink"
+worksheet:
+  name: sales_worksheet_full
+  description: Full worksheet example
+  tables:
+    - name: sales
+      id: sales_table
+      fqn: ANALYTICS.PUBLIC.sales
+    - name: customers
+      id: customers_table
+      fqn: ANALYTICS.PUBLIC.customers
+    - name: regions
+      id: regions_table
+      fqn: ANALYTICS.PUBLIC.regions
+  joins:
+    - name: sales_customers
+      source: sales
+      destination: customers
+      type: LEFT_OUTER
+      on: "[sales::customer_id] = [customers::id]"
+      is_one_to_one: false
+    - name: sales_regions
+      source: sales
+      destination: regions
+      type: INNER
+      on: "[sales::region_id] = [regions::id] AND [sales::country_code] = [regions::country_code]"
+      is_one_to_one: true
+  table_paths:
+    - id: sales_path
+      table: sales
+      join_path:
+        - {}
+    - id: customers_path
+      table: customers
+      join_path:
+        - join:
+            - sales_customers
+    - id: regions_path
+      table: regions
+      join_path:
+        - join:
+            - sales_regions
+  formulas:
+    - name: net_revenue
+      expr: "[sales::gross_revenue] - [sales::discount]"
+      id: net_rev
+    - name: avg_order_value
+      expr: "[sales::gross_revenue] / [sales::order_count]"
+  worksheet_columns:
+    - name: order_id
+      description: Order identifier
+      column_id: sales_path::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      column_id: sales_path::order_date
+      data_type: DATE
+      custom_name: Order Date
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+        format_pattern: "YYYY-MM-DD"
+    - name: is_active
+      column_id: sales_path::is_active
+      data_type: BOOLEAN
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers_path::name
+      custom_name: Customer
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      column_id: sales_path::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+        format_pattern: "$#,##0.00"
+    - name: net_revenue
+      formula_id: net_rev
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: avg_order_value
+      properties:
+        column_type: MEASURE
+        aggregation: AVERAGE
+    - name: distinct_customers
+      column_id: sales_path::customer_id
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT_DISTINCT
+    - name: min_order_value
+      column_id: sales_path::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: MIN
+    - name: max_order_value
+      column_id: sales_path::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: MAX
+    - name: median_order_value
+      column_id: sales_path::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: MEDIAN
+    - name: revenue_stddev
+      column_id: sales_path::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: STD_DEVIATION
diff --git a/tests/fixtures/thoughtspot/orders.table.tml b/tests/fixtures/thoughtspot/orders.table.tml
new file mode 100644
index 0000000..bf20116
--- /dev/null
+++ b/tests/fixtures/thoughtspot/orders.table.tml
@@ -0,0 +1,44 @@
+guid: "table-001"
+table:
+  name: orders
+  description: Order facts
+  db: analytics
+  schema: public
+  db_table: orders
+  connection:
+    name: prod
+  columns:
+    - name: id
+      db_column_name: id
+      data_type: INT64
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_id
+      db_column_name: customer_id
+      data_type: INT64
+      properties:
+        column_type: ATTRIBUTE
+    - name: status
+      db_column_name: status
+      data_type: VARCHAR
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      db_column_name: order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+    - name: amount
+      db_column_name: amount
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+        format_pattern: "$#,##0.00"
+    - name: order_count
+      db_column_name: id
+      data_type: INT64
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT
diff --git a/tests/fixtures/thoughtspot/sales.worksheet.tml b/tests/fixtures/thoughtspot/sales.worksheet.tml
new file mode 100644
index 0000000..6611522
--- /dev/null
+++ b/tests/fixtures/thoughtspot/sales.worksheet.tml
@@ -0,0 +1,40 @@
+guid: "worksheet-001"
+worksheet:
+  name: sales_worksheet
+  description: Sales worksheet
+  tables:
+    - name: orders
+    - name: customers
+  joins:
+    - name: orders_customers
+      source: orders
+      destination: customers
+      type: LEFT_OUTER
+      on: "[orders::customer_id] = [customers::id]"
+      is_one_to_one: false
+  table_paths:
+    - id: orders_path
+      table: orders
+      join_path:
+        - join:
+            - orders_customers
+  formulas:
+    - name: revenue_base
+      expr: "[orders::amount]"
+      id: revenue_formula
+  worksheet_columns:
+    - name: order_id
+      column_id: orders_path::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      column_id: orders_path::order_date
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+    - name: total_revenue
+      formula_id: revenue_formula
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+        format_pattern: "$#,##0.00"
diff --git a/tests/fixtures/thoughtspot/table_db_column_properties.table.tml b/tests/fixtures/thoughtspot/table_db_column_properties.table.tml
new file mode 100644
index 0000000..de44540
--- /dev/null
+++ b/tests/fixtures/thoughtspot/table_db_column_properties.table.tml
@@ -0,0 +1,32 @@
+guid: "table-db-column-properties"
+table:
+  name: inventory
+  db: ANALYTICS
+  schema: PUBLIC
+  db_table: inventory
+  columns:
+    - name: sku
+      db_column_name: sku
+      properties:
+        column_type: ATTRIBUTE
+      db_column_properties:
+        data_type: VARCHAR
+    - name: in_stock
+      db_column_name: in_stock
+      properties:
+        column_type: ATTRIBUTE
+      db_column_properties:
+        data_type: BOOLEAN
+    - name: last_updated
+      db_column_name: last_updated
+      properties:
+        column_type: ATTRIBUTE
+      db_column_properties:
+        data_type: TIMESTAMP
+    - name: quantity
+      db_column_name: quantity
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+      db_column_properties:
+        data_type: INT64
diff --git a/tests/fixtures/thoughtspot/table_joins.table.tml b/tests/fixtures/thoughtspot/table_joins.table.tml
new file mode 100644
index 0000000..b001eea
--- /dev/null
+++ b/tests/fixtures/thoughtspot/table_joins.table.tml
@@ -0,0 +1,30 @@
+guid: "table-joins"
+table:
+  id: orders
+  db: ANALYTICS
+  schema: PUBLIC
+  db_table: orders
+  columns:
+    - name: id
+      db_column_name: id
+      data_type: INT64
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_id
+      db_column_name: customer_id
+      data_type: INT64
+      properties:
+        column_type: ATTRIBUTE
+    - name: amount
+      db_column_name: amount
+      data_type: DOUBLE
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+  joins_with:
+    - name: orders_customers
+      destination:
+        name: customers
+      on: "[orders::customer_id] = [customers::id]"
+      type: INNER
+      is_one_to_one: false
diff --git a/tests/fixtures/thoughtspot/worksheet_formula_name.worksheet.tml b/tests/fixtures/thoughtspot/worksheet_formula_name.worksheet.tml
new file mode 100644
index 0000000..f0138c2
--- /dev/null
+++ b/tests/fixtures/thoughtspot/worksheet_formula_name.worksheet.tml
@@ -0,0 +1,14 @@
+guid: "worksheet-formula-name"
+worksheet:
+  name: sales_formula_name
+  tables:
+    - name: sales
+  formulas:
+    - name: net_revenue
+      expr: "[sales::gross_revenue] - [sales::discount]"
+  worksheet_columns:
+    - name: net_revenue
+      formula_id: net_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/worksheet_ids.worksheet.tml b/tests/fixtures/thoughtspot/worksheet_ids.worksheet.tml
new file mode 100644
index 0000000..5593c5f
--- /dev/null
+++ b/tests/fixtures/thoughtspot/worksheet_ids.worksheet.tml
@@ -0,0 +1,55 @@
+guid: "worksheet-ids"
+worksheet:
+  name: sales_ids
+  description: Uses table IDs and fqn
+  tables:
+    - id: sales_table
+      fqn: ANALYTICS.PUBLIC.sales
+    - id: customers_table
+      fqn: ANALYTICS.PUBLIC.customers
+  joins:
+    - name: sales_customers
+      source: sales_table
+      destination: customers_table
+      type: LEFT_OUTER
+      on: "[sales_table::customer_id] = [customers_table::id]"
+      is_one_to_one: false
+  table_paths:
+    - id: sales_path
+      table: sales_table
+      join_path:
+        - {}
+    - id: customers_path
+      table: customers_table
+      join_path:
+        - join:
+            - sales_customers
+  formulas:
+    - name: net_revenue
+      expr: "[sales_path::gross_revenue] - [sales_path::discount]"
+      id: net_revenue_formula
+  worksheet_columns:
+    - name: order_id
+      column_id: sales_path::id
+      properties:
+        column_type: ATTRIBUTE
+    - name: customer_name
+      column_id: customers_path::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      column_id: sales_path::order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+    - name: gross_revenue
+      column_id: sales_path::gross_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: net_revenue
+      formula_id: net_revenue_formula
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
diff --git a/tests/fixtures/thoughtspot/worksheet_multi_join.worksheet.tml b/tests/fixtures/thoughtspot/worksheet_multi_join.worksheet.tml
new file mode 100644
index 0000000..935f9ac
--- /dev/null
+++ b/tests/fixtures/thoughtspot/worksheet_multi_join.worksheet.tml
@@ -0,0 +1,80 @@
+guid: "worksheet-multi-join"
+worksheet:
+  name: sales_multi_join
+  description: Multiple join paths and formulas
+  tables:
+    - id: fact_sales
+      fqn: ANALYTICS.PUBLIC.fact_sales
+    - id: dim_product_main
+      fqn: ANALYTICS.PUBLIC.dim_product
+    - id: dim_product_alt
+      fqn: ANALYTICS.PUBLIC.dim_product_alt
+  joins:
+    - name: sales_product_main
+      source: fact_sales
+      destination: dim_product_main
+      type: INNER
+      on: "[fact_sales::product_id] = [dim_product_main::id]"
+      is_one_to_one: false
+    - name: sales_product_alt
+      source: fact_sales
+      destination: dim_product_alt
+      type: LEFT_OUTER
+      on: "[fact_sales::alt_product_id] = [dim_product_alt::id] AND [fact_sales::country_code] = [dim_product_alt::country_code]"
+      is_one_to_one: true
+  table_paths:
+    - id: fact_sales_path
+      table: fact_sales
+      join_path:
+        - {}
+    - id: product_main_path
+      table: dim_product_main
+      join_path:
+        - join:
+            - sales_product_main
+    - id: product_alt_path
+      table: dim_product_alt
+      join_path:
+        - join:
+            - sales_product_alt
+  formulas:
+    - name: gross_revenue
+      expr: "[fact_sales_path::price] * [fact_sales_path::quantity]"
+      id: gross_rev_formula
+    - name: net_revenue
+      expr: "[gross_revenue] - [fact_sales_path::discount]"
+      id: net_rev_formula
+  worksheet_columns:
+    - name: order_id
+      column_id: fact_sales_path::order_id
+      properties:
+        column_type: ATTRIBUTE
+    - name: order_date
+      column_id: fact_sales_path::order_date
+      data_type: DATE
+      properties:
+        column_type: ATTRIBUTE
+        default_date_bucket: DAILY
+    - name: product_main
+      column_id: product_main_path::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: product_alt
+      column_id: product_alt_path::name
+      properties:
+        column_type: ATTRIBUTE
+    - name: gross_revenue
+      formula_id: gross_rev_formula
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: net_revenue
+      formula_id: net_rev_formula
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
+    - name: order_count
+      column_id: fact_sales_path::order_id
+      properties:
+        column_type: MEASURE
+        aggregation: COUNT
diff --git a/tests/fixtures/thoughtspot/worksheet_unbracketed_expr.worksheet.tml b/tests/fixtures/thoughtspot/worksheet_unbracketed_expr.worksheet.tml
new file mode 100644
index 0000000..a1f10e7
--- /dev/null
+++ b/tests/fixtures/thoughtspot/worksheet_unbracketed_expr.worksheet.tml
@@ -0,0 +1,27 @@
+guid: "worksheet-unbracketed"
+worksheet:
+  name: sales_unbracketed
+  tables:
+    - name: sales
+    - name: customers
+  joins:
+    - name: sales_customers
+      source: sales
+      destination: customers
+      type: LEFT_OUTER
+      on: "sales.customer_id = customers.id"
+      is_one_to_one: false
+  table_paths:
+    - id: sales_path
+      table: sales
+      join_path:
+        - {}
+  formulas:
+    - name: net_revenue
+      expr: "sales.gross_revenue - sales.discount"
+  worksheet_columns:
+    - name: net_revenue
+      formula_id: net_revenue
+      properties:
+        column_type: MEASURE
+        aggregation: SUM
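
Reviewer note: a minimal end-to-end sketch of the new adapter, using only the public API exercised by the tests above (the output directory `build/tml` is an illustrative placeholder, not part of this change):

```python
from pathlib import Path

from sidemantic import SemanticLayer
from sidemantic.adapters.thoughtspot import ThoughtSpotAdapter
from sidemantic.loaders import load_from_directory

# Parse a single TML file; directories of .tml/.yml/.yaml files also work.
adapter = ThoughtSpotAdapter()
graph = adapter.parse("tests/fixtures/thoughtspot/orders.table.tml")

orders = graph.models["orders"]
print(orders.table)                     # analytics.public.orders
print(orders.get_metric("amount").agg)  # sum

# Export back to TML: a directory target gets one <name>.<table|worksheet>.tml
# per model; models with SQL or relationships round-trip as worksheets.
adapter.export(graph, Path("build/tml"))  # hypothetical output path

# Or let the loader auto-detect TML alongside the other supported formats.
layer = SemanticLayer()
load_from_directory(layer, "tests/fixtures/thoughtspot")
```

Design note on the `join_def.get(True)` fallbacks in `_build_join_sql` and the relationship parsers: PyYAML implements YAML 1.1, where a bare `on:` key deserializes to the boolean `True` rather than the string `"on"`, so hand-written TML joins can surface under a boolean key; `yaml.safe_dump` quotes the string key on export for the same reason.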