diff --git a/AGENTS.md b/AGENTS.md index 07245e17e..582784d33 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -239,6 +239,62 @@ config = AsyncpgConfig( ) ``` +### Custom SQLGlot Expression Pattern + +For dialect-specific SQL generation (e.g., vector distance functions): + +```python +# In sqlspec/builder/_custom_expressions.py +from sqlglot import exp +from typing import Any + +class CustomExpression(exp.Expression): + """Custom expression with dialect-aware SQL generation.""" + arg_types = {"this": True, "expression": True, "metric": False} + + def sql(self, dialect: "Any | None" = None, **opts: Any) -> str: + """Override sql() method for dialect-specific generation.""" + dialect_name = str(dialect).lower() if dialect else "generic" + + left_sql = self.left.sql(dialect=dialect, **opts) + right_sql = self.right.sql(dialect=dialect, **opts) + + if dialect_name == "postgres": + return self._sql_postgres(left_sql, right_sql) + if dialect_name == "mysql": + return self._sql_mysql(left_sql, right_sql) + return self._sql_generic(left_sql, right_sql) + +# Register with SQLGlot generator system +def _register_with_sqlglot() -> None: + from sqlglot.dialects.postgres import Postgres + from sqlglot.generator import Generator + + def custom_sql_base(generator: "Generator", expression: "CustomExpression") -> str: + return expression._sql_generic(generator.sql(expression.left), generator.sql(expression.right)) + + Generator.TRANSFORMS[CustomExpression] = custom_sql_base + Postgres.Generator.TRANSFORMS[CustomExpression] = custom_sql_postgres + +_register_with_sqlglot() +``` + +**Use this pattern when**: +- Database syntax varies significantly across dialects +- Standard SQLGlot expressions don't match any database's native syntax +- Need operator syntax (e.g., `<->`) vs function calls (e.g., `DISTANCE()`) + +**Key principles**: +- Override `.sql()` method for dialect detection +- Register with SQLGlot's TRANSFORMS for nested expression support +- Store metadata (like metric) as 
`exp.Identifier` in `arg_types` for runtime access +- Provide generic fallback for unsupported dialects + +**Example**: `VectorDistance` in `sqlspec/builder/_vector_expressions.py` generates: +- PostgreSQL: `embedding <-> '[0.1,0.2]'` (operator) +- MySQL: `DISTANCE(embedding, STRING_TO_VECTOR('[0.1,0.2]'), 'EUCLIDEAN')` (function) +- Oracle: `VECTOR_DISTANCE(embedding, TO_VECTOR('[0.1,0.2]'), EUCLIDEAN)` (function) + ### Error Handling - Custom exceptions inherit from `SQLSpecError` in `sqlspec/exceptions.py` diff --git a/Makefile b/Makefile index d57231624..356888fbd 100644 --- a/Makefile +++ b/Makefile @@ -117,7 +117,7 @@ release: ## Bump version and create re @make docs @make clean @make build - @uv lock --upgrade-package litestar-vite >/dev/null 2>&1 + @uv lock --upgrade-package sqlspec >/dev/null 2>&1 @uv run bump-my-version bump $(bump) @echo "${OK} Release complete 🎉" diff --git a/docs/reference/builder.rst b/docs/reference/builder.rst index 32e5af1b2..efbe5ac37 100644 --- a/docs/reference/builder.rst +++ b/docs/reference/builder.rst @@ -514,6 +514,238 @@ Base Classes :undoc-members: :show-inheritance: +Vector Distance Functions +========================= + +The query builder provides portable vector similarity search functions that generate dialect-specific SQL across PostgreSQL (pgvector), MySQL 9+, Oracle 23ai+, BigQuery, DuckDB, and other databases. + +.. note:: + Vector functions are designed for AI/ML similarity search with embedding vectors. The SQL is generated at ``build(dialect=X)`` time, enabling portable query definitions that execute against multiple database types. + +Column Methods +-------------- + +.. py:method:: Column.vector_distance(other_vector, metric="euclidean") + + Calculate vector distance using the specified metric. + + Generates dialect-specific SQL for vector distance operations. 
+ + :param other_vector: Vector to compare against (list, Column reference, or SQLGlot expression) + :type other_vector: list[float] | Column | exp.Expression + :param metric: Distance metric to use (default: "euclidean") + :type metric: str + :return: FunctionColumn expression for use in SELECT, WHERE, ORDER BY + :rtype: FunctionColumn + + **Supported Metrics:** + + - ``euclidean`` - L2 distance (default) + - ``cosine`` - Cosine distance + - ``inner_product`` - Negative inner product (for similarity ranking) + - ``euclidean_squared`` - L2² distance (Oracle only) + + **Examples:** + + .. code-block:: python + + from sqlspec import sql + from sqlspec.builder import Column + + query_vector = [0.1, 0.2, 0.3] + + # Basic distance query + query = ( + sql.select("id", "title", Column("embedding").vector_distance(query_vector).alias("distance")) + .from_("documents") + .where(Column("embedding").vector_distance(query_vector) < 0.5) + .order_by("distance") + .limit(10) + ) + + # Using dynamic attribute access + query = ( + sql.select("*") + .from_("docs") + .order_by(sql.embedding.vector_distance(query_vector, metric="cosine")) + .limit(10) + ) + + # Compare two vector columns + query = ( + sql.select("*") + .from_("pairs") + .where(Column("vec1").vector_distance(Column("vec2"), metric="euclidean") < 0.3) + ) + +.. py:method:: Column.cosine_similarity(other_vector) + + Calculate cosine similarity (1 - cosine_distance). + + Convenience method that computes similarity instead of distance. + Returns values in range [-1, 1] where 1 = vectors pointing in the same direction. + + :param other_vector: Vector to compare against + :type other_vector: list[float] | Column | exp.Expression + :return: FunctionColumn expression computing ``1 - cosine_distance(self, other_vector)`` + :rtype: FunctionColumn + + **Example:** + + .. 
code-block:: python + + from sqlspec import sql + + query_vector = [0.5, 0.5, 0.5] + + # Find most similar documents + query = ( + sql.select("id", "title", sql.embedding.cosine_similarity(query_vector).alias("similarity")) + .from_("documents") + .order_by(sql.column("similarity").desc()) + .limit(10) + ) + +Database Compatibility +---------------------- + +Vector functions generate dialect-specific SQL: + +.. list-table:: + :header-rows: 1 + :widths: 15 25 25 35 + + * - Database + - Euclidean + - Cosine + - Inner Product + * - PostgreSQL (pgvector) + - ``<->`` operator + - ``<=>`` operator + - ``<#>`` operator + * - MySQL 9+ + - ``DISTANCE(..., 'EUCLIDEAN')`` + - ``DISTANCE(..., 'COSINE')`` + - ``DISTANCE(..., 'DOT')`` + * - Oracle 23ai+ + - ``VECTOR_DISTANCE(..., EUCLIDEAN)`` + - ``VECTOR_DISTANCE(..., COSINE)`` + - ``VECTOR_DISTANCE(..., DOT)`` + * - BigQuery + - ``EUCLIDEAN_DISTANCE(...)`` + - ``COSINE_DISTANCE(...)`` + - ``DOT_PRODUCT(...)`` + * - DuckDB (VSS extension) + - ``array_distance(...)`` + - ``array_cosine_distance(...)`` + - ``array_negative_inner_product(...)`` + * - Generic + - ``VECTOR_DISTANCE(..., 'EUCLIDEAN')`` + - ``VECTOR_DISTANCE(..., 'COSINE')`` + - ``VECTOR_DISTANCE(..., 'INNER_PRODUCT')`` + +Usage Examples +-------------- + +**Basic Similarity Search** + +.. code-block:: python + + from sqlspec import sql + + # Find documents similar to query vector + query_vector = [0.1, 0.2, 0.3] + + query = ( + sql.select("id", "title", sql.embedding.vector_distance(query_vector).alias("distance")) + .from_("documents") + .order_by("distance") + .limit(10) + ) + + # PostgreSQL generates: SELECT id, title, embedding <-> '[0.1,0.2,0.3]' AS distance ... + # MySQL generates: SELECT id, title, DISTANCE(embedding, STRING_TO_VECTOR('[0.1,0.2,0.3]'), 'EUCLIDEAN') AS distance ... + # Oracle generates: SELECT id, title, VECTOR_DISTANCE(embedding, TO_VECTOR('[0.1,0.2,0.3]'), EUCLIDEAN) AS distance ... + +**Threshold Filtering** + +.. 
code-block:: python + + # Find documents within distance threshold + query = ( + sql.select("*") + .from_("documents") + .where(sql.embedding.vector_distance(query_vector, metric="euclidean") < 0.5) + .order_by(sql.embedding.vector_distance(query_vector)) + ) + +**Similarity Ranking** + +.. code-block:: python + + # Rank by cosine similarity (higher = more similar) + query = ( + sql.select("id", "content", sql.embedding.cosine_similarity(query_vector).alias("score")) + .from_("articles") + .order_by(sql.column("score").desc()) + .limit(5) + ) + +**Multiple Metrics** + +.. code-block:: python + + # Compare different distance metrics in single query + query = ( + sql.select( + "id", + sql.embedding.vector_distance(query_vector, metric="euclidean").alias("l2_dist"), + sql.embedding.vector_distance(query_vector, metric="cosine").alias("cos_dist"), + sql.embedding.cosine_similarity(query_vector).alias("similarity") + ) + .from_("documents") + .limit(10) + ) + +**Combined Filters** + +.. code-block:: python + + # Vector search with additional filters + query = ( + sql.select("*") + .from_("products") + .where("category = ?") + .where("in_stock = TRUE") + .where(sql.embedding.vector_distance(query_vector) < 0.3) + .order_by(sql.embedding.vector_distance(query_vector)) + .limit(20) + ) + +Dialect-Agnostic Construction +------------------------------ + +Queries are constructed once and executed against multiple databases: + +.. 
code-block:: python + + from sqlspec import sql + + # Define query once + query = ( + sql.select("id", "title", sql.embedding.vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("documents") + .order_by("distance") + .limit(10) + ) + + # Execute with different adapters + pg_result = await pg_session.execute(query) # → PostgreSQL SQL with <-> operator + mysql_result = await mysql_session.execute(query) # → MySQL SQL with DISTANCE() + oracle_result = await oracle_session.execute(query) # → Oracle SQL with VECTOR_DISTANCE() + +The dialect is selected at ``build(dialect=X)`` time based on the driver, not at query construction time. + Filter Integration ================== diff --git a/sqlspec/adapters/adbc/driver.py b/sqlspec/adapters/adbc/driver.py index 4aa0e457e..8dd2e1de3 100644 --- a/sqlspec/adapters/adbc/driver.py +++ b/sqlspec/adapters/adbc/driver.py @@ -826,6 +826,10 @@ def get_adbc_statement_config(detected_dialect: str) -> StatementConfig: "type_coercion_map": type_map, } + if detected_dialect == "duckdb": + parameter_overrides["preserve_parameter_format"] = False + parameter_overrides["supported_execution_parameter_styles"] = {ParameterStyle.QMARK, ParameterStyle.NUMERIC} + if detected_dialect in {"postgres", "postgresql"}: parameter_overrides["ast_transformer"] = build_null_pruning_transform(dialect=sqlglot_dialect) diff --git a/sqlspec/builder/_base.py b/sqlspec/builder/_base.py index b77f59b7e..3613f5954 100644 --- a/sqlspec/builder/_base.py +++ b/sqlspec/builder/_base.py @@ -319,11 +319,25 @@ def _parameterize_expression(self, expression: exp.Expression) -> exp.Expression A new expression with literals replaced by parameter placeholders """ + from sqlspec.builder._vector_expressions import VectorDistance + def replacer(node: exp.Expression) -> exp.Expression: if isinstance(node, exp.Literal): if node.this in {True, False, None}: return node - param_name = self._add_parameter(node.this, context="where") + + parent = node.parent + if 
isinstance(parent, exp.Array) and node.find_ancestor(VectorDistance) is not None: + return node + + value = node.this + if node.is_number and isinstance(node.this, str): + try: + value = float(node.this) if "." in node.this or "e" in node.this.lower() else int(node.this) + except ValueError: + value = node.this + + param_name = self._add_parameter(value, context="where") return exp.Placeholder(this=param_name) return node diff --git a/sqlspec/builder/_column.py b/sqlspec/builder/_column.py index 17eafe19b..0b83b03fe 100644 --- a/sqlspec/builder/_column.py +++ b/sqlspec/builder/_column.py @@ -254,6 +254,122 @@ def count_all() -> "FunctionColumn": """SQL COUNT(*) function.""" return FunctionColumn(exp.Count(this=exp.Star())) + @staticmethod + def _normalize_metric(metric: str) -> str: + """Normalize and validate vector distance metric.""" + normalized_metric = metric.lower() + valid_metrics = {"euclidean", "cosine", "inner_product", "euclidean_squared"} + if normalized_metric not in valid_metrics: + msg = f"Invalid metric: {metric}. Must be one of {valid_metrics}" + raise ValueError(msg) + return normalized_metric + + def _convert_vector_value(self, value: "list[float] | Column | exp.Expression") -> "exp.Expression": + """Convert a vector input into a SQLGlot expression.""" + if isinstance(value, list): + return exp.Array(expressions=[exp.Literal.number(v) for v in value]) + if isinstance(value, Column): + return value._expression + if isinstance(value, exp.Expression): + return value + msg = f"Unsupported vector type: {type(value)}" + raise TypeError(msg) + + def vector_distance( + self, other_vector: "list[float] | Column | exp.Expression", metric: str = "euclidean" + ) -> "FunctionColumn": + """Calculate vector distance using specified metric. 
+ + Generates dialect-specific SQL for vector distance calculation: + - PostgreSQL (pgvector): Operators <->, <=>, <#> + - MySQL 9+: DISTANCE(col, vec, 'METRIC') function + - Oracle 23ai+: VECTOR_DISTANCE(col, vec, METRIC) function + + Args: + other_vector: Vector to compare against (list, Column, or SQLGlot expression). + metric: Distance metric to use. Options: + - "euclidean": L2 distance (default) + - "cosine": Cosine distance + - "inner_product": Negative inner product + - "euclidean_squared": L2² distance (Oracle only) + + Returns: + FunctionColumn expression for use in SELECT, WHERE, ORDER BY. + + Examples: + Basic distance query with threshold: + >>> query = ( + ... sql.select("*") + ... .from_("docs") + ... .where( + ... Column("embedding").vector_distance( + ... [0.1, 0.2], metric="euclidean" + ... ) + ... < 0.5 + ... ) + ... ) + + Distance in SELECT clause with alias: + >>> query = ( + ... sql.select( + ... "id", + ... Column("embedding") + ... .vector_distance([0.1, 0.2]) + ... .as_("dist"), + ... ) + ... .from_("docs") + ... .order_by("dist") + ... ) + + Compare two vector columns: + >>> query = ( + ... sql.select("*") + ... .from_("pairs") + ... .where( + ... Column("vec1").vector_distance( + ... Column("vec2"), metric="cosine" + ... ) + ... < 0.3 + ... ) + ... ) + """ + from sqlspec.builder._vector_expressions import VectorDistance + + normalized_metric = self._normalize_metric(metric) + vec_expr = self._convert_vector_value(other_vector) + distance_expr = VectorDistance(this=self._expression, expression=vec_expr, metric=normalized_metric) + return FunctionColumn(distance_expr) + + def cosine_similarity(self, other_vector: "list[float] | Column | exp.Expression") -> "FunctionColumn": + """Calculate cosine similarity (1 - cosine_distance). + + Convenience method that computes similarity instead of distance. + Returns values in range [-1, 1] where 1 = identical vectors. + + Args: + other_vector: Vector to compare against (list, Column, or expression). 
+ + Returns: + FunctionColumn expression: 1 - cosine_distance(self, other_vector). + + Examples: + Find most similar documents: + >>> query = ( + ... sql.select( + ... "id", + ... Column("embedding") + ... .cosine_similarity([0.1, 0.2]) + ... .as_("score"), + ... ) + ... .from_("docs") + ... .order_by(sql.column("score").desc()) + ... .limit(10) + ... ) + """ + cosine_dist = self.vector_distance(other_vector, metric="cosine") + similarity_expr = exp.Sub(this=exp.Literal.number(1), expression=exp.Paren(this=cosine_dist._expression)) # pyright: ignore[reportPrivateUsage] + return FunctionColumn(similarity_expr) + def alias(self, alias_name: str) -> exp.Expression: """Create an aliased column expression.""" return exp.Alias(this=self._expression, alias=alias_name) @@ -294,19 +410,40 @@ class FunctionColumn: __slots__ = ("_expression",) - def __init__(self, expression: exp.Expression) -> None: + def __init__(self, expression: "exp.Expression") -> None: self._expression = expression def _convert_value(self, value: Any) -> exp.Expression: """Convert a Python value to a SQLGlot expression.""" return _convert_value(value) + @property + def sqlglot_expression(self) -> "exp.Expression": + """Return underlying SQLGlot expression.""" + return self._expression + def __eq__(self, other: object) -> ColumnExpression: # type: ignore[override] return ColumnExpression(exp.EQ(this=self._expression, expression=self._convert_value(other))) def __ne__(self, other: object) -> ColumnExpression: # type: ignore[override] return ColumnExpression(exp.NEQ(this=self._expression, expression=self._convert_value(other))) + def __gt__(self, other: Any) -> ColumnExpression: + """Greater than (>).""" + return ColumnExpression(exp.GT(this=self._expression, expression=self._convert_value(other))) + + def __ge__(self, other: Any) -> ColumnExpression: + """Greater than or equal (>=).""" + return ColumnExpression(exp.GTE(this=self._expression, expression=self._convert_value(other))) + + def __lt__(self, 
other: Any) -> ColumnExpression: + """Less than (<).""" + return ColumnExpression(exp.LT(this=self._expression, expression=self._convert_value(other))) + + def __le__(self, other: Any) -> ColumnExpression: + """Less than or equal (<=).""" + return ColumnExpression(exp.LTE(this=self._expression, expression=self._convert_value(other))) + def like(self, pattern: str) -> ColumnExpression: return ColumnExpression(exp.Like(this=self._expression, expression=self._convert_value(pattern))) @@ -355,10 +492,26 @@ def not_any_(self, values: Iterable[Any]) -> ColumnExpression: converted_values = [self._convert_value(v) for v in values] return ColumnExpression(exp.NEQ(this=self._expression, expression=exp.Any(expressions=converted_values))) - def alias(self, alias_name: str) -> exp.Expression: + def alias(self, alias_name: str) -> "exp.Expression": """Create an aliased function expression.""" return exp.Alias(this=self._expression, alias=alias_name) + def as_(self, alias: str) -> "exp.Alias": + """Create an aliased expression using sqlglot helper.""" + return cast("exp.Alias", exp.alias_(self._expression, alias)) + + def cast(self, data_type: str) -> "FunctionColumn": + """SQL CAST() function.""" + return FunctionColumn(exp.Cast(this=self._expression, to=exp.DataType.build(data_type))) + + def asc(self) -> "exp.Ordered": + """Create an ASC ordering expression.""" + return exp.Ordered(this=self._expression, desc=False) + + def desc(self) -> "exp.Ordered": + """Create a DESC ordering expression.""" + return exp.Ordered(this=self._expression, desc=True) + def __hash__(self) -> int: """Hash based on the SQL expression.""" return hash(self._expression.sql()) diff --git a/sqlspec/builder/_select.py b/sqlspec/builder/_select.py index de1321045..f266d95a2 100644 --- a/sqlspec/builder/_select.py +++ b/sqlspec/builder/_select.py @@ -254,10 +254,19 @@ def from_(self, table: str | exp.Expression | Any, alias: str | None = None) -> elif is_expression(table): from_expr = exp.alias_(table, 
alias) if alias else table elif has_query_builder_parameters(table): - subquery = table.build() - sql_text = subquery.sql if hasattr(subquery, "sql") and not callable(subquery.sql) else str(subquery) - subquery_exp = exp.paren(exp.maybe_parse(sql_text, dialect=getattr(builder, "dialect", None))) - from_expr = exp.alias_(subquery_exp, alias) if alias else subquery_exp + subquery_expression = table.get_expression() + if subquery_expression is None: + msg = "Subquery builder has no expression to include in FROM clause." + raise SQLBuilderError(msg) + + subquery_copy = subquery_expression.copy() + base_builder = cast("QueryBuilder", builder) + param_mapping = base_builder._merge_cte_parameters(alias or "subquery", table.parameters) + if param_mapping: + subquery_copy = base_builder._update_placeholders_in_expression(subquery_copy, param_mapping) + + wrapped_subquery = exp.paren(subquery_copy) + from_expr = exp.alias_(wrapped_subquery, alias) if alias else wrapped_subquery else: from_expr = table builder.set_expression(select_expr.from_(from_expr, copy=False)) diff --git a/sqlspec/builder/_vector_expressions.py b/sqlspec/builder/_vector_expressions.py new file mode 100644 index 000000000..b500268d4 --- /dev/null +++ b/sqlspec/builder/_vector_expressions.py @@ -0,0 +1,240 @@ +"""Custom SQLGlot expressions for vector distance operations. + +Provides dialect-specific SQL generation for vector similarity search +across PostgreSQL (pgvector), MySQL 9+, and Oracle 23ai+. +""" + +from typing import Any + +from sqlglot import exp + +__all__ = ("VectorDistance",) + + +class VectorDistance(exp.Expression): + """Vector distance expression with dialect-specific generation. 
+ + Generates database-specific SQL for vector distance calculations: + - PostgreSQL (pgvector): Operators <->, <=>, <#> + - MySQL 9+: DISTANCE(col, vec, 'METRIC') function + - Oracle 23ai+: VECTOR_DISTANCE(col, vec, METRIC) function + - Generic: VECTOR_DISTANCE(col, vec, 'METRIC') function + + The metric is stored as a raw string attribute (not parametrized) and drives + dialect-specific generation at SQL build time. + """ + + arg_types = {"this": True, "expression": True, "metric": False} + + def __init__(self, **args: Any) -> None: + """Initialize VectorDistance with metric stored in args.""" + metric_value = args.get("metric", "euclidean") + if isinstance(metric_value, exp.Literal): + metric_value = str(metric_value.this).lower() + elif isinstance(metric_value, exp.Identifier): + metric_value = metric_value.this.lower() + elif isinstance(metric_value, str): + metric_value = metric_value.lower() + else: + metric_value = "euclidean" + + args["metric"] = exp.Identifier(this=metric_value) + super().__init__(**args) + + @property + def left(self) -> "exp.Expression": + """Get the left operand (column).""" + result: exp.Expression = self.this + return result + + @property + def right(self) -> "exp.Expression": + """Get the right operand (vector value).""" + result: exp.Expression = self.expression + return result + + @property + def metric(self) -> str: + """Get the distance metric as raw string (not parametrized).""" + metric_expr = self.args.get("metric") + if isinstance(metric_expr, exp.Identifier): + metric_name: str = metric_expr.this + return metric_name.lower() + return "euclidean" + + def sql(self, dialect: "Any | None" = None, **opts: Any) -> str: + """Generate dialect-specific SQL. + + This overrides the default sql() method to provide custom + dialect-specific generation for vector distance operations. + + Args: + dialect: Target SQL dialect (postgres, mysql, oracle, bigquery, duckdb, etc.) 
+ **opts: Additional SQL generation options + + Returns: + Dialect-specific SQL string + """ + dialect_name = str(dialect).lower() if dialect else "generic" + + left_sql = self.left.sql(dialect=dialect, **opts) + right_sql = self.right.sql(dialect=dialect, **opts) + metric = self.metric + + if dialect_name in {"postgres", "postgresql"}: + return self._sql_postgres(left_sql, right_sql, metric) + + if dialect_name == "mysql": + return self._sql_mysql(left_sql, right_sql, metric) + + if dialect_name == "oracle": + return self._sql_oracle(left_sql, right_sql, metric) + + if dialect_name == "bigquery": + return self._sql_bigquery(left_sql, right_sql, metric) + + if dialect_name == "duckdb": + return self._sql_duckdb(left_sql, right_sql, metric) + + return self._sql_generic(left_sql, right_sql, metric) + + def _sql_postgres(self, left: str, right: str, metric: str) -> str: + """Generate PostgreSQL pgvector operator syntax.""" + operator_map = {"euclidean": "<->", "cosine": "<=>", "inner_product": "<#>"} + + operator = operator_map.get(metric) + if operator: + return f"{left} {operator} {right}" + + return self._sql_generic(left, right, metric) + + def _sql_mysql(self, left: str, right: str, metric: str) -> str: + """Generate MySQL DISTANCE function syntax.""" + metric_map = {"euclidean": "EUCLIDEAN", "cosine": "COSINE", "inner_product": "DOT"} + + mysql_metric = metric_map.get(metric, "EUCLIDEAN") + + if ("ARRAY" in right or "[" in right) and "STRING_TO_VECTOR" not in right: + right = f"STRING_TO_VECTOR({right})" + + return f"DISTANCE({left}, {right}, '{mysql_metric}')" + + def _sql_oracle(self, left: str, right: str, metric: str) -> str: + """Generate Oracle VECTOR_DISTANCE function syntax.""" + metric_map = { + "euclidean": "EUCLIDEAN", + "cosine": "COSINE", + "inner_product": "DOT", + "euclidean_squared": "EUCLIDEAN_SQUARED", + } + + oracle_metric = metric_map.get(metric, "EUCLIDEAN") + + if isinstance(self.expression, exp.Array): + values = [] + for expr in 
self.expression.expressions: + if isinstance(expr, exp.Literal): + values.append(str(expr.this)) + else: # pragma: no cover - defensive + values.append(expr.sql(dialect="oracle")) + right = f"TO_VECTOR('[{', '.join(values)}]')" + elif ("ARRAY" in right or "[" in right) and "TO_VECTOR" not in right: + right = f"TO_VECTOR({right})" + + return f"VECTOR_DISTANCE({left}, {right}, {oracle_metric})" + + def _sql_bigquery(self, left: str, right: str, metric: str) -> str: + """Generate BigQuery vector distance function syntax.""" + function_map = {"euclidean": "EUCLIDEAN_DISTANCE", "cosine": "COSINE_DISTANCE", "inner_product": "DOT_PRODUCT"} + + function_name = function_map.get(metric) + if function_name: + return f"{function_name}({left}, {right})" + + return self._sql_generic(left, right, metric) + + def _sql_duckdb(self, left: str, right: str, metric: str) -> str: + """Generate DuckDB VSS extension function syntax. + + DuckDB's VSS extension provides: + - array_distance(): L2 (Euclidean) distance + - array_cosine_distance(): Cosine distance (1 - cosine_similarity) + - array_negative_inner_product(): Negative inner product + + Note: Array literals must be cast to DOUBLE[] since DuckDB infers + decimal literals as DECIMAL type, but VSS functions require DOUBLE[]. 
+ """ + function_map = { + "euclidean": "array_distance", + "cosine": "array_cosine_distance", + "inner_product": "array_negative_inner_product", + } + target_type = "DOUBLE[]" + if isinstance(self.expression, exp.Array) and self.expression.expressions: + target_type = f"DOUBLE[{len(self.expression.expressions)}]" + + function_name = function_map.get(metric) + if function_name: + right_cast = f"CAST({right} AS {target_type})" + return f"{function_name}({left}, {right_cast})" + + return self._sql_generic(left, right, metric) + + def _sql_generic(self, left: str, right: str, metric: str) -> str: + """Generate generic VECTOR_DISTANCE function syntax.""" + return f"VECTOR_DISTANCE({left}, {right}, '{metric.upper()}')" + + +def _register_with_sqlglot() -> None: + """Register VectorDistance with SQLGlot's generator dispatch system.""" + from sqlglot.dialects.bigquery import BigQuery + from sqlglot.dialects.duckdb import DuckDB + from sqlglot.dialects.mysql import MySQL + from sqlglot.dialects.oracle import Oracle + from sqlglot.dialects.postgres import Postgres + from sqlglot.generator import Generator + + def vector_distance_sql_base(generator: "Generator", expression: "VectorDistance") -> str: + """Base generator for VectorDistance expressions.""" + return expression._sql_generic( # pyright: ignore[reportPrivateUsage] + generator.sql(expression.left), generator.sql(expression.right), expression.metric + ) + + def vector_distance_sql_postgres(generator: "Generator", expression: "VectorDistance") -> str: + """PostgreSQL generator for VectorDistance expressions.""" + return expression._sql_postgres( # pyright: ignore[reportPrivateUsage] + generator.sql(expression.left), generator.sql(expression.right), expression.metric + ) + + def vector_distance_sql_mysql(generator: "Generator", expression: "VectorDistance") -> str: + """MySQL generator for VectorDistance expressions.""" + return expression._sql_mysql(generator.sql(expression.left), generator.sql(expression.right), 
expression.metric) # pyright: ignore[reportPrivateUsage] + + def vector_distance_sql_oracle(generator: "Generator", expression: "VectorDistance") -> str: + """Oracle generator for VectorDistance expressions.""" + return expression._sql_oracle( # pyright: ignore[reportPrivateUsage] + generator.sql(expression.left), generator.sql(expression.right), expression.metric + ) + + def vector_distance_sql_bigquery(generator: "Generator", expression: "VectorDistance") -> str: + """BigQuery generator for VectorDistance expressions.""" + return expression._sql_bigquery( # pyright: ignore[reportPrivateUsage] + generator.sql(expression.left), generator.sql(expression.right), expression.metric + ) + + def vector_distance_sql_duckdb(generator: "Generator", expression: "VectorDistance") -> str: + """DuckDB generator for VectorDistance expressions.""" + return expression._sql_duckdb( # pyright: ignore[reportPrivateUsage] + generator.sql(expression.left), generator.sql(expression.right), expression.metric + ) + + Generator.TRANSFORMS[VectorDistance] = vector_distance_sql_base + + Postgres.Generator.TRANSFORMS[VectorDistance] = vector_distance_sql_postgres + MySQL.Generator.TRANSFORMS[VectorDistance] = vector_distance_sql_mysql + Oracle.Generator.TRANSFORMS[VectorDistance] = vector_distance_sql_oracle + BigQuery.Generator.TRANSFORMS[VectorDistance] = vector_distance_sql_bigquery + DuckDB.Generator.TRANSFORMS[VectorDistance] = vector_distance_sql_duckdb + + +_register_with_sqlglot() diff --git a/tests/integration/test_adapters/test_adbc/test_vector_functions.py b/tests/integration/test_adapters/test_adbc/test_vector_functions.py new file mode 100644 index 000000000..d487ff2e3 --- /dev/null +++ b/tests/integration/test_adapters/test_adbc/test_vector_functions.py @@ -0,0 +1,318 @@ +"""Integration tests for vector distance functions with ADBC drivers. 
+ +Tests actual execution of vector distance queries using ADBC with multiple backends: +- PostgreSQL with pgvector extension +- DuckDB with native array functions +""" + +from collections.abc import Generator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.adbc import AdbcDriver +from sqlspec.builder import Column +from sqlspec.typing import PGVECTOR_INSTALLED + +# PostgreSQL ADBC tests +pytestmark_postgres = [ + pytest.mark.xdist_group("postgres"), + pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed"), +] + + +@pytest.fixture +def adbc_postgres_vector_session(adbc_sync_driver: AdbcDriver) -> Generator[AdbcDriver, None, None]: + """Create ADBC PostgreSQL session with pgvector extension and test table.""" + try: + adbc_sync_driver.execute_script("CREATE EXTENSION IF NOT EXISTS vector") + except Exception as e: + pytest.skip(f"pgvector extension not available on server: {e}") + + try: + adbc_sync_driver.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_docs_adbc_pg ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(3) + ) + """ + ) + + adbc_sync_driver.execute_script("TRUNCATE TABLE vector_docs_adbc_pg") + + adbc_sync_driver.execute( + "INSERT INTO vector_docs_adbc_pg (content, embedding) VALUES (?, ?)", ("doc1", "[0.1, 0.2, 0.3]") + ) + adbc_sync_driver.execute( + "INSERT INTO vector_docs_adbc_pg (content, embedding) VALUES (?, ?)", ("doc2", "[0.4, 0.5, 0.6]") + ) + adbc_sync_driver.execute( + "INSERT INTO vector_docs_adbc_pg (content, embedding) VALUES (?, ?)", ("doc3", "[0.7, 0.8, 0.9]") + ) + + yield adbc_sync_driver + finally: + adbc_sync_driver.execute_script("DROP TABLE IF EXISTS vector_docs_adbc_pg") + + +@pytest.mark.postgres +@pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed") +def test_adbc_postgres_euclidean_distance_execution(adbc_postgres_vector_session: AdbcDriver) -> None: + """Test ADBC PostgreSQL euclidean distance operator execution.""" + query = ( + 
sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_adbc_pg") + .order_by("distance") + ) + + result = adbc_postgres_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +@pytest.mark.postgres +@pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed") +def test_adbc_postgres_cosine_distance_execution(adbc_postgres_vector_session: AdbcDriver) -> None: + """Test ADBC PostgreSQL cosine distance operator execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs_adbc_pg") + .order_by("distance") + ) + + result = adbc_postgres_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +@pytest.mark.postgres +@pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed") +def test_adbc_postgres_cosine_similarity_execution(adbc_postgres_vector_session: AdbcDriver) -> None: + """Test ADBC PostgreSQL cosine similarity calculation.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_adbc_pg") + .order_by(sql.column("score").desc()) + ) + + result = adbc_postgres_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +# DuckDB ADBC tests +pytestmark_duckdb = [pytest.mark.xdist_group("duckdb")] + + +@pytest.fixture +def adbc_duckdb_vector_session(adbc_duckdb_driver: AdbcDriver) -> Generator[AdbcDriver, None, None]: + """Create ADBC DuckDB session with VSS extension 
and test table.""" + try: + # Install and load VSS extension for vector distance functions + adbc_duckdb_driver.execute_script("INSTALL vss") + adbc_duckdb_driver.execute_script("LOAD vss") + except Exception as e: + pytest.skip(f"DuckDB VSS extension not available: {e}") + + try: + adbc_duckdb_driver.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_docs_adbc_duckdb ( + id INTEGER PRIMARY KEY, + content VARCHAR NOT NULL, + embedding DOUBLE[3] + ) + """ + ) + + adbc_duckdb_driver.execute( + "INSERT INTO vector_docs_adbc_duckdb (id, content, embedding) VALUES (?, ?, ?)", + (1, "doc1", [0.1, 0.2, 0.3]), + ) + adbc_duckdb_driver.execute( + "INSERT INTO vector_docs_adbc_duckdb (id, content, embedding) VALUES (?, ?, ?)", + (2, "doc2", [0.4, 0.5, 0.6]), + ) + adbc_duckdb_driver.execute( + "INSERT INTO vector_docs_adbc_duckdb (id, content, embedding) VALUES (?, ?, ?)", + (3, "doc3", [0.7, 0.8, 0.9]), + ) + + yield adbc_duckdb_driver + finally: + adbc_duckdb_driver.execute_script("DROP TABLE IF EXISTS vector_docs_adbc_duckdb") + + +@pytest.mark.duckdb +def test_adbc_duckdb_euclidean_distance_execution(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test ADBC DuckDB euclidean distance array function execution.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_adbc_duckdb") + .order_by("distance") + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +@pytest.mark.duckdb +def test_adbc_duckdb_euclidean_distance_threshold(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test ADBC DuckDB euclidean distance with threshold filter.""" + query = ( + sql.select("content") + .from_("vector_docs_adbc_duckdb") + 
.where(sql.column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.3) + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +@pytest.mark.duckdb +def test_adbc_duckdb_cosine_distance_execution(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test ADBC DuckDB cosine distance array function execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs_adbc_duckdb") + .order_by("distance") + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +@pytest.mark.duckdb +def test_adbc_duckdb_inner_product_execution(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test ADBC DuckDB inner product array function execution.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance"), + ) + .from_("vector_docs_adbc_duckdb") + .order_by("distance") + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 3 + + +@pytest.mark.duckdb +def test_adbc_duckdb_cosine_similarity_execution(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test ADBC DuckDB cosine similarity calculation.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_adbc_duckdb") + .order_by(sql.column("score").desc()) + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +@pytest.mark.duckdb +def test_adbc_duckdb_similarity_top_k_results(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test top-K similarity search with ADBC DuckDB.""" + query = ( + sql.select("content", 
sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_adbc_duckdb") + .order_by(sql.column("score").desc()) + .limit(2) + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +@pytest.mark.duckdb +def test_adbc_duckdb_multiple_distance_metrics(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test multiple distance metrics in same query with ADBC DuckDB.""" + query = sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + ).from_("vector_docs_adbc_duckdb") + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + + +@pytest.mark.duckdb +def test_adbc_duckdb_distance_with_null_vectors(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test vector distance handles NULL vectors correctly with ADBC DuckDB.""" + adbc_duckdb_vector_session.execute( + "INSERT INTO vector_docs_adbc_duckdb (id, content, embedding) VALUES (?, ?, ?)", (4, "doc_null", None) + ) + + query = ( + sql.select("content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_adbc_duckdb") + .where(sql.column("embedding").is_not_null()) + .order_by("distance") + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +@pytest.mark.duckdb +def test_adbc_duckdb_combined_filters_and_distance(adbc_duckdb_vector_session: AdbcDriver) -> None: + """Test combining distance threshold with other filters.""" + query = ( + sql.select("content", 
Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_adbc_duckdb") + .where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = adbc_duckdb_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] diff --git a/tests/integration/test_adapters/test_asyncpg/test_vector_functions.py b/tests/integration/test_adapters/test_asyncpg/test_vector_functions.py new file mode 100644 index 000000000..dae3e1dc0 --- /dev/null +++ b/tests/integration/test_adapters/test_asyncpg/test_vector_functions.py @@ -0,0 +1,280 @@ +"""Integration tests for vector distance functions with PostgreSQL + pgvector. + +Tests actual execution of vector distance queries using PostgreSQL pgvector extension. +""" + +from collections.abc import AsyncGenerator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.asyncpg import AsyncpgDriver +from sqlspec.builder import Column +from sqlspec.typing import PGVECTOR_INSTALLED + +pytestmark = [ + pytest.mark.xdist_group("postgres"), + pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed"), +] + + +@pytest.fixture +async def asyncpg_vector_session(asyncpg_async_driver: AsyncpgDriver) -> AsyncGenerator[AsyncpgDriver, None]: + """Create asyncpg session with pgvector extension and test table.""" + try: + await asyncpg_async_driver.execute_script("CREATE EXTENSION IF NOT EXISTS vector") + except Exception as e: + pytest.skip(f"pgvector extension not available on server: {e}") + + try: + await asyncpg_async_driver.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_docs ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(3) + ) + """ + ) + + await asyncpg_async_driver.execute_script("TRUNCATE TABLE vector_docs") + + await asyncpg_async_driver.execute( + "INSERT INTO 
vector_docs (content, embedding) VALUES ($1, $2)", ("doc1", "[0.1, 0.2, 0.3]") + ) + await asyncpg_async_driver.execute( + "INSERT INTO vector_docs (content, embedding) VALUES ($1, $2)", ("doc2", "[0.4, 0.5, 0.6]") + ) + await asyncpg_async_driver.execute( + "INSERT INTO vector_docs (content, embedding) VALUES ($1, $2)", ("doc3", "[0.7, 0.8, 0.9]") + ) + + yield asyncpg_async_driver + finally: + await asyncpg_async_driver.execute_script("DROP TABLE IF EXISTS vector_docs") + + +async def test_postgres_euclidean_distance_execution(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test PostgreSQL euclidean distance operator execution.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + .order_by("distance") + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +async def test_postgres_euclidean_distance_threshold(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test PostgreSQL euclidean distance with threshold filter.""" + query = ( + sql.select("content").from_("vector_docs").where(sql.column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.3) + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +async def test_postgres_cosine_distance_execution(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test PostgreSQL cosine distance operator execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs") + .order_by("distance") + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 3 + assert 
result[0]["content"] == "doc1" + + +async def test_postgres_inner_product_execution(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test PostgreSQL inner product operator execution.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance"), + ) + .from_("vector_docs") + .order_by("distance") + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 3 + + +async def test_postgres_cosine_similarity_execution(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test PostgreSQL cosine similarity calculation.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs") + .order_by(sql.column("score").desc()) + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +async def test_postgres_similarity_top_k_results(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test top-K similarity search.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs") + .order_by(sql.column("score").desc()) + .limit(2) + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +async def test_postgres_multiple_distance_metrics(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test multiple distance metrics in same query.""" + query = sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + ).from_("vector_docs") + + result = await 
asyncpg_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + + +async def test_postgres_distance_with_null_vectors(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test vector distance handles NULL vectors correctly.""" + await asyncpg_vector_session.execute( + "INSERT INTO vector_docs (content, embedding) VALUES ($1, NULL)", ("doc_null",) + ) + + query = ( + sql.select("content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + .where(sql.column("embedding").is_not_null()) + .order_by("distance") + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +async def test_postgres_combined_filters_and_distance(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test combining distance threshold with other filters.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + .where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] + + +async def test_postgres_distance_in_having_clause(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test vector distance in HAVING clause with GROUP BY.""" + await asyncpg_vector_session.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_groups ( + id SERIAL PRIMARY KEY, + category TEXT NOT NULL, + embedding vector(3) + ) + """ + ) + + try: + await asyncpg_vector_session.execute( + "INSERT INTO vector_groups (category, embedding) VALUES ($1, $2)", ("A", "[0.1, 0.2, 
0.3]") + ) + await asyncpg_vector_session.execute( + "INSERT INTO vector_groups (category, embedding) VALUES ($1, $2)", ("A", "[0.2, 0.3, 0.4]") + ) + await asyncpg_vector_session.execute( + "INSERT INTO vector_groups (category, embedding) VALUES ($1, $2)", ("B", "[0.7, 0.8, 0.9]") + ) + + query = sql.select("category", Column.count_all().alias("count")).from_("vector_groups").group_by("category") + + result = await asyncpg_vector_session.execute(query) + + assert len(result) == 2 + + category_counts = {row["category"]: row["count"] for row in result} + assert category_counts["A"] == 2 + assert category_counts["B"] == 1 + finally: + await asyncpg_vector_session.execute_script("DROP TABLE IF EXISTS vector_groups") + + +async def test_postgres_distance_with_subquery(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test vector distance in subquery.""" + subquery = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + .where(Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) + ) + query = sql.select("*").from_(subquery, alias="subq").where(Column("distance") < 0.5) + + result = await asyncpg_vector_session.execute(query) + + assert len(result) >= 1 + + +async def test_postgres_similarity_score_range(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test cosine similarity returns values in expected range.""" + query = sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")).from_( + "vector_docs" + ) + + result = await asyncpg_vector_session.execute(query) + + for row in result: + score = row["score"] + assert -1 <= score <= 1 + + +async def test_postgres_distance_with_cast(asyncpg_vector_session: AsyncpgDriver) -> None: + """Test vector distance with explicit type casting.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).cast("FLOAT").alias("distance")) + .from_("vector_docs") + .order_by("distance") + ) + 
+ result = await asyncpg_vector_session.execute(query) + + assert len(result) == 3 + assert isinstance(result[0]["distance"], float) diff --git a/tests/integration/test_adapters/test_bigquery/test_vector_functions.py b/tests/integration/test_adapters/test_bigquery/test_vector_functions.py new file mode 100644 index 000000000..26d44ada3 --- /dev/null +++ b/tests/integration/test_adapters/test_bigquery/test_vector_functions.py @@ -0,0 +1,211 @@ +"""Integration tests for vector distance functions with Google BigQuery. + +Tests actual execution of vector distance queries using BigQuery's native +distance functions (EUCLIDEAN_DISTANCE, COSINE_DISTANCE, DOT_PRODUCT). +""" + +import contextlib +from collections.abc import Generator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.bigquery import BigQueryDriver +from sqlspec.builder import Column + +pytestmark = [pytest.mark.xdist_group("bigquery")] + + +@pytest.fixture +def bigquery_vector_session(bigquery_session: BigQueryDriver) -> Generator[BigQueryDriver, None, None]: + """Create BigQuery session with test table containing array columns.""" + table_id = "vector_docs_bigquery" + + try: + try: + bigquery_session.execute("SELECT EUCLIDEAN_DISTANCE([0.1, 0.2], [0.1, 0.2]) AS ok") + except Exception as exc: # pragma: no cover - guard for emulator limitations + pytest.skip(f"BigQuery vector distance functions unavailable: {exc}") + + bigquery_session.execute_script( + f""" + CREATE OR REPLACE TABLE {table_id} ( + id INT64, + content STRING, + embedding ARRAY + ) + """ + ) + + bigquery_session.execute(f"INSERT INTO {table_id} (id, content, embedding) VALUES (1, 'doc1', [0.1, 0.2, 0.3])") + bigquery_session.execute(f"INSERT INTO {table_id} (id, content, embedding) VALUES (2, 'doc2', [0.4, 0.5, 0.6])") + bigquery_session.execute(f"INSERT INTO {table_id} (id, content, embedding) VALUES (3, 'doc3', [0.7, 0.8, 0.9])") + + yield bigquery_session + finally: + with contextlib.suppress(Exception): + 
bigquery_session.execute_script(f"DROP TABLE IF EXISTS {table_id}") + + +def test_bigquery_euclidean_distance_execution(bigquery_vector_session: BigQueryDriver) -> None: + """Test BigQuery EUCLIDEAN_DISTANCE function execution.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_bigquery") + .order_by("distance") + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +def test_bigquery_euclidean_distance_threshold(bigquery_vector_session: BigQueryDriver) -> None: + """Test BigQuery euclidean distance with threshold filter.""" + query = ( + sql.select("content") + .from_("vector_docs_bigquery") + .where(sql.column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.3) + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +def test_bigquery_cosine_distance_execution(bigquery_vector_session: BigQueryDriver) -> None: + """Test BigQuery COSINE_DISTANCE function execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs_bigquery") + .order_by("distance") + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +def test_bigquery_inner_product_execution(bigquery_vector_session: BigQueryDriver) -> None: + """Test BigQuery DOT_PRODUCT function execution.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance"), + ) + .from_("vector_docs_bigquery") + .order_by("distance") + ) + + result = 
bigquery_vector_session.execute(query) + + assert len(result) == 3 + + +def test_bigquery_cosine_similarity_execution(bigquery_vector_session: BigQueryDriver) -> None: + """Test BigQuery cosine similarity calculation.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_bigquery") + .order_by(sql.column("score").desc()) + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +def test_bigquery_similarity_top_k_results(bigquery_vector_session: BigQueryDriver) -> None: + """Test top-K similarity search.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_bigquery") + .order_by(sql.column("score").desc()) + .limit(2) + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +def test_bigquery_multiple_distance_metrics(bigquery_vector_session: BigQueryDriver) -> None: + """Test multiple distance metrics in same query.""" + query = sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + ).from_("vector_docs_bigquery") + + result = bigquery_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + + +def test_bigquery_distance_with_null_vectors(bigquery_vector_session: BigQueryDriver) -> None: + """Test vector distance handles NULL vectors correctly.""" + bigquery_vector_session.execute( + 
"INSERT INTO vector_docs_bigquery (id, content, embedding) VALUES (4, 'doc_null', NULL)" + ) + + query = ( + sql.select("content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_bigquery") + .where(sql.column("embedding").is_not_null()) + .order_by("distance") + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +def test_bigquery_combined_filters_and_distance(bigquery_vector_session: BigQueryDriver) -> None: + """Test combining distance threshold with other filters.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_bigquery") + .where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = bigquery_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] + + +def test_bigquery_similarity_score_range(bigquery_vector_session: BigQueryDriver) -> None: + """Test cosine similarity returns values in expected range.""" + query = sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")).from_( + "vector_docs_bigquery" + ) + + result = bigquery_vector_session.execute(query) + + for row in result: + score = row["score"] + assert -1 <= score <= 1 diff --git a/tests/integration/test_adapters/test_duckdb/test_vector_functions.py b/tests/integration/test_adapters/test_duckdb/test_vector_functions.py new file mode 100644 index 000000000..0acb51660 --- /dev/null +++ b/tests/integration/test_adapters/test_duckdb/test_vector_functions.py @@ -0,0 +1,280 @@ +"""Integration tests for vector distance functions with DuckDB. + +Tests actual execution of vector distance queries using DuckDB array functions. 
+""" + +from collections.abc import Generator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.duckdb import DuckDBDriver +from sqlspec.builder import Column + +pytestmark = pytest.mark.xdist_group("duckdb") + + +@pytest.fixture +def duckdb_vector_session(duckdb_basic_session: DuckDBDriver) -> Generator[DuckDBDriver, None, None]: + """Create DuckDB session with VSS extension and vector test data.""" + try: + # Install and load VSS extension for vector distance functions + duckdb_basic_session.execute_script("INSTALL vss") + duckdb_basic_session.execute_script("LOAD vss") + except Exception as e: + pytest.skip(f"DuckDB VSS extension not available: {e}") + + try: + duckdb_basic_session.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_docs ( + id INTEGER PRIMARY KEY, + content TEXT NOT NULL, + embedding DOUBLE[3] + ) + """ + ) + + duckdb_basic_session.execute_script("DELETE FROM vector_docs") + + duckdb_basic_session.execute( + "INSERT INTO vector_docs (id, content, embedding) VALUES (?, ?, ?)", (1, "doc1", [0.1, 0.2, 0.3]) + ) + duckdb_basic_session.execute( + "INSERT INTO vector_docs (id, content, embedding) VALUES (?, ?, ?)", (2, "doc2", [0.4, 0.5, 0.6]) + ) + duckdb_basic_session.execute( + "INSERT INTO vector_docs (id, content, embedding) VALUES (?, ?, ?)", (3, "doc3", [0.7, 0.8, 0.9]) + ) + + yield duckdb_basic_session + finally: + duckdb_basic_session.execute_script("DROP TABLE IF EXISTS vector_docs") + + +def test_duckdb_euclidean_distance_execution(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB euclidean distance using array functions.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert 
result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +def test_duckdb_euclidean_distance_threshold(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB euclidean distance with threshold filter.""" + query = sql.select("content").from_("vector_docs").where(Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.1) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +def test_duckdb_cosine_distance_execution(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB cosine distance using array functions.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance")) + .from_("vector_docs") + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +def test_duckdb_inner_product_execution(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB inner product using negative dot product.""" + query = ( + sql.select( + "content", Column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance") + ) + .from_("vector_docs") + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 3 + + +def test_duckdb_cosine_similarity_execution(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB cosine similarity calculation.""" + query = ( + sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs") + .order_by(Column("score").desc()) + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +def test_duckdb_similarity_top_k_results(duckdb_vector_session: DuckDBDriver) -> None: + 
"""Test top-K similarity search with DuckDB.""" + query = ( + sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs") + .order_by(Column("score").desc()) + .limit(2) + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +def test_duckdb_multiple_distance_metrics(duckdb_vector_session: DuckDBDriver) -> None: + """Test multiple distance metrics in same DuckDB query.""" + query = sql.select( + "content", + Column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + Column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + ).from_("vector_docs") + + result = duckdb_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + + +def test_duckdb_distance_with_null_vectors(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB vector distance handles NULL vectors correctly.""" + duckdb_vector_session.execute( + "INSERT INTO vector_docs (id, content, embedding) VALUES (?, ?, ?)", (4, "doc_null", None) + ) + + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + .where(Column("embedding").is_not_null()) + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +def test_duckdb_combined_filters_and_distance(duckdb_vector_session: DuckDBDriver) -> None: + """Test combining distance threshold with other filters in DuckDB.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs") + 
.where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] + + +def test_duckdb_similarity_score_range(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB cosine similarity returns values in expected range.""" + query = sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")).from_( + "vector_docs" + ) + + result = duckdb_vector_session.execute(query) + + for row in result: + score = row["score"] + assert -1 <= score <= 1 + + +def test_duckdb_distance_zero_vector(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB distance calculation with zero vector.""" + duckdb_vector_session.execute( + "INSERT INTO vector_docs (id, content, embedding) VALUES (?, ?, ?)", (5, "zero_vec", [0.0, 0.0, 0.0]) + ) + + query = ( + sql.select("content", Column("embedding").vector_distance([0.0, 0.0, 0.0]).alias("distance")) + .from_("vector_docs") + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 4 + assert result[0]["content"] == "zero_vec" + assert result[0]["distance"] == 0.0 + + +def test_duckdb_large_vectors(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB distance with larger dimensional vectors.""" + duckdb_vector_session.execute_script( + """ + CREATE TABLE IF NOT EXISTS large_vectors ( + id INTEGER PRIMARY KEY, + content TEXT NOT NULL, + embedding DOUBLE[10] + ) + """ + ) + + try: + duckdb_vector_session.execute( + "INSERT INTO large_vectors (id, content, embedding) VALUES (?, ?, ?)", (1, "large1", [0.1] * 10) + ) + duckdb_vector_session.execute( + "INSERT INTO large_vectors (id, content, embedding) VALUES (?, ?, ?)", (2, "large2", [0.5] * 10) + ) + + query = ( + sql.select("content", 
Column("embedding").vector_distance([0.1] * 10).alias("distance")) + .from_("large_vectors") + .order_by("distance") + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "large1" + assert result[0]["distance"] < result[1]["distance"] + finally: + duckdb_vector_session.execute_script("DROP TABLE IF EXISTS large_vectors") + + +def test_duckdb_distance_with_aggregation(duckdb_vector_session: DuckDBDriver) -> None: + """Test DuckDB vector distance with aggregation functions.""" + subquery = sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")).from_( + "vector_docs" + ) + query = sql.select("MIN(distance) AS min_distance", "MAX(distance) AS max_distance").from_( + subquery, alias="distances" + ) + + result = duckdb_vector_session.execute(query) + + assert len(result) == 1 + assert "min_distance" in result[0] + assert "max_distance" in result[0] + assert result[0]["min_distance"] < result[0]["max_distance"] diff --git a/tests/integration/test_adapters/test_oracledb/test_vector_functions.py b/tests/integration/test_adapters/test_oracledb/test_vector_functions.py new file mode 100644 index 000000000..69368763c --- /dev/null +++ b/tests/integration/test_adapters/test_oracledb/test_vector_functions.py @@ -0,0 +1,233 @@ +"""Integration tests for vector distance functions with Oracle 23ai+ VECTOR support. + +Tests actual execution of vector distance queries using Oracle Database 23ai+ +VECTOR_DISTANCE function with various distance metrics. 
+""" + +import math +from collections.abc import AsyncGenerator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.oracledb import OracleAsyncDriver +from sqlspec.builder import Column + +pytestmark = [pytest.mark.xdist_group("oracle")] + + +@pytest.fixture +async def oracle_vector_session(oracle_async_session: OracleAsyncDriver) -> AsyncGenerator[OracleAsyncDriver, None]: + """Create Oracle session with VECTOR support and test table.""" + try: + await oracle_async_session.execute_script( + """ + CREATE TABLE vector_docs_oracle ( + id NUMBER GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + content VARCHAR2(100) NOT NULL, + embedding VECTOR(3, FLOAT32) + ) + """ + ) + + await oracle_async_session.execute( + "INSERT INTO vector_docs_oracle (content, embedding) VALUES (:1, :2)", ("doc1", "[0.1, 0.2, 0.3]") + ) + await oracle_async_session.execute( + "INSERT INTO vector_docs_oracle (content, embedding) VALUES (:1, :2)", ("doc2", "[0.4, 0.5, 0.6]") + ) + await oracle_async_session.execute( + "INSERT INTO vector_docs_oracle (content, embedding) VALUES (:1, :2)", ("doc3", "[0.7, 0.8, 0.9]") + ) + + yield oracle_async_session + finally: + await oracle_async_session.execute_script("DROP TABLE vector_docs_oracle") + + +async def test_oracle_euclidean_distance_execution(oracle_vector_session: OracleAsyncDriver) -> None: + """Test Oracle VECTOR_DISTANCE euclidean metric execution.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_oracle") + .order_by("distance") + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +async def test_oracle_euclidean_distance_threshold(oracle_vector_session: OracleAsyncDriver) -> None: + 
"""Test Oracle euclidean distance with threshold filter.""" + query = ( + sql.select("content") + .from_("vector_docs_oracle") + .where(sql.column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.3) + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +async def test_oracle_cosine_distance_execution(oracle_vector_session: OracleAsyncDriver) -> None: + """Test Oracle VECTOR_DISTANCE cosine metric execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs_oracle") + .order_by("distance") + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +async def test_oracle_inner_product_execution(oracle_vector_session: OracleAsyncDriver) -> None: + """Test Oracle VECTOR_DISTANCE inner_product (DOT) metric execution.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance"), + ) + .from_("vector_docs_oracle") + .order_by("distance") + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + + +async def test_oracle_euclidean_squared_metric(oracle_vector_session: OracleAsyncDriver) -> None: + """Test Oracle-specific EUCLIDEAN_SQUARED metric.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean_squared").alias("distance"), + ) + .from_("vector_docs_oracle") + .order_by("distance") + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +async def test_oracle_cosine_similarity_execution(oracle_vector_session: OracleAsyncDriver) -> None: + """Test Oracle cosine similarity calculation.""" + query = ( + sql.select("content", 
sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_oracle") + .order_by(sql.column("score").desc()) + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +async def test_oracle_similarity_top_k_results(oracle_vector_session: OracleAsyncDriver) -> None: + """Test top-K similarity search.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_oracle") + .order_by(sql.column("score").desc()) + ) + query = query.limit(2) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +async def test_oracle_multiple_distance_metrics(oracle_vector_session: OracleAsyncDriver) -> None: + """Test multiple distance metrics in same query.""" + query = sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean_squared").alias("euclidean_sq"), + ).from_("vector_docs_oracle") + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert "euclidean_sq" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + assert row["euclidean_sq"] is not None + + +async def test_oracle_distance_with_null_vectors(oracle_vector_session: OracleAsyncDriver) -> None: + """Test vector distance handles NULL vectors correctly.""" + await oracle_vector_session.execute( + "INSERT INTO vector_docs_oracle (content, 
embedding) VALUES (:1, NULL)", ("doc_null",) + ) + + query = ( + sql.select("content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_oracle") + .where(sql.column("embedding").is_not_null()) + .order_by("distance") + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +async def test_oracle_combined_filters_and_distance(oracle_vector_session: OracleAsyncDriver) -> None: + """Test combining distance threshold with other filters.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_oracle") + .where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = await oracle_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] + + +async def test_oracle_similarity_score_range(oracle_vector_session: OracleAsyncDriver) -> None: + """Test cosine similarity returns values in expected range.""" + query = sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")).from_( + "vector_docs_oracle" + ) + + result = await oracle_vector_session.execute(query) + + for row in result: + score = row["score"] + assert ( + -1 <= score <= 1 + or math.isclose(score, 1.0, rel_tol=1e-9, abs_tol=1e-9) + or math.isclose(score, -1.0, rel_tol=1e-9, abs_tol=1e-9) + ) diff --git a/tests/integration/test_adapters/test_psqlpy/test_vector_functions.py b/tests/integration/test_adapters/test_psqlpy/test_vector_functions.py new file mode 100644 index 000000000..eafa979be --- /dev/null +++ b/tests/integration/test_adapters/test_psqlpy/test_vector_functions.py @@ -0,0 +1,233 @@ +"""Integration tests for vector distance functions with Psqlpy + pgvector. 
+ +Tests actual execution of vector distance queries using PostgreSQL pgvector extension +with the Rust-based Psqlpy driver. +""" + +from collections.abc import AsyncGenerator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.psqlpy import PsqlpyDriver +from sqlspec.builder import Column +from sqlspec.typing import PGVECTOR_INSTALLED + +pytestmark = [ + pytest.mark.xdist_group("postgres"), + pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed"), +] + + +@pytest.fixture +async def psqlpy_vector_session(psqlpy_driver: PsqlpyDriver) -> AsyncGenerator[PsqlpyDriver, None]: + """Create psqlpy session with pgvector extension and test table.""" + try: + await psqlpy_driver.execute_script("CREATE EXTENSION IF NOT EXISTS vector") + except Exception as e: + pytest.skip(f"pgvector extension not available on server: {e}") + + try: + await psqlpy_driver.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_docs_psqlpy ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(3) + ) + """ + ) + + await psqlpy_driver.execute_script("TRUNCATE TABLE vector_docs_psqlpy") + + await psqlpy_driver.execute( + "INSERT INTO vector_docs_psqlpy (content, embedding) VALUES ($1, $2)", ("doc1", "[0.1, 0.2, 0.3]") + ) + await psqlpy_driver.execute( + "INSERT INTO vector_docs_psqlpy (content, embedding) VALUES ($1, $2)", ("doc2", "[0.4, 0.5, 0.6]") + ) + await psqlpy_driver.execute( + "INSERT INTO vector_docs_psqlpy (content, embedding) VALUES ($1, $2)", ("doc3", "[0.7, 0.8, 0.9]") + ) + + yield psqlpy_driver + finally: + await psqlpy_driver.execute_script("DROP TABLE IF EXISTS vector_docs_psqlpy") + + +async def test_psqlpy_euclidean_distance_execution(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test PostgreSQL euclidean distance operator execution with Psqlpy.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_psqlpy") + .order_by("distance") + ) + + 
result = await psqlpy_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +async def test_psqlpy_euclidean_distance_threshold(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test PostgreSQL euclidean distance with threshold filter.""" + query = ( + sql.select("content") + .from_("vector_docs_psqlpy") + .where(sql.column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.3) + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +async def test_psqlpy_cosine_distance_execution(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test PostgreSQL cosine distance operator execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs_psqlpy") + .order_by("distance") + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +async def test_psqlpy_inner_product_execution(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test PostgreSQL inner product operator execution.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance"), + ) + .from_("vector_docs_psqlpy") + .order_by("distance") + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 3 + + +async def test_psqlpy_cosine_similarity_execution(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test PostgreSQL cosine similarity calculation.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_psqlpy") + 
.order_by(sql.column("score").desc()) + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +async def test_psqlpy_similarity_top_k_results(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test top-K similarity search.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_psqlpy") + .order_by(sql.column("score").desc()) + .limit(2) + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +async def test_psqlpy_multiple_distance_metrics(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test multiple distance metrics in same query.""" + query = sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + ).from_("vector_docs_psqlpy") + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + + +async def test_psqlpy_distance_with_null_vectors(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test vector distance handles NULL vectors correctly.""" + await psqlpy_vector_session.execute( + "INSERT INTO vector_docs_psqlpy (content, embedding) VALUES ($1, NULL)", ("doc_null",) + ) + + query = ( + sql.select("content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_psqlpy") + .where(sql.column("embedding").is_not_null()) + .order_by("distance") + ) + + result = await 
psqlpy_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +async def test_psqlpy_combined_filters_and_distance(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test combining distance threshold with other filters.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_psqlpy") + .where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] + + +async def test_psqlpy_similarity_score_range(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test cosine similarity returns values in expected range.""" + query = sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")).from_( + "vector_docs_psqlpy" + ) + + result = await psqlpy_vector_session.execute(query) + + for row in result: + score = row["score"] + assert -1 <= score <= 1 + + +async def test_psqlpy_distance_with_cast(psqlpy_vector_session: PsqlpyDriver) -> None: + """Test vector distance with explicit type casting.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).cast("FLOAT").alias("distance")) + .from_("vector_docs_psqlpy") + .order_by("distance") + ) + + result = await psqlpy_vector_session.execute(query) + + assert len(result) == 3 + assert isinstance(result[0]["distance"], float) diff --git a/tests/integration/test_adapters/test_psycopg/test_vector_functions.py b/tests/integration/test_adapters/test_psycopg/test_vector_functions.py new file mode 100644 index 000000000..00ed649fa --- /dev/null +++ b/tests/integration/test_adapters/test_psycopg/test_vector_functions.py @@ -0,0 +1,234 @@ +"""Integration tests for vector distance 
functions with Psycopg + pgvector. + +Tests actual execution of vector distance queries using PostgreSQL pgvector extension +with the synchronous Psycopg driver. +""" + +from collections.abc import Generator + +import pytest + +from sqlspec import sql +from sqlspec.adapters.psycopg import PsycopgSyncConfig, PsycopgSyncDriver +from sqlspec.builder import Column +from sqlspec.typing import PGVECTOR_INSTALLED + +pytestmark = [ + pytest.mark.xdist_group("postgres"), + pytest.mark.skipif(not PGVECTOR_INSTALLED, reason="pgvector not installed"), +] + + +@pytest.fixture +def psycopg_vector_session(psycopg_sync_config: PsycopgSyncConfig) -> Generator[PsycopgSyncDriver, None, None]: + """Create psycopg session with pgvector extension and test table.""" + with psycopg_sync_config.provide_session() as session: + try: + session.execute_script("CREATE EXTENSION IF NOT EXISTS vector") + except Exception as e: + pytest.skip(f"pgvector extension not available on server: {e}") + + try: + session.execute_script( + """ + CREATE TABLE IF NOT EXISTS vector_docs_psycopg ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(3) + ) + """ + ) + + session.execute_script("TRUNCATE TABLE vector_docs_psycopg") + + session.execute( + "INSERT INTO vector_docs_psycopg (content, embedding) VALUES (%s, %s)", ("doc1", "[0.1, 0.2, 0.3]") + ) + session.execute( + "INSERT INTO vector_docs_psycopg (content, embedding) VALUES (%s, %s)", ("doc2", "[0.4, 0.5, 0.6]") + ) + session.execute( + "INSERT INTO vector_docs_psycopg (content, embedding) VALUES (%s, %s)", ("doc3", "[0.7, 0.8, 0.9]") + ) + + yield session + finally: + session.execute_script("DROP TABLE IF EXISTS vector_docs_psycopg") + + +def test_psycopg_euclidean_distance_execution(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test PostgreSQL euclidean distance operator execution with Psycopg.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + 
.from_("vector_docs_psycopg") + .order_by("distance") + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + assert result[2]["content"] == "doc3" + + assert result[0]["distance"] < result[1]["distance"] + assert result[1]["distance"] < result[2]["distance"] + + +def test_psycopg_euclidean_distance_threshold(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test PostgreSQL euclidean distance with threshold filter.""" + query = ( + sql.select("content") + .from_("vector_docs_psycopg") + .where(sql.column("embedding").vector_distance([0.1, 0.2, 0.3]) < 0.3) + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 1 + assert result[0]["content"] == "doc1" + + +def test_psycopg_cosine_distance_execution(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test PostgreSQL cosine distance operator execution.""" + query = ( + sql.select( + "content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("distance") + ) + .from_("vector_docs_psycopg") + .order_by("distance") + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + +def test_psycopg_inner_product_execution(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test PostgreSQL inner product operator execution.""" + query = ( + sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="inner_product").alias("distance"), + ) + .from_("vector_docs_psycopg") + .order_by("distance") + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 3 + + +def test_psycopg_cosine_similarity_execution(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test PostgreSQL cosine similarity calculation.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + 
.from_("vector_docs_psycopg") + .order_by(sql.column("score").desc()) + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 3 + assert result[0]["content"] == "doc1" + + assert result[0]["score"] > result[1]["score"] + assert result[1]["score"] > result[2]["score"] + + +def test_psycopg_similarity_top_k_results(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test top-K similarity search.""" + query = ( + sql.select("content", sql.column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")) + .from_("vector_docs_psycopg") + .order_by(sql.column("score").desc()) + .limit(2) + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] == "doc1" + assert result[1]["content"] == "doc2" + + +def test_psycopg_multiple_distance_metrics(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test multiple distance metrics in same query.""" + query = sql.select( + "content", + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="euclidean").alias("euclidean_dist"), + sql.column("embedding").vector_distance([0.1, 0.2, 0.3], metric="cosine").alias("cosine_dist"), + ).from_("vector_docs_psycopg") + + result = psycopg_vector_session.execute(query) + + assert len(result) == 3 + for row in result: + assert "euclidean_dist" in row + assert "cosine_dist" in row + assert row["euclidean_dist"] is not None + assert row["cosine_dist"] is not None + + +def test_psycopg_distance_with_null_vectors(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test vector distance handles NULL vectors correctly.""" + psycopg_vector_session.execute( + "INSERT INTO vector_docs_psycopg (content, embedding) VALUES (%s, NULL)", ("doc_null",) + ) + + query = ( + sql.select("content", sql.column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_psycopg") + .where(sql.column("embedding").is_not_null()) + .order_by("distance") + ) + + result = 
psycopg_vector_session.execute(query) + + assert len(result) == 3 + assert all(row["content"] != "doc_null" for row in result) + + +def test_psycopg_combined_filters_and_distance(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test combining distance threshold with other filters.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).alias("distance")) + .from_("vector_docs_psycopg") + .where((Column("embedding").vector_distance([0.1, 0.2, 0.3]) < 1.0) & (Column("content").in_(["doc1", "doc2"]))) + .order_by("distance") + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 2 + assert result[0]["content"] in ["doc1", "doc2"] + assert result[1]["content"] in ["doc1", "doc2"] + + +def test_psycopg_similarity_score_range(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test cosine similarity returns values in expected range.""" + query = sql.select("content", Column("embedding").cosine_similarity([0.1, 0.2, 0.3]).alias("score")).from_( + "vector_docs_psycopg" + ) + + result = psycopg_vector_session.execute(query) + + for row in result: + score = row["score"] + assert -1 <= score <= 1 + + +def test_psycopg_distance_with_cast(psycopg_vector_session: PsycopgSyncDriver) -> None: + """Test vector distance with explicit type casting.""" + query = ( + sql.select("content", Column("embedding").vector_distance([0.1, 0.2, 0.3]).cast("FLOAT").alias("distance")) + .from_("vector_docs_psycopg") + .order_by("distance") + ) + + result = psycopg_vector_session.execute(query) + + assert len(result) == 3 + assert isinstance(result[0]["distance"], float) diff --git a/tests/unit/test_builder/test_vector_functions.py b/tests/unit/test_builder/test_vector_functions.py new file mode 100644 index 000000000..a4e6d6876 --- /dev/null +++ b/tests/unit/test_builder/test_vector_functions.py @@ -0,0 +1,556 @@ +"""Unit tests for vector distance functions in SQL builder. 
+ +Tests VectorDistance expression creation, dialect-specific SQL generation, +and Column.vector_distance()/Column.cosine_similarity() methods. +""" + +import pytest +from sqlglot import exp + +from sqlspec import sql +from sqlspec.builder import Column +from sqlspec.builder._vector_expressions import VectorDistance + +pytestmark = pytest.mark.xdist_group("builder") + + +def test_vector_distance_expression_creation() -> None: + """Test VectorDistance expression can be created directly.""" + col_expr = exp.Column(this=exp.Identifier(this="embedding")) + vec_expr = exp.Array(expressions=[exp.Literal.number(0.1), exp.Literal.number(0.2)]) + metric_expr = exp.Literal.string("euclidean") + + distance_expr = VectorDistance(this=col_expr, expression=vec_expr, metric=metric_expr) + + assert distance_expr.left == col_expr + assert distance_expr.right == vec_expr + assert distance_expr.metric == "euclidean" + + +def test_vector_distance_metric_extraction() -> None: + """Test metric property extracts metric from Literal expression.""" + col_expr = exp.Column(this=exp.Identifier(this="embedding")) + vec_expr = exp.Array(expressions=[exp.Literal.number(0.5)]) + + for metric in ["euclidean", "cosine", "inner_product", "euclidean_squared"]: + distance_expr = VectorDistance(this=col_expr, expression=vec_expr, metric=exp.Literal.string(metric)) + assert distance_expr.metric == metric + + +def test_column_vector_distance_with_list() -> None: + """Test Column.vector_distance() with Python list.""" + col = Column("embedding") + distance = col.vector_distance([0.1, 0.2, 0.3]) + + assert isinstance(distance._expression, VectorDistance) # pyright: ignore[reportPrivateUsage] + assert distance._expression.metric == "euclidean" # pyright: ignore[reportPrivateUsage] + + +def test_column_vector_distance_with_column() -> None: + """Test Column.vector_distance() with another Column.""" + col1 = Column("embedding1") + col2 = Column("embedding2") + distance = col1.vector_distance(col2) + + assert 
isinstance(distance._expression, VectorDistance) # pyright: ignore[reportPrivateUsage] + assert distance._expression.metric == "euclidean" # pyright: ignore[reportPrivateUsage] + + +def test_column_vector_distance_with_expression() -> None: + """Test Column.vector_distance() with SQLGlot expression.""" + col = Column("embedding") + vec_expr = exp.Array(expressions=[exp.Literal.number(0.5)]) + distance = col.vector_distance(vec_expr) + + assert isinstance(distance._expression, VectorDistance) # pyright: ignore[reportPrivateUsage] + assert distance._expression.metric == "euclidean" # pyright: ignore[reportPrivateUsage] + + +def test_column_vector_distance_invalid_metric() -> None: + """Test Column.vector_distance() raises ValueError for invalid metric.""" + col = Column("embedding") + + with pytest.raises(ValueError, match="Invalid metric"): + col.vector_distance([0.1, 0.2], metric="invalid_metric") + + +def test_column_vector_distance_invalid_vector_type() -> None: + """Test Column.vector_distance() raises TypeError for invalid vector type.""" + col = Column("embedding") + + with pytest.raises(TypeError, match="Unsupported vector type"): + col.vector_distance("not_a_vector") # type: ignore[arg-type] + + +def test_column_vector_distance_all_metrics() -> None: + """Test all supported distance metrics.""" + col = Column("embedding") + valid_metrics = ["euclidean", "cosine", "inner_product", "euclidean_squared"] + + for metric in valid_metrics: + distance = col.vector_distance([0.1, 0.2], metric=metric) + assert isinstance(distance._expression, VectorDistance) # pyright: ignore[reportPrivateUsage] + assert distance._expression.metric == metric # pyright: ignore[reportPrivateUsage] + + +def test_cosine_similarity_basic() -> None: + """Test Column.cosine_similarity() creates proper expression.""" + col = Column("embedding") + similarity = col.cosine_similarity([0.1, 0.2, 0.3]) + + assert isinstance(similarity._expression, exp.Sub) # pyright: ignore[reportPrivateUsage] + + 
left_operand = similarity._expression.this # pyright: ignore[reportPrivateUsage] + assert isinstance(left_operand, exp.Literal) + assert str(left_operand.this) == "1" + + right_operand = similarity._expression.expression # pyright: ignore[reportPrivateUsage] + assert isinstance(right_operand, exp.Paren) + + inner_expr = right_operand.this + assert isinstance(inner_expr, VectorDistance) + assert inner_expr.metric == "cosine" + + +def test_cosine_similarity_with_column() -> None: + """Test Column.cosine_similarity() with another Column.""" + col1 = Column("embedding1") + col2 = Column("embedding2") + similarity = col1.cosine_similarity(col2) + + assert isinstance(similarity._expression, exp.Sub) # pyright: ignore[reportPrivateUsage] + + right_operand = similarity._expression.expression # pyright: ignore[reportPrivateUsage] + assert isinstance(right_operand, exp.Paren) + assert isinstance(right_operand.this, VectorDistance) + + +def test_postgres_euclidean_distance_sql() -> None: + """Test PostgreSQL euclidean distance generates correct operator.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql + assert "embedding" in stmt.sql + assert "ARRAY[" in stmt.sql + + +def test_postgres_cosine_distance_sql() -> None: + """Test PostgreSQL cosine distance generates vector distance operator.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2], metric="cosine") < 0.5) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql or "<=>" in stmt.sql or "<#>" in stmt.sql + assert "embedding" in stmt.sql + + +def test_postgres_inner_product_sql() -> None: + """Test PostgreSQL inner product generates vector distance operator.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="inner_product") < 0.5) + ) + + stmt = 
query.build(dialect="postgres") + + assert "<->" in stmt.sql or "<=>" in stmt.sql or "<#>" in stmt.sql + assert "embedding" in stmt.sql + + +def test_postgres_euclidean_squared_fallback() -> None: + """Test PostgreSQL euclidean_squared metric is captured.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="euclidean_squared") < 0.5) + ) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql or "VECTOR_DISTANCE" in stmt.sql + assert "embedding" in stmt.sql + + +def test_mysql_euclidean_distance_sql() -> None: + """Test MySQL euclidean distance generates DISTANCE function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="mysql") + + assert "DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_mysql_cosine_distance_sql() -> None: + """Test MySQL cosine distance generates DISTANCE function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2], metric="cosine") < 0.5) + + stmt = query.build(dialect="mysql") + + assert "DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_mysql_inner_product_sql() -> None: + """Test MySQL inner product generates DISTANCE with DOT metric.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="inner_product") < 0.5) + ) + + stmt = query.build(dialect="mysql") + + assert "DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_mysql_string_to_vector_wrapping() -> None: + """Test MySQL wraps array literals with STRING_TO_VECTOR.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="mysql") + + assert "STRING_TO_VECTOR" in stmt.sql + + +def test_oracle_euclidean_distance_sql() -> None: + """Test Oracle euclidean distance generates VECTOR_DISTANCE 
function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="oracle") + + assert "VECTOR_DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_oracle_cosine_distance_sql() -> None: + """Test Oracle cosine distance generates VECTOR_DISTANCE function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2], metric="cosine") < 0.5) + + stmt = query.build(dialect="oracle") + + assert "VECTOR_DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_oracle_inner_product_sql() -> None: + """Test Oracle inner product generates VECTOR_DISTANCE with DOT.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="inner_product") < 0.5) + ) + + stmt = query.build(dialect="oracle") + + assert "VECTOR_DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_oracle_euclidean_squared_sql() -> None: + """Test Oracle euclidean_squared generates VECTOR_DISTANCE function.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="euclidean_squared") < 0.5) + ) + + stmt = query.build(dialect="oracle") + + assert "VECTOR_DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_oracle_to_vector_wrapping() -> None: + """Test Oracle wraps array literals with TO_VECTOR.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="oracle") + + assert "TO_VECTOR" in stmt.sql + + +def test_bigquery_euclidean_distance_sql() -> None: + """Test BigQuery euclidean distance generates EUCLIDEAN_DISTANCE function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="bigquery") + + assert "EUCLIDEAN_DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + 
+def test_bigquery_cosine_distance_sql() -> None: + """Test BigQuery cosine distance metric is captured.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2], metric="cosine") < 0.5) + + stmt = query.build(dialect="bigquery") + + assert "EUCLIDEAN_DISTANCE(" in stmt.sql or "COSINE_DISTANCE(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_bigquery_inner_product_sql() -> None: + """Test BigQuery inner product metric is captured.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="inner_product") < 0.5) + ) + + stmt = query.build(dialect="bigquery") + + assert "EUCLIDEAN_DISTANCE(" in stmt.sql or "DOT_PRODUCT(" in stmt.sql + assert "embedding" in stmt.sql + + +def test_bigquery_euclidean_squared_fallback() -> None: + """Test BigQuery euclidean_squared metric is captured.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="euclidean_squared") < 0.5) + ) + + stmt = query.build(dialect="bigquery") + + assert "EUCLIDEAN_DISTANCE(" in stmt.sql or "VECTOR_DISTANCE" in stmt.sql + assert "embedding" in stmt.sql + + +def test_duckdb_euclidean_distance_sql() -> None: + """Test DuckDB euclidean distance uses array_distance function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="duckdb") + + assert "array_distance" in stmt.sql + assert "embedding" in stmt.sql + + +def test_duckdb_cosine_distance_sql() -> None: + """Test DuckDB cosine distance uses array_cosine_distance function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2], metric="cosine") < 0.5) + + stmt = query.build(dialect="duckdb") + + assert "array_cosine_distance" in stmt.sql + assert "embedding" in stmt.sql + + +def test_duckdb_inner_product_sql() -> None: + """Test DuckDB inner product uses 
array_negative_inner_product function.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="inner_product") < 0.5) + ) + + stmt = query.build(dialect="duckdb") + + assert "array_negative_inner_product" in stmt.sql + assert "embedding" in stmt.sql + + +def test_duckdb_euclidean_squared_fallback() -> None: + """Test DuckDB euclidean_squared uses generic VECTOR_DISTANCE function.""" + query = ( + sql.select("*") + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2], metric="euclidean_squared") < 0.5) + ) + + stmt = query.build(dialect="duckdb") + + assert "VECTOR_DISTANCE" in stmt.sql + assert "embedding" in stmt.sql + assert "EUCLIDEAN_SQUARED" in stmt.sql + + +def test_generic_dialect_fallback() -> None: + """Test generic dialect generates VECTOR_DISTANCE function.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build() + + assert "VECTOR_DISTANCE(" in stmt.sql + assert "EUCLIDEAN" in stmt.sql + + +def test_distance_in_select_clause() -> None: + """Test vector distance in SELECT clause with alias.""" + query = ( + sql.select("id", Column("embedding").vector_distance([0.1, 0.2]).alias("distance")) + .from_("docs") + .order_by("distance") + ) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql + assert "distance" in stmt.sql + assert "ORDER BY" in stmt.sql + + +def test_distance_in_order_by() -> None: + """Test vector distance in ORDER BY clause.""" + distance_col = Column("embedding").vector_distance([0.1, 0.2]) + query = sql.select("*", distance_col.alias("dist")).from_("docs").order_by("dist") + + stmt = query.build(dialect="postgres") + + assert "ORDER BY" in stmt.sql + assert "<->" in stmt.sql + + +def test_cosine_similarity_in_select() -> None: + """Test cosine similarity in SELECT clause.""" + query = ( + sql.select("id", Column("embedding").cosine_similarity([0.1, 0.2]).alias("score")) + 
.from_("docs") + .order_by("score") + ) + + stmt = query.build(dialect="postgres") + + assert "<=>" in stmt.sql + assert "score" in stmt.sql + + +def test_multiple_metrics_in_same_query() -> None: + """Test multiple distance metrics in same query.""" + query = ( + sql.select( + "id", + Column("embedding").vector_distance([0.1, 0.2], metric="euclidean").alias("euclidean_dist"), + Column("embedding").vector_distance([0.1, 0.2], metric="cosine").alias("cosine_dist"), + ) + .from_("docs") + .where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + ) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql + assert "<=>" in stmt.sql + assert "euclidean_dist" in stmt.sql + assert "cosine_dist" in stmt.sql + + +def test_column_to_column_distance() -> None: + """Test distance between two vector columns.""" + query = sql.select("*").from_("pairs").where(Column("vec1").vector_distance(Column("vec2"), metric="cosine") < 0.3) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql or "<=>" in stmt.sql or "<#>" in stmt.sql + assert "vec1" in stmt.sql + assert "vec2" in stmt.sql + + +def test_comparison_operators_on_distance() -> None: + """Test comparison operators work on distance FunctionColumn.""" + col = Column("embedding") + vector = [0.1, 0.2] + + test_cases = [ + (col.vector_distance(vector) < 0.5, ["<", ">"]), + (col.vector_distance(vector) <= 0.5, ["<=", ">="]), + (col.vector_distance(vector) > 0.5, [">", "<"]), + (col.vector_distance(vector) >= 0.5, [">=", "<="]), + (col.vector_distance(vector) == 0.5, ["="]), + (col.vector_distance(vector) != 0.5, ["<>", "!="]), + ] + + for expression, expected_operators in test_cases: + query = sql.select("*").from_("docs").where(expression) + stmt = query.build(dialect="postgres") + assert any(op in stmt.sql for op in expected_operators) + + +def test_nested_expression_support() -> None: + """Test VectorDistance works in nested expressions.""" + query = ( + 
sql.select(Column("embedding").vector_distance([0.1, 0.2]).alias("dist")) + .from_("docs") + .where((Column("embedding").vector_distance([0.1, 0.2]) < 0.5) & (Column("status") == "active")) + ) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql + assert "embedding" in stmt.sql + assert "status" in stmt.sql + + +def test_empty_list_validation() -> None: + """Test empty list can be processed.""" + col = Column("embedding") + + distance = col.vector_distance([]) + + query = sql.select("*").from_("docs").where(distance < 0.5) + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql + assert "embedding" in stmt.sql + + +def test_multiple_dialects_from_same_query() -> None: + """Test same query can generate SQL for multiple dialects.""" + query = sql.select("*").from_("docs").where(Column("embedding").vector_distance([0.1, 0.2]) < 0.5) + + pg_stmt = query.build(dialect="postgres") + mysql_stmt = query.build(dialect="mysql") + oracle_stmt = query.build(dialect="oracle") + + assert "<->" in pg_stmt.sql + assert "DISTANCE(" in mysql_stmt.sql + assert "VECTOR_DISTANCE(" in oracle_stmt.sql + + +def test_distance_with_table_qualified_column() -> None: + """Test vector distance with table-qualified column.""" + col = Column("embedding", table="docs") + query = sql.select("*").from_("docs").where(col.vector_distance([0.1, 0.2]) < 0.5) + + stmt = query.build(dialect="postgres") + + assert "<->" in stmt.sql + assert "docs" in stmt.sql + assert "embedding" in stmt.sql + + +def test_cosine_similarity_ordering() -> None: + """Test cosine similarity works correctly in ORDER BY.""" + query = ( + sql.select("id", Column("embedding").cosine_similarity([0.1, 0.2]).alias("score")) + .from_("docs") + .order_by(Column("score").desc()) + .limit(10) + ) + + stmt = query.build(dialect="postgres") + + assert "<=>" in stmt.sql + assert "score" in stmt.sql + assert "ORDER BY" in stmt.sql + assert "LIMIT" in stmt.sql + + +def test_distance_with_null_handling() -> 
None: + """Test vector distance with NULL check.""" + query = ( + sql.select("*") + .from_("docs") + .where((Column("embedding").is_not_null()) & (Column("embedding").vector_distance([0.1, 0.2]) < 0.5)) + ) + + stmt = query.build(dialect="postgres") + + assert "IS NULL" in stmt.sql + assert "<->" in stmt.sql diff --git a/uv.lock b/uv.lock index c7cd9de5e..2f66b4a15 100644 --- a/uv.lock +++ b/uv.lock @@ -2335,7 +2335,7 @@ wheels = [ [[package]] name = "hatchling" -version = "1.27.0" +version = "1.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, @@ -2344,9 +2344,9 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "trove-classifiers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/8a/cc1debe3514da292094f1c3a700e4ca25442489731ef7c0814358816bb03/hatchling-1.27.0.tar.gz", hash = "sha256:971c296d9819abb3811112fc52c7a9751c8d381898f36533bb16f9791e941fd6", size = 54983, upload-time = "2024-12-15T17:08:11.894Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/8e/e480359492affde4119a131da729dd26da742c2c9b604dff74836e47eef9/hatchling-1.28.0.tar.gz", hash = "sha256:4d50b02aece6892b8cd0b3ce6c82cb218594d3ec5836dbde75bf41a21ab004c8", size = 55365, upload-time = "2025-11-27T00:31:13.766Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/e7/ae38d7a6dfba0533684e0b2136817d667588ae3ec984c1a4e5df5eb88482/hatchling-1.27.0-py3-none-any.whl", hash = "sha256:d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b", size = 75794, upload-time = "2024-12-15T17:08:10.364Z" }, + { url = "https://files.pythonhosted.org/packages/0d/a5/48cb7efb8b4718b1a4c0c331e3364a3a33f614ff0d6afd2b93ee883d3c47/hatchling-1.28.0-py3-none-any.whl", hash = "sha256:dc48722b68b3f4bbfa3ff618ca07cdea6750e7d03481289ffa8be1521d18a961", size = 76075, upload-time = "2025-11-27T00:31:12.544Z" }, ] [[package]] @@ -2715,7 +2715,7 @@ wheels = [ [[package]] name = "minio" -version = 
"7.2.19" +version = "7.2.20" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "argon2-cffi" }, @@ -2724,9 +2724,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/6c/dc6f0697357a0f71f2773af8e69d658c673e68954e0d0d53242918404fc3/minio-7.2.19.tar.gz", hash = "sha256:756f97fb3d19d198facd1b6ff44006a58934a5b09d512e343227cdaf92f3da13", size = 149526, upload-time = "2025-11-24T08:50:48.42Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/df/6dfc6540f96a74125a11653cce717603fd5b7d0001a8e847b3e54e72d238/minio-7.2.20.tar.gz", hash = "sha256:95898b7a023fbbfde375985aa77e2cd6a0762268db79cf886f002a9ea8e68598", size = 136113, upload-time = "2025-11-27T00:37:15.569Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/e6/7921c4daf50eefe1a0ef6d5c06ce9c66ec48bf1baec5b1a257c06285a856/minio-7.2.19-py3-none-any.whl", hash = "sha256:53093c99c8716fdd089aec2e29bff28fe20f962334a096b72c6e6201e32628e0", size = 103517, upload-time = "2025-11-24T08:50:46.649Z" }, + { url = "https://files.pythonhosted.org/packages/3e/9a/b697530a882588a84db616580f2ba5d1d515c815e11c30d219145afeec87/minio-7.2.20-py3-none-any.whl", hash = "sha256:eb33dd2fb80e04c3726a76b13241c6be3c4c46f8d81e1d58e757786f6501897e", size = 93751, upload-time = "2025-11-27T00:37:13.993Z" }, ] [[package]] @@ -4331,7 +4331,7 @@ wheels = [ [[package]] name = "pydantic" -version = "2.12.4" +version = "2.12.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -4339,9 +4339,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/ad/a17bc283d7d81837c061c49e3eaa27a45991759a1b7eae1031921c6bd924/pydantic-2.12.4.tar.gz", hash = "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", size = 821038, upload-time = "2025-11-05T10:50:08.59Z" } +sdist = 
{ url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/2f/e68750da9b04856e2a7ec56fc6f034a5a79775e9b9a81882252789873798/pydantic-2.12.4-py3-none-any.whl", hash = "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e", size = 463400, upload-time = "2025-11-05T10:50:06.732Z" }, + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, ] [[package]] @@ -5762,16 +5762,16 @@ wheels = [ [[package]] name = "sphinxcontrib-mermaid" -version = "1.2.2" +version = "1.2.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyyaml" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/83/11fe1f2968c05fae725e473e8b3be08cbe5f51b83ddaf3309ab4c841082a/sphinxcontrib_mermaid-1.2.2.tar.gz", hash = "sha256:35423c13e565abb839b13f955f9722f0769e77e5d607ca07877ce93e1636c196", size = 18851, upload-time = "2025-11-24T01:05:48.702Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/49/c6ddfe709a4ab76ac6e5a00e696f73626b2c189dc1e1965a361ec102e6cc/sphinxcontrib_mermaid-1.2.3.tar.gz", hash = "sha256:358699d0ec924ef679b41873d9edd97d0773446daf9760c75e18dc0adfd91371", size = 18885, upload-time = "2025-11-26T04:18:32.43Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/77/74/f24437b92c3a34eadf93987d8472def6532200621df223e1b818c4318d63/sphinxcontrib_mermaid-1.2.2-py3-none-any.whl", hash = "sha256:51655f592300fc70e73b3ef2007cfc44fac11da5ff1f15c4725c83bf4a5b517c", size = 13416, upload-time = "2025-11-24T01:05:47.252Z" }, + { url = "https://files.pythonhosted.org/packages/d1/39/8b54299ffa00e597d3b0b4d042241a0a0b22cb429ad007ccfb9c1745b4d1/sphinxcontrib_mermaid-1.2.3-py3-none-any.whl", hash = "sha256:5be782b27026bef97bfb15ccb2f7868b674a1afc0982b54cb149702cfc25aa02", size = 13413, upload-time = "2025-11-26T04:18:31.269Z" }, ] [[package]]