1- """methods for generating SQL WHERE clauses from datajoint restriction conditions"""
1+ """
2+ SQL WHERE clause generation from DataJoint restriction conditions.
3+
4+ This module provides utilities for converting various restriction formats
5+ (dicts, strings, QueryExpressions) into SQL WHERE clauses.
6+ """
7+
8+ from __future__ import annotations
29
310import collections
411import datetime
916import re
1017import uuid
1118from dataclasses import dataclass
19+ from typing import TYPE_CHECKING , Any
1220
1321import numpy
1422import pandas
1523
1624from .errors import DataJointError
1725
26+ if TYPE_CHECKING :
27+ from .expression import QueryExpression
28+
1829logger = logging .getLogger (__name__ .split ("." )[0 ])
1930
2031JSON_PATTERN = re .compile (r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>[\w(,\s)]+))?$" )
2132
2233
23- def translate_attribute (key ):
34+ def translate_attribute (key : str ) -> tuple [dict | None , str ]:
35+ """
36+ Translate an attribute key, handling JSON path notation.
37+
38+ Parameters
39+ ----------
40+ key : str
41+ Attribute name, optionally with JSON path (e.g., ``"attr.path.field"``).
42+
43+ Returns
44+ -------
45+ tuple
46+ (match_dict, sql_expression) where match_dict contains parsed
47+ components, or None if ``key`` does not match ``JSON_PATTERN``.
48+ """
2449 match = JSON_PATTERN .match (key )
2550 if match is None :
2651 return match , key
@@ -35,26 +60,35 @@ def translate_attribute(key):
3560
3661class PromiscuousOperand :
3762 """
38- A container for an operand to ignore join compatibility
63+ Wrapper to bypass join compatibility checking.
64+
65+ Used when you want to force a natural join without semantic matching.
66+
67+ Parameters
68+ ----------
69+ operand : QueryExpression
70+ The operand to wrap.
3971 """
4072
41- def __init__ (self , operand ) :
73+ def __init__ (self , operand : QueryExpression ) -> None :
4274 self .operand = operand
4375
4476
4577class AndList (list ):
4678 """
47- A list of conditions to by applied to a query expression by logical conjunction: the
48- conditions are AND-ed. All other collections (lists, sets, other entity sets, etc) are
49- applied by logical disjunction (OR).
50-
51- Example:
52- expr2 = expr & dj.AndList((cond1, cond2, cond3))
53- is equivalent to
54- expr2 = expr & cond1 & cond2 & cond3
79+ List of conditions combined with logical AND.
80+
81+ All conditions in the list are AND-ed together. Other collections
82+ (lists, sets, QueryExpressions) are OR-ed.
83+
84+ Examples
85+ --------
86+ >>> expr & dj.AndList((cond1, cond2, cond3))
87+ # equivalent to
88+ >>> expr & cond1 & cond2 & cond3
5589 """
5690
57- def append (self , restriction ) :
91+ def append (self , restriction : Any ) -> None :
5892 if isinstance (restriction , AndList ):
5993 # extend to reduce nesting
6094 self .extend (restriction )
@@ -65,15 +99,25 @@ def append(self, restriction):
6599@dataclass
66100class Top :
67101 """
68- A restriction to the top entities of a query.
69- In SQL, this corresponds to ORDER BY ... LIMIT ... OFFSET
102+ Restrict query to top N entities with ordering.
103+
104+ In SQL, corresponds to ``ORDER BY ... LIMIT ... OFFSET``.
105+
106+ Parameters
107+ ----------
108+ limit : int or None, optional
109+ Maximum number of rows to return. Default 1.
110+ order_by : str or list[str], optional
111+ Attributes to order by. ``"KEY"`` for primary key. Default ``"KEY"``.
112+ offset : int, optional
113+ Number of rows to skip. Default 0.
70114 """
71115
72116 limit : int | None = 1
73117 order_by : str | list [str ] = "KEY"
74118 offset : int = 0
75119
76- def __post_init__ (self ):
120+ def __post_init__ (self ) -> None :
77121 self .order_by = self .order_by or ["KEY" ]
78122 self .offset = self .offset or 0
79123
@@ -92,30 +136,54 @@ def __post_init__(self):
92136
93137
94138class Not :
95- """invert restriction"""
96-
97- def __init__ (self , restriction ):
98- self .restriction = restriction
139+ """
140+ Invert a restriction condition.
99141
142+ Parameters
143+ ----------
144+ restriction : any
145+ Restriction condition to negate.
100146
101- def assert_join_compatibility (expr1 , expr2 , semantic_check = True ):
147+ Examples
148+ --------
149+ >>> table - condition # equivalent to table & Not(condition)
102150 """
103- Determine if expressions expr1 and expr2 are join-compatible.
104151
105- With semantic_check=True (default):
106- Raises an error if there are non-homologous namesakes (same name, different lineage).
107- This prevents accidental joins on attributes that share names but represent
108- different entities.
152+ def __init__ (self , restriction : Any ) -> None :
153+ self .restriction = restriction
109154
110- If the ~lineage table doesn't exist for either schema, a warning is issued
111- and semantic checking is disabled (join proceeds as natural join).
112155
113- With semantic_check=False:
114- No lineage checking. All namesake attributes are matched (natural join behavior).
156+ def assert_join_compatibility (
157+ expr1 : QueryExpression ,
158+ expr2 : QueryExpression ,
159+ semantic_check : bool = True ,
160+ ) -> None :
161+ """
162+ Check if two expressions are join-compatible.
163+
164+ Parameters
165+ ----------
166+ expr1 : QueryExpression
167+ First expression.
168+ expr2 : QueryExpression
169+ Second expression.
170+ semantic_check : bool, optional
171+ If True (default), use semantic matching and error on non-homologous
172+ namesakes (same name, different lineage). If False, use natural join.
173+
174+ Raises
175+ ------
176+ DataJointError
177+ If semantic_check is True and expressions have non-homologous namesakes.
178+
179+ Notes
180+ -----
181+ With semantic_check=True:
182+ Prevents accidental joins on attributes that share names but represent
183+ different entities. If the ~lineage table doesn't exist for either schema, a warning is issued and semantic checking is disabled (join proceeds as natural join).
115184
116- :param expr1: A QueryExpression object
117- :param expr2: A QueryExpression object
118- :param semantic_check: If True (default), use semantic matching and error on conflicts
185+ With semantic_check=False:
186+ All namesake attributes are matched (natural join behavior).
119187 """
120188 from .expression import QueryExpression , U
121189
@@ -151,16 +219,44 @@ def assert_join_compatibility(expr1, expr2, semantic_check=True):
151219 )
152220
153221
154- def make_condition (query_expression , condition , columns , semantic_check = True ):
222+ def make_condition (
223+ query_expression : QueryExpression ,
224+ condition : Any ,
225+ columns : set [str ],
226+ semantic_check : bool = True ,
227+ ) -> str | bool :
155228 """
156- Translate the input condition into the equivalent SQL condition (a string)
157-
158- :param query_expression: a dj.QueryExpression object to apply condition
159- :param condition: any valid restriction object.
160- :param columns: a set passed by reference to collect all column names used in the
161- condition.
162- :param semantic_check: If True (default), use semantic matching and error on conflicts.
163- :return: an SQL condition string or a boolean value.
229+ Translate a restriction into an SQL WHERE clause condition.
230+
231+ Parameters
232+ ----------
233+ query_expression : QueryExpression
234+ The expression to apply the condition to.
235+ condition : any
236+ Valid restriction: str, dict, bool, QueryExpression, AndList,
237+ numpy.void, pandas.DataFrame, or iterable of restrictions.
238+ columns : set[str]
239+ Set passed by reference to collect column names used in the condition.
240+ semantic_check : bool, optional
241+ If True (default), use semantic matching and error on conflicts.
242+
243+ Returns
244+ -------
245+ str or bool
246+ SQL condition string, or bool if condition evaluates to constant.
247+
248+ Notes
249+ -----
250+ Restriction types are processed as follows:
251+
252+ - ``str``: Used directly as SQL condition
253+ - ``dict``: AND of equality conditions for matching attributes
254+ - ``bool``: Returns the boolean value (possibly negated)
255+ - ``QueryExpression``: Generates subquery (semijoin/antijoin)
256+ - ``AndList``: AND of all conditions
257+ - ``list/set/tuple``: OR of all conditions
258+ - ``numpy.void``: Like dict, from record array
259+ - ``pandas.DataFrame``: Converted to records, then OR-ed
164260 """
165261 from .expression import Aggregation , QueryExpression , U
166262
@@ -296,14 +392,27 @@ def combine_conditions(negate, conditions):
296392 return f"{ 'NOT ' if negate else '' } ({ ' OR ' .join (or_list )} )" if or_list else negate
297393
298394
299- def extract_column_names (sql_expression ) :
395+ def extract_column_names (sql_expression : str ) -> set [ str ] :
300396 """
301- extract all presumed column names from an sql expression such as the WHERE clause,
302- for example.
397+ Extract column names from an SQL expression.
398+
399+ Parameters
400+ ----------
401+ sql_expression : str
402+ SQL expression (e.g., WHERE clause) to parse.
403+
404+ Returns
405+ -------
406+ set[str]
407+ Set of extracted column names.
408+
409+ Notes
410+ -----
411+ Parsing is MySQL-specific. Identifies columns by:
303412
304- :param sql_expression: a string containing an SQL expression
305- :return: set of extracted column names
306- This may be MySQL-specific for now.
413+ 1. Names in backticks (e.g. `` `column` ``)
414+ 2. Bare identifiers not followed by ``(`` (excludes functions)
415+ 3. Excludes SQL reserved words (IS, IN, AND, OR, etc.)
307416 """
308417 assert isinstance (sql_expression , str )
309418 result = set ()
0 commit comments