Skip to content

Commit fe85a82

Browse files
Fix merge conflicts.
2 parents afa77f9 + 322f17f commit fe85a82

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+9688
-1194
lines changed

CHANGELOG.md

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
## Release notes
22

3-
### 0.13.0 -- TBD
3+
### 0.13.0 -- Feb 15, 2021
4+
* Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484). PR #754
5+
* Re-implement cascading deletes for better performance. PR #839.
6+
* Add table method `.update1` to update a row in the table with new values PR #763
7+
* Python datatypes are now enabled by default in blobs (#761). PR #785
8+
* Added permissive join and restriction operators `@` and `^` (#785) PR #754
49
* Support DataJoint datatype and connection plugins (#715, #729) PR #730, #735
5-
* Allow updating specified secondary attributes using `update1` PR #763
6-
* add dj.key_hash reference to dj.hash.key_hash, treat as 'public api'
10+
* add `dj.key_hash` alias to `dj.hash.key_hash`
711
* default enable_python_native_blobs to True
8-
* Remove python 3.5 support
12+
* Drop support for Python 3.5
913

1014
### 0.12.8 -- Jan 12, 2021
1115
* table.children, .parents, .descendants, and .ancestors can return queryable objects. PR #833

LNX-docker-compose.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ services:
7575
command: >
7676
/bin/sh -c
7777
"
78-
pip install --user nose nose-cov coveralls .;
78+
pip install --user -r test_requirements.txt;
79+
pip install --user .;
7980
pip freeze | grep datajoint;
8081
nosetests -vsw tests --with-coverage --cover-package=datajoint && coveralls;
8182
# jupyter notebook;

datajoint/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"""
1616

1717
__author__ = "DataJoint Contributors"
18-
__date__ = "February 7, 2019"
18+
__date__ = "November 7, 2020"
1919
__all__ = ['__author__', '__version__',
2020
'config', 'conn', 'Connection',
2121
'Schema', 'schema', 'VirtualModule', 'create_virtual_module',

datajoint/autopopulate.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from tqdm import tqdm
88
from .expression import QueryExpression, AndList
99
from .errors import DataJointError, LostConnectionError
10-
from .table import FreeTable
1110
import signal
1211

1312
# noinspection PyExceptionInherit,PyCallingNonCallable
@@ -58,15 +57,16 @@ def make(self, key):
5857
@property
5958
def target(self):
6059
"""
61-
relation to be populated.
62-
Typically, AutoPopulate are mixed into a Relation object and the target is self.
60+
:return: table to be populated.
61+
In the typical case, dj.AutoPopulate is mixed into a dj.Table class by inheritance and the target is self.
6362
"""
6463
return self
6564

6665
def _job_key(self, key):
6766
"""
6867
:param key: the key returned for the job from the key source
6968
:return: the dict to use to generate the job reservation hash
69+
This method allows subclasses to control the job reservation granularity.
7070
"""
7171
return key
7272

@@ -136,7 +136,7 @@ def handler(signum, frame):
136136

137137
make = self._make_tuples if hasattr(self, '_make_tuples') else self.make
138138

139-
for key in (tqdm(keys) if display_progress else keys):
139+
for key in (tqdm(keys, desc=self.__class__.__name__) if display_progress else keys):
140140
if max_calls is not None and call_count >= max_calls:
141141
break
142142
if not reserve_jobs or jobs.reserve(self.target.table_name, self._job_key(key)):

datajoint/blob.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111
import uuid
1212
import numpy as np
1313
from .errors import DataJointError
14-
from .utils import OrderedDict
1514
from .settings import config
1615

1716

18-
mxClassID = OrderedDict((
17+
mxClassID = dict((
1918
# see http://www.mathworks.com/help/techdoc/apiref/mxclassid.html
2019
('mxUNKNOWN_CLASS', None),
2120
('mxCELL_CLASS', None),
@@ -346,8 +345,8 @@ def pack_set(self, t):
346345
len_u64(it) + it for it in (self.pack_blob(i) for i in t))
347346

348347
def read_dict(self):
349-
return OrderedDict((self.read_blob(self.read_value()), self.read_blob(self.read_value()))
350-
for _ in range(self.read_value()))
348+
return dict((self.read_blob(self.read_value()), self.read_blob(self.read_value()))
349+
for _ in range(self.read_value()))
351350

352351
def pack_dict(self, d):
353352
return b"\4" + len_u64(d) + b"".join(

datajoint/condition.py

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
""" methods for generating SQL WHERE clauses from datajoint restriction conditions """
2+
3+
import inspect
4+
import collections
5+
import re
6+
import uuid
7+
import datetime
8+
import decimal
9+
import numpy
10+
import pandas
11+
from .errors import DataJointError
12+
13+
14+
class PromiscuousOperand:
    """
    Wrapper marking an operand for which the join-compatibility check is to be skipped.

    The wrapped object is kept unchanged in the `operand` attribute.
    """
    def __init__(self, operand):
        # store the wrapped object as-is; no validation or copying is done here
        self.operand = operand
20+
21+
22+
class AndList(list):
    """
    A list of conditions to be applied to a query expression by logical conjunction: the conditions are AND-ed.
    All other collections (lists, sets, other entity sets, etc) are applied by logical disjunction (OR).

    Example:
    expr2 = expr & dj.AndList((cond1, cond2, cond3))
    is equivalent to
    expr2 = expr & cond1 & cond2 & cond3
    """
    def append(self, restriction):
        """
        Add a restriction to the conjunction.
        :param restriction: the condition to add; another AndList is spliced in item by item instead of nested.
        """
        if not isinstance(restriction, AndList):
            super().append(restriction)
            return
        # an AndList inside an AndList is the same conjunction, so flatten it to reduce nesting
        self.extend(restriction)
38+
39+
40+
class Not:
    """
    Marks a restriction as inverted (negated).

    The original restriction is kept unchanged in the `restriction` attribute.
    """
    def __init__(self, restriction):
        # keep the wrapped restriction as-is; nothing is evaluated here
        self.restriction = restriction
44+
45+
46+
def assert_join_compatibility(expr1, expr2):
    """
    Verify that expressions expr1 and expr2 are join-compatible and raise an exception if they are not.
    Two query expressions are rejected when some attribute name is a secondary attribute in both headings.
    A dj.U operand is always accepted.
    :param expr1: A QueryExpression object
    :param expr2: A QueryExpression object
    :raises DataJointError: if either operand is neither a QueryExpression nor a dj.U,
        or if the operands share a secondary attribute.
    """
    from .expression import QueryExpression, U

    for operand in (expr1, expr2):
        if not isinstance(operand, (U, QueryExpression)):
            raise DataJointError('Object %r is not a QueryExpression and cannot be joined.' % operand)
    if isinstance(expr1, U) or isinstance(expr2, U):
        return  # dj.U is always compatible
    shared_secondary = set(expr1.heading.secondary_attributes).intersection(expr2.heading.secondary_attributes)
    if shared_secondary:
        # report one (arbitrary) offending attribute, taken in set iteration order
        raise DataJointError(
            "Cannot join query expressions on dependent attribute `%s`" % next(iter(shared_secondary)))
65+
66+
67+
def make_condition(query_expression, condition, columns):
    """
    Translate the input condition into the equivalent SQL condition (a string)
    :param query_expression: a dj.QueryExpression object to apply condition
    :param condition: any valid restriction object: a string, an AndList, a dj.U, a boolean, a mapping,
        a numpy record, a QueryExpression (class or instance), a PromiscuousOperand, a pandas.DataFrame,
        any of these wrapped in Not, or an iterable of such conditions (treated as an OR-list).
    :param columns: a set passed by reference to collect all column names used in the condition.
    :return: an SQL condition string or a boolean value.
    :raises DataJointError: for a badly formed UUID value or a condition of an unsupported type.
    """
    from .expression import QueryExpression, Aggregation, U

    def prep_value(k, v):
        """prepare value v for inclusion as a string in an SQL condition"""
        if query_expression.heading[k].uuid:
            if not isinstance(v, uuid.UUID):
                try:
                    v = uuid.UUID(v)
                except (AttributeError, ValueError):
                    raise DataJointError('Badly formed UUID {v} in restriction by `{k}`'.format(k=k, v=v)) from None
            return "X'%s'" % v.bytes.hex()
        if isinstance(v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)):
            return '"%s"' % v
        if isinstance(v, str):
            # NOTE(review): only % is escaped; embedded double quotes or backslashes in v are passed through
            # verbatim -- confirm that callers never supply untrusted strings here.
            return '"%s"' % v.replace('%', '%%')
        # str() rather than repr(): identical for builtin numbers, but NumPy >= 2 scalars repr as
        # e.g. "np.int64(5)", which is not valid SQL (values from numpy records arrive as NumPy scalars).
        return str(v)

    def equality_conjunction(candidate_names):
        """AND together `attr`=value terms for those candidate names that are present in the heading"""
        common_attributes = set(candidate_names).intersection(query_expression.heading.names)
        if not common_attributes:
            return not negate  # no matching attributes -> evaluates to True
        columns.update(common_attributes)
        return template % ('(' + ') AND ('.join(
            '`%s`=%s' % (k, prep_value(k, condition[k])) for k in common_attributes) + ')')

    # unwrap any number of nested Not wrappers, tracking the resulting parity
    negate = False
    while isinstance(condition, Not):
        negate = not negate
        condition = condition.restriction
    template = "NOT (%s)" if negate else "%s"

    # restrict by string
    if isinstance(condition, str):
        columns.update(extract_column_names(condition))
        return template % condition.strip().replace("%", "%%")  # escape % in strings, see issue #376

    # restrict by AndList
    if isinstance(condition, AndList):
        # omit all conditions that evaluate to True
        items = [item for item in (make_condition(query_expression, cond, columns) for cond in condition)
                 if item is not True]
        if any(item is False for item in items):
            return negate  # if any item is False, the whole thing is False
        if not items:
            return not negate  # an empty AndList is True
        return template % ('(' + ') AND ('.join(items) + ')')

    # restriction by dj.U evaluates to True
    if isinstance(condition, U):
        return not negate

    # restrict by boolean
    if isinstance(condition, bool):
        return negate != condition

    # restrict by a mapping such as a dict -- convert to an AndList of string equality conditions
    if isinstance(condition, collections.abc.Mapping):
        return equality_conjunction(condition)

    # restrict by a numpy record -- convert to an AndList of string equality conditions
    if isinstance(condition, numpy.void):
        return equality_conjunction(condition.dtype.fields)

    # restrict by a QueryExpression subclass -- trigger instantiation and move on
    if inspect.isclass(condition) and issubclass(condition, QueryExpression):
        condition = condition()

    # restrict by another expression (aka semijoin and antijoin)
    check_compatibility = True
    if isinstance(condition, PromiscuousOperand):
        condition = condition.operand
        check_compatibility = False

    if isinstance(condition, QueryExpression):
        if check_compatibility:
            assert_join_compatibility(query_expression, condition)
        common_attributes = [q for q in condition.heading.names if q in query_expression.heading.names]
        columns.update(common_attributes)
        if isinstance(condition, Aggregation):
            condition = condition.make_subquery()
        if not common_attributes:
            # without common attributes, any non-empty set matches everything
            return not negate if condition else negate
        return '({fields}) {not_}in ({subquery})'.format(
            fields='`' + '`,`'.join(common_attributes) + '`',
            not_="not " if negate else "",
            subquery=condition.make_sql(common_attributes))

    # restrict by pandas.DataFrames
    if isinstance(condition, pandas.DataFrame):
        condition = condition.to_records()  # convert to numpy.recarray and move on

    # if iterable (but not a string, a QueryExpression, or an AndList), treat as an OrList
    try:
        or_list = [make_condition(query_expression, q, columns) for q in condition]
    except TypeError:
        raise DataJointError('Invalid restriction type %r' % condition)
    else:
        or_list = [item for item in or_list if item is not False]  # ignore all False conditions
        if any(item is True for item in or_list):  # if any item is True, the whole thing is True
            return not negate
        return template % ('(%s)' % ' OR '.join(or_list)) if or_list else negate  # an empty or list is False
179+
180+
181+
def extract_column_names(sql_expression):
    """
    Collect the identifiers in an SQL expression (a WHERE clause, for example) that are presumed to be column names.
    :param sql_expression: a string containing an SQL expression
    :return: set of extracted column names
    Only lowercase identifiers are recognized. This may be MySQL-specific for now.
    """
    assert isinstance(sql_expression, str)
    reserved_words = {"is", "in", "between", "like", "and", "or", "null", "not"}
    # drop escaped quotes first so they cannot end a literal early, then drop the quoted literals themselves
    text = sql_expression
    for quoted in (r'(\\\")|(\\\')', r"'[^']*'", r'"[^"]*"'):
        text = re.sub(quoted, '', text)
    # back-quoted tokens are taken to be column names; record them and remove them from the text
    column_names = set(re.findall(r"`([a-z][a-z_0-9]*)`", text))
    text = re.sub(r"`[a-z][a-z_0-9]*`", '', text)
    # a token followed by "(" (optionally after whitespace) must be a function, not a column
    text = re.sub(r"\s*\(", "(", text)
    text = re.sub(r"(\b[a-z][a-z_0-9]*)\(", "(", text)
    # the bare tokens that remain are presumed columns unless they are reserved words
    bare_tokens = re.findall(r"\b[a-z][a-z_0-9]*\b", text)
    return column_names | {token for token in bare_tokens if token not in reserved_words}

0 commit comments

Comments
 (0)