Skip to content

Commit b9d3334

Browse files
2 parents c178d00 + f39d655 commit b9d3334

File tree

12 files changed

+128
-15
lines changed

12 files changed

+128
-15
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
## Release notes
22

33
### 0.13.3 -- TBD
4+
* Add - Expose proxy feature for S3 external stores (#961) PR #962
45
* Bugfix - Dependencies not properly loaded on populate. (#902) PR #919
56
* Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939
67
* Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956
78
* Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955
9+
* Bugfix - Fix regression issue with `DISTINCT` clause and `GROUP_BY` (#914) PR #963
10+
* Bugfix - Fix sql code generation to comply with sql mode `ONLY_FULL_GROUP_BY` (#916) PR #965
11+
* Bugfix - Fix count for left-joined `QueryExpressions` (#951) PR #966
12+
* Bugfix - Fix assertion error when performing a union into a join (#930) PR #967
813

914
### 0.13.2 -- May 7, 2021
1015
* Update `setuptools_certificate` dependency to new name `otumat`

datajoint/diagram.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def make_dot(self):
296296
node.set_style('filled')
297297

298298
for edge in dot.get_edges():
299-
# see http://www.graphviz.org/content/attrs
299+
# see https://graphviz.org/doc/info/attrs.html
300300
src = edge.get_source().strip('"')
301301
dest = edge.get_destination().strip('"')
302302
props = graph.get_edge_data(src, dest)

datajoint/expression.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ class QueryExpression:
4444
_heading = None
4545
_support = None
4646

47+
# If the query will be using distinct
48+
_distinct = False
49+
4750
@property
4851
def connection(self):
4952
""" a dj.Connection object """
@@ -106,9 +109,8 @@ def make_sql(self, fields=None):
106109
Make the SQL SELECT statement.
107110
:param fields: used to explicitly set the select attributes
108111
"""
109-
distinct = self.heading.names == self.primary_key
110112
return 'SELECT {distinct}{fields} FROM {from_}{where}'.format(
111-
distinct="DISTINCT " if distinct else "",
113+
distinct="DISTINCT " if self._distinct else "",
112114
fields=self.heading.as_sql(fields or self.heading.names),
113115
from_=self.from_clause(), where=self.where_clause())
114116

@@ -266,9 +268,11 @@ def join(self, other, semantic_check=True, left=False):
266268
- join_attributes)
267269
# need subquery if any of the join attributes are derived
268270
need_subquery1 = (need_subquery1 or isinstance(self, Aggregation) or
269-
any(n in self.heading.new_attributes for n in join_attributes))
271+
any(n in self.heading.new_attributes for n in join_attributes)
272+
or isinstance(self, Union))
270273
need_subquery2 = (need_subquery2 or isinstance(other, Aggregation) or
271-
any(n in other.heading.new_attributes for n in join_attributes))
274+
any(n in other.heading.new_attributes for n in join_attributes)
275+
or isinstance(self, Union))
272276
if need_subquery1:
273277
self = self.make_subquery()
274278
if need_subquery2:
@@ -440,8 +444,10 @@ def tail(self, limit=25, **fetch_kwargs):
440444
def __len__(self):
441445
""":return: number of elements in the result set e.g. ``len(q1)``."""
442446
return self.connection.query(
443-
'SELECT count(DISTINCT {fields}) FROM {from_}{where}'.format(
444-
fields=self.heading.as_sql(self.primary_key, include_aliases=False),
447+
'SELECT {select_} FROM {from_}{where}'.format(
448+
select_=('count(*)' if any(self._left)
449+
else 'count(DISTINCT {fields})'.format(fields=self.heading.as_sql(
450+
self.primary_key, include_aliases=False))),
445451
from_=self.from_clause(),
446452
where=self.where_clause())).fetchone()[0]
447453

@@ -554,7 +560,7 @@ def create(cls, arg, group, keep_all_rows=False):
554560
if inspect.isclass(group) and issubclass(group, QueryExpression):
555561
group = group() # instantiate if a class
556562
assert isinstance(group, QueryExpression)
557-
if keep_all_rows and len(group.support) > 1:
563+
if keep_all_rows and len(group.support) > 1 or group.heading.new_attributes:
558564
group = group.make_subquery() # subquery if left joining a join
559565
join = arg.join(group, left=keep_all_rows) # reuse the join logic
560566
result = cls()
@@ -718,6 +724,7 @@ def __and__(self, other):
718724
if not isinstance(other, QueryExpression):
719725
raise DataJointError('Set U can only be restricted with a QueryExpression.')
720726
result = copy.copy(other)
727+
result._distinct = True
721728
result._heading = result.heading.set_primary_key(self.primary_key)
722729
result = result.proj()
723730
return result

datajoint/s3.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44
from io import BytesIO
55
import minio # https://docs.minio.io/docs/python-client-api-reference
6+
import urllib3
67
import warnings
78
import uuid
89
import logging
@@ -16,9 +17,24 @@ class Folder:
1617
"""
1718
A Folder instance manipulates a flat folder of objects within an S3-compatible object store
1819
"""
19-
def __init__(self, endpoint, bucket, access_key, secret_key, *, secure=False, **_):
20-
self.client = minio.Minio(endpoint, access_key=access_key, secret_key=secret_key,
21-
secure=secure)
20+
def __init__(self, endpoint, bucket, access_key, secret_key, *, secure=False,
21+
proxy_server=None, **_):
22+
# from https://docs.min.io/docs/python-client-api-reference
23+
self.client = minio.Minio(
24+
endpoint,
25+
access_key=access_key,
26+
secret_key=secret_key,
27+
secure=secure,
28+
http_client=(
29+
urllib3.ProxyManager(proxy_server,
30+
timeout=urllib3.Timeout.DEFAULT_TIMEOUT,
31+
cert_reqs="CERT_REQUIRED",
32+
retries=urllib3.Retry(total=5,
33+
backoff_factor=0.2,
34+
status_forcelist=[500, 502, 503,
35+
504]))
36+
if proxy_server else None),
37+
)
2238
self.bucket = bucket
2339
if not self.client.bucket_exists(bucket):
2440
raise errors.BucketInaccessible('Inaccessible s3 bucket %s' % bucket)

datajoint/settings.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ def get_store_spec(self, store):
137137
spec['subfolding'] = spec.get('subfolding', DEFAULT_SUBFOLDING)
138138
spec_keys = { # REQUIRED in uppercase and allowed in lowercase
139139
'file': ('PROTOCOL', 'LOCATION', 'subfolding', 'stage'),
140-
's3': ('PROTOCOL', 'ENDPOINT', 'BUCKET', 'ACCESS_KEY', 'SECRET_KEY', 'LOCATION', 'secure', 'subfolding', 'stage')}
140+
's3': ('PROTOCOL', 'ENDPOINT', 'BUCKET', 'ACCESS_KEY', 'SECRET_KEY', 'LOCATION',
141+
'secure', 'subfolding', 'stage', 'proxy_server')}
141142

142143
try:
143144
spec_keys = spec_keys[spec.get('protocol', '').lower()]

docs-parts/intro/Releases_lang1.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
0.13.3 -- TBD
22
----------------------
3+
* Add - Expose proxy feature for S3 external stores (#961) PR #962
34
* Bugfix - Dependencies not properly loaded on populate. (#902) PR #919
45
* Bugfix - Replace use of numpy aliases of built-in types with built-in type. (#938) PR #939
56
* Bugfix - `ExternalTable.delete` should not remove row on error (#953) PR #956
67
* Bugfix - Fix error handling of remove_object function in `s3.py` (#952) PR #955
8+
* Bugfix - Fix sql code generation to comply with sql mode ``ONLY_FULL_GROUP_BY`` (#916) PR #965
9+
* Bugfix - Fix count for left-joined ``QueryExpressions`` (#951) PR #966
10+
* Bugfix - Fix assertion error when performing a union into a join (#930) PR #967
11+
* Bugfix - Fix regression issue with ``DISTINCT`` clause and ``GROUP_BY`` (#914) PR #963
712

813
0.13.2 -- May 7, 2021
914
----------------------

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ minio>=7.0.0
1010
matplotlib
1111
cryptography
1212
otumat
13+
urllib3

tests/schema.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,13 @@ class SessionDateA(dj.Lookup):
439439
('mouse1', '2020-12-03'),
440440
('mouse1', '2020-12-04')
441441
]
442+
443+
444+
@schema
445+
class Stimulus(dj.Lookup):
446+
definition = """
447+
id: int
448+
---
449+
contrast: int
450+
brightness: int
451+
"""

tests/test_aggr_regressions.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
"""
44

55
import itertools
6-
from nose.tools import assert_equal, raises
6+
from nose.tools import assert_equal
77
import datajoint as dj
88
from . import PREFIX, CONN_INFO
9-
9+
import uuid
10+
from .schema_uuid import Topic, Item, top_level_namespace_id
1011
schema = dj.Schema(PREFIX + '_aggr_regress', connection=dj.conn(**CONN_INFO))
1112

1213
# --------------- ISSUE 386 -------------------
@@ -103,3 +104,27 @@ def test_issue558_part2():
103104
d = dict(id=3, id2=5)
104105
assert_equal(len(X & d), len((X & d).proj(id2='3')))
105106

107+
108+
def test_left_join_len():
109+
Topic().add('jeff')
110+
Item.populate()
111+
Topic().add('jeff2')
112+
Topic().add('jeff3')
113+
q = Topic.join(Item - dict(topic_id=uuid.uuid5(top_level_namespace_id, 'jeff')),
114+
left=True)
115+
qf = q.fetch()
116+
assert len(q) == len(qf)
117+
118+
119+
def test_union_join():
120+
# https://github.com/datajoint/datajoint-python/issues/930
121+
A.insert(zip([100, 200, 300, 400, 500, 600]))
122+
B.insert([(100, 11), (200, 22), (300, 33), (400, 44)])
123+
q1 = B & 'id < 300'
124+
q2 = B & 'id > 300'
125+
126+
expected_data = [{'id': 0, 'id2': 5}, {'id': 1, 'id2': 6}, {'id': 2, 'id2': 7},
127+
{'id': 3, 'id2': 8}, {'id': 4, 'id2': 9}, {'id': 100, 'id2': 11},
128+
{'id': 200, 'id2': 22}, {'id': 400, 'id2': 44}]
129+
130+
assert ((q1 + q2) * A).fetch(as_dict=True) == expected_data

tests/test_fetch.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pandas
77
import warnings
88
from . import schema
9+
from .schema import Parent, Stimulus
910
import datajoint as dj
1011
import os
1112

@@ -287,3 +288,34 @@ def test_query_caching(self):
287288

288289
# reset cache directory state (will fail if purge was unsuccessful)
289290
os.rmdir(os.path.expanduser('~/dj_query_cache'))
291+
292+
def test_fetch_group_by(self):
293+
# https://github.com/datajoint/datajoint-python/issues/914
294+
295+
assert Parent().fetch('KEY', order_by='name') == [{'parent_id': 1}]
296+
297+
def test_dj_u_distinct(self):
298+
# Test developed to see if removing DISTINCT from the select statement
299+
# generation breaks the dj.U universal set implementation
300+
301+
# Contents to be inserted
302+
contents = [
303+
(1,2,3),
304+
(2,2,3),
305+
(3,3,2),
306+
(4,5,5)
307+
]
308+
Stimulus.insert(contents)
309+
310+
# Query the whole table
311+
test_query = Stimulus()
312+
313+
# Use dj.U to create a list of unique contrast and brightness combinations
314+
result = dj.U('contrast', 'brightness') & test_query
315+
expected_result = [{'contrast': 2, 'brightness': 3},
316+
{'contrast': 3, 'brightness': 2},
317+
{'contrast': 5, 'brightness': 5}]
318+
319+
fetched_result = result.fetch(as_dict=True, order_by=('contrast', 'brightness'))
320+
Stimulus.delete_quick()
321+
assert fetched_result == expected_result

0 commit comments

Comments
 (0)