Commit 5217cd4

Merge pull request #878 from dimitri-yatsenko/cascade-delete
Fix join error in the new query parser (#857)
2 parents 322f17f + 9955fe2 · commit 5217cd4

File tree

13 files changed: +133 -91 lines

CHANGELOG.md

Lines changed: 5 additions & 3 deletions
@@ -1,14 +1,16 @@
 ## Release notes
 
-### 0.13.0 -- Feb 15, 2021
+### 0.13.0 -- Mar 19, 2021
 * Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484). PR #754
 * Re-implement cascading deletes for better performance. PR #839.
 * Add table method `.update1` to update a row in the table with new values PR #763
 * Python datatypes are now enabled by default in blobs (#761). PR #785
 * Added permissive join and restriction operators `@` and `^` (#785) PR #754
 * Support DataJoint datatype and connection plugins (#715, #729) PR 730, #735
-* add `dj.key_hash` alias to `dj.hash.key_hash`
-* default enable_python_native_blobs to True
+* Add `dj.key_hash` alias to `dj.hash.key_hash`
+* Default enable_python_native_blobs to True
+* Bugfix - Regression error on joins with the same attribute name (#857). PR #878
+* Bugfix - Error in `fetch1('KEY')` when `dj.config['fetch_format']='frame'` is set (#876). PR #880, #878
 * Drop support for Python 3.5
 
 ### 0.12.8 -- Jan 12, 2021
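
For context, a brief usage sketch of two of the new features listed in this changelog. It is an illustration, not code from this commit: `my_pipeline`, `Subject`, and `Session` are hypothetical, and an activated schema with a configured database connection is assumed.

from my_pipeline import Subject, Session  # hypothetical schema module

# .update1 (PR #763): update the secondary attributes of exactly one existing row,
# identified by its full primary key
Subject.update1({'subject_id': 1, 'subject_notes': 'left-handed'})

# permissive join `@` (PR #754): join without the join-compatibility check,
# matching on all namesake attributes
linked = Subject @ Session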

LNX-docker-compose.yml

Lines changed: 12 additions & 10 deletions
@@ -32,7 +32,7 @@ services:
       interval: 1s
   fakeservices.datajoint.io:
     <<: *net
-    image: raphaelguzman/nginx:v0.0.13
+    image: datajoint/nginx:v0.0.15
     environment:
       - ADD_db_TYPE=DATABASE
       - ADD_db_ENDPOINT=db:3306
@@ -72,15 +72,17 @@ services:
       - COVERALLS_SERVICE_NAME
       - COVERALLS_REPO_TOKEN
     working_dir: /src
-    command: >
-      /bin/sh -c
-      "
-      pip install --user -r test_requirements.txt;
-      pip install --user .;
-      pip freeze | grep datajoint;
-      nosetests -vsw tests --with-coverage --cover-package=datajoint && coveralls;
-      # jupyter notebook;
-      "
+    command:
+      - sh
+      - -c
+      - |
+        set -e
+        pip install --user -r test_requirements.txt
+        pip install --user .
+        pip freeze | grep datajoint
+        nosetests -vsw tests --with-coverage --cover-package=datajoint
+        coveralls
+        # jupyter notebook
     # ports:
     #   - "8888:8888"
     user: ${UID}:${GID}

README.md

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 [![Coverage Status](https://coveralls.io/repos/datajoint/datajoint-python/badge.svg?branch=master&service=github)](https://coveralls.io/github/datajoint/datajoint-python?branch=master)
 [![PyPI version](https://badge.fury.io/py/datajoint.svg)](http://badge.fury.io/py/datajoint)
 [![Requirements Status](https://requires.io/github/datajoint/datajoint-python/requirements.svg?branch=master)](https://requires.io/github/datajoint/datajoint-python/requirements/?branch=master)
-[![Join the chat at https://gitter.im/datajoint/datajoint-python](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/datajoint/datajoint-python?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![Slack](https://img.shields.io/badge/slack-chat-green.svg)](https://datajoint.slack.com/)
 
 # Welcome to DataJoint for Python!
 DataJoint for Python is a framework for scientific workflow management based on relational principles. DataJoint is built on the foundation of the relational data model and prescribes a consistent method for organizing, populating, computing, and querying data.

datajoint/condition.py

Lines changed: 8 additions & 5 deletions
@@ -45,8 +45,9 @@ def __init__(self, restriction):
 
 def assert_join_compatibility(expr1, expr2):
     """
-    Determine if expressions expr1 and expr2 are join-compatible. To be join-compatible, the matching attributes
-    in the two expressions must be in the primary key of one or the other expression.
+    Determine if expressions expr1 and expr2 are join-compatible. To be join-compatible,
+    the matching attributes in the two expressions must be in the primary key of one or the
+    other expression.
     Raises an exception if not compatible.
     :param expr1: A QueryExpression object
     :param expr2: A QueryExpression object
@@ -58,10 +59,12 @@ def assert_join_compatibility(expr1, expr2):
             raise DataJointError('Object %r is not a QueryExpression and cannot be joined.' % rel)
     if not isinstance(expr1, U) and not isinstance(expr2, U):  # dj.U is always compatible
         try:
-            raise DataJointError("Cannot join query expressions on dependent attribute `%s`" % next(r for r in set(
-                expr1.heading.secondary_attributes).intersection(expr2.heading.secondary_attributes)))
+            raise DataJointError(
+                "Cannot join query expressions on dependent attribute `%s`" % next(
+                    r for r in set(expr1.heading.secondary_attributes).intersection(
+                        expr2.heading.secondary_attributes)))
         except StopIteration:
-            pass
+            pass  # all ok
 
 
 def make_condition(query_expression, condition, columns):
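
For context, a standalone sketch (plain Python with hypothetical attribute names, not the library's API) of the rule this docstring states: two expressions may be joined only if every attribute they share belongs to the primary key of at least one of them, so a shared dependent (secondary) attribute is an error.

def check_join_compatible(secondary1, secondary2):
    """Raise if two headings share a dependent (secondary) attribute."""
    shared_dependent = set(secondary1) & set(secondary2)
    if shared_dependent:
        raise ValueError(
            "Cannot join query expressions on dependent attribute `%s`"
            % sorted(shared_dependent)[0])

# ok: the shared primary-key attributes are not listed among the secondary ones
check_join_compatible(secondary1={"dob"}, secondary2={"rig"})

# error: `rig` is a dependent attribute in both headings
# check_join_compatible(secondary1={"rig"}, secondary2={"rig"})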

datajoint/expression.py

Lines changed: 22 additions & 23 deletions
@@ -37,7 +37,7 @@ class QueryExpression:
     _restriction = None
     _restriction_attributes = None
     _left = []  # True for left joins, False for inner joins
-    _join_attributes = []
+    _original_heading = None  # heading before projections
 
     # subclasses or instantiators must provide values
     _connection = None
@@ -61,6 +61,11 @@ def heading(self):
         """ a dj.Heading object, reflects the effects of the projection operator .proj """
         return self._heading
 
+    @property
+    def original_heading(self):
+        """ a dj.Heading object reflecting the attributes before projection """
+        return self._original_heading or self.heading
+
     @property
     def restriction(self):
         """ a AndList object of restrictions applied to input to produce the result """
@@ -85,11 +90,10 @@ def from_clause(self):
         support = ('(' + src.make_sql() + ') as `_s%x`' % next(
             self._subquery_alias_count) if isinstance(src, QueryExpression) else src for src in self.support)
         clause = next(support)
-        for s, a, left in zip(support, self._join_attributes, self._left):
-            clause += '{left} JOIN {clause}{using}'.format(
+        for s, left in zip(support, self._left):
+            clause += 'NATURAL{left} JOIN {clause}'.format(
                 left=" LEFT" if left else "",
-                clause=s,
-                using="" if not a else " USING (%s)" % ",".join('`%s`' % _ for _ in a))
+                clause=s)
         return clause
 
     def where_clause(self):
@@ -241,34 +245,29 @@ def join(self, other, semantic_check=True, left=False):
             other = other()  # instantiate
         if not isinstance(other, QueryExpression):
            raise DataJointError("The argument of join must be a QueryExpression")
-        other_clash = set(other.heading.names) | set(
-            (other.heading[n].attribute_expression.strip('`') for n in other.heading.new_attributes))
-        self_clash = set(self.heading.names) | set(
-            (self.heading[n].attribute_expression for n in self.heading.new_attributes))
-        need_subquery1 = isinstance(self, Union) or any(
-            n for n in self.heading.new_attributes if (
-                n in other_clash or self.heading[n].attribute_expression.strip('`') in other_clash))
-        need_subquery2 = (len(other.support) > 1 or
-            isinstance(self, Union) or any(
-                n for n in other.heading.new_attributes if (
-                    n in self_clash or other.heading[n].attribute_expression.strip('`') in other_clash)))
+        if semantic_check:
+            assert_join_compatibility(self, other)
+        join_attributes = set(n for n in self.heading.names if n in other.heading.names)
+        # needs subquery if the FROM clause has common attributes with the other's FROM clause
+        need_subquery1 = need_subquery2 = bool(
+            (set(self.original_heading.names) & set(other.original_heading.names))
+            - join_attributes)
+        # need subquery if any of the join attributes are derived
+        need_subquery1 = need_subquery1 or any(n in self.heading.new_attributes for n in join_attributes)
+        need_subquery2 = need_subquery2 or any(n in other.heading.new_attributes for n in join_attributes)
         if need_subquery1:
            self = self.make_subquery()
         if need_subquery2:
            other = other.make_subquery()
-        if semantic_check:
-            assert_join_compatibility(self, other)
         result = QueryExpression()
         result._connection = self.connection
         result._support = self.support + other.support
-        result._join_attributes = (
-            self._join_attributes + [[a for a in self.heading.names if a in other.heading.names]] +
-            other._join_attributes)
         result._left = self._left + [left] + other._left
         result._heading = self.heading.join(other.heading)
         result._restriction = AndList(self.restriction)
         result._restriction.append(other.restriction)
-        assert len(result.support) == len(result._join_attributes) + 1 == len(result._left) + 1
+        result._original_heading = self.original_heading.join(other.original_heading)
+        assert len(result.support) == len(result._left) + 1
         return result
 
     def __add__(self, other):
@@ -371,6 +370,7 @@ def proj(self, *attributes, **named_attributes):
         need_subquery = any(name in self.restriction_attributes for name in self.heading.new_attributes)
 
         result = self.make_subquery() if need_subquery else copy.copy(self)
+        result._original_heading = result.original_heading
         result._heading = result.heading.select(
             attributes, rename_map=dict(**rename_map, **replicate_map), compute_map=compute_map)
         return result
@@ -525,7 +525,6 @@ def create(cls, arg, group, keep_all_rows=False):
         result._connection = join.connection
         result._heading = join.heading.set_primary_key(arg.primary_key)  # use left operand's primary key
         result._support = join.support
-        result._join_attributes = join._join_attributes
         result._left = join._left
         result._left_restrict = join.restriction  # WHERE clause applied before GROUP BY
         result._grouping_attributes = result.primary_key
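
For context, a standalone sketch (plain Python with hypothetical attribute sets, not the library's API) of the subquery rule introduced above for the NATURAL JOIN: an operand must be wrapped in a subquery when its pre-projection heading shares non-join attributes with the other operand's FROM clause, or when any join attribute is a computed/renamed one.

def needs_subquery(original1, original2, heading1, heading2, new1, new2):
    """Decide which operands of a NATURAL JOIN must be wrapped in subqueries."""
    join_attributes = set(heading1) & set(heading2)
    # hidden clash: pre-projection attributes shared by both FROM clauses
    # but not part of the join
    clash = (set(original1) & set(original2)) - join_attributes
    need1 = bool(clash) or any(n in new1 for n in join_attributes)
    need2 = bool(clash) or any(n in new2 for n in join_attributes)
    return need1, need2

# example: both operands projected away a shared secondary attribute `rig`,
# so both must be wrapped lest the NATURAL JOIN match on it anyway
print(needs_subquery(
    original1={"subject_id", "rig"}, original2={"session_id", "rig"},
    heading1={"subject_id"}, heading2={"session_id"},
    new1=set(), new2=set()))  # -> (True, True)

Wrapping an operand in a subquery confines the NATURAL JOIN to the projected heading, so attributes of the underlying FROM clause cannot leak into the join condition.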

datajoint/fetch.py

Lines changed: 2 additions & 1 deletion
@@ -207,6 +207,7 @@ def __call__(self, *attrs, offset=None, limit=None, order_by=None, format=None,
         except Exception as e:
             raise e
         for name in heading:
+            # unpack blobs and externals
             ret[name] = list(map(partial(get, heading[name]), ret[name]))
         if format == "frame":
             ret = pandas.DataFrame(ret).set_index(heading.primary_key)
@@ -251,7 +252,7 @@ def __call__(self, *attrs, squeeze=False, download_path='.'):
         else:  # fetch some attributes, return as tuple
             attributes = [a for a in attrs if not is_key(a)]
             result = self._expression.proj(*attributes).fetch(
-                squeeze=squeeze, download_path=download_path)
+                squeeze=squeeze, download_path=download_path, format="array")
             if len(result) != 1:
                 raise DataJointError(
                     'fetch1 should only return one tuple. %d tuples found' % len(result))
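
For context, a usage sketch of the behavior restored by pinning the internal fetch to format="array" (#876). `my_pipeline` and `Session` are hypothetical, and a configured database connection is assumed.

import datajoint as dj
from my_pipeline import Session  # hypothetical schema module

dj.config['fetch_format'] = 'frame'  # fetch() returns pandas DataFrames by default

# fetch1 must return a single record as plain values, so its internal fetch
# ignores the configured default and uses format="array"
key = (Session & 'session_id = 1').fetch1('KEY')   # dict of primary-key values
date = (Session & key).fetch1('session_date')      # scalar value, not a DataFrame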

datajoint/schemas.py

Lines changed: 4 additions & 6 deletions
@@ -298,12 +298,10 @@ def exists(self):
         """
         if self.database is None:
             raise DataJointError("Schema must be activated first.")
-        return self.database is not None and (
-            self.connection.query(
-                "SELECT schema_name "
-                "FROM information_schema.schemata "
-                "WHERE schema_name = '{database}'".format(
-                    database=self.database)).rowcount > 0)
+        return bool(self.connection.query(
+            "SELECT schema_name "
+            "FROM information_schema.schemata "
+            "WHERE schema_name = '{database}'".format(database=self.database)).rowcount)
 
     @property
     def jobs(self):
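
For context, the same existence check written against a raw connection, as a sketch only: it assumes valid credentials in `dj.config` and a hypothetical schema name `my_pipeline`.

import datajoint as dj

database = 'my_pipeline'  # hypothetical schema name
exists = bool(dj.conn().query(
    "SELECT schema_name FROM information_schema.schemata "
    "WHERE schema_name = '{}'".format(database)).rowcount)
print(exists)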

datajoint/version.py

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
-__version__ = "0.13.dev5"
+__version__ = "0.13.dev6"
 
 assert len(__version__) <= 10  # The log table limits version to the 10 characters

docs-parts/computation/06-distributed-computing_jobs_by_key.rst

Lines changed: 6 additions & 5 deletions
@@ -3,17 +3,18 @@ This can be done by using `dj.key_hash` to convert the key as follows:
 
 .. code-block:: python
 
-    In [4]: schema.jobs & {'key_hash' : dj.key_hash({'id': 2})}
-    Out[4]:
+    In [4]: jk = {'table_name': JobResults.table_name, 'key_hash' : dj.key_hash({'id': 2})}
+    In [5]: schema.jobs & jk
+    Out[5]:
     *table_name    *key_hash      status   key      error_message  error_stac  user                 host       pid    connection_id  timestamp
     +------------+ +------------+ +--------+ +--------+ +------------+ +--------+ +------------+ +-------+ +--------+ +------------+ +------------+
     __job_results  c81e728d9d4c2f error    =BLOB=   KeyboardInterr =BLOB=      datajoint@localhost  localhost  15571  59             2017-09-04 14:
     (Total: 1)
 
-    In [5]: (schema.jobs & {'key_hash' : dj.key_hash({'id': 2})}).delete()
+    In [6]: (schema.jobs & jk).delete()
 
-    In [6]: schema.jobs & {'key_hash' : dj.key_hash({'id': 2})}
-    Out[6]:
+    In [7]: schema.jobs & jk
+    Out[7]:
     *table_name    *key_hash    status   key      error_message  error_stac  user     host     pid    connection_id  timestamp
     +------------+ +----------+ +--------+ +--------+ +------------+ +--------+ +------+ +------+ +-----+ +------------+ +-----------+
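
For context, a small sketch of what `dj.key_hash` contributes in this documentation snippet: it maps a primary-key mapping to the hex digest stored in the jobs table's `key_hash` column, which is why restricting `schema.jobs` requires hashing the key first. This runs without a database connection.

import datajoint as dj

key = {'id': 2}
print(dj.key_hash(key))  # hex digest matching the `key_hash` column shown above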

docs-parts/intro/Releases_lang1.rst

Lines changed: 5 additions & 3 deletions
@@ -1,13 +1,15 @@
-0.13.0 -- Feb 15, 2021
+0.13.0 -- Mar 19, 2021
 ----------------------
 * Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484). PR #754
 * Re-implement cascading deletes for better performance. PR #839.
 * Add table method `.update1` to update a row in the table with new values PR #763
 * Python datatypes are now enabled by default in blobs (#761). PR #785
 * Added permissive join and restriction operators `@` and `^` (#785) PR #754
 * Support DataJoint datatype and connection plugins (#715, #729) PR 730, #735
-* add `dj.key_hash` alias to `dj.hash.key_hash`
-* default enable_python_native_blobs to True
+* Add `dj.key_hash` alias to `dj.hash.key_hash`
+* Default enable_python_native_blobs to True
+* Bugfix - Regression error on joins with the same attribute name (#857). PR #878
+* Bugfix - Error in `fetch1('KEY')` when `dj.config['fetch_format']='frame'` is set (#876). PR #880, #878
 * Drop support for Python 3.5
 
 0.12.8 -- Jan 12, 2021
