WinVector
diff --git a/Examples/WindowFunctions/WindowFunctions.ipynb b/Examples/WindowFunctions/WindowFunctions.ipynb
Lines changed: 295 additions & 25 deletions
diff --git a/Examples/WindowFunctions/WindowFunctions.md b/Examples/WindowFunctions/WindowFunctions.md
Lines changed: 660 additions & 0 deletions
diff --git a/Examples/WindowFunctions/output_20_0.svg b/Examples/WindowFunctions/output_20_0.svg
Lines changed: 71 additions & 0 deletions
diff --git a/build/lib/data_algebra/data_ops.py b/build/lib/data_algebra/data_ops.py
Lines changed: 13 additions & 9 deletions
diff --git a/build/lib/data_algebra/db_model.py b/build/lib/data_algebra/db_model.py
Lines changed: 3 additions & 4 deletions
diff --git a/build/lib/data_algebra/diagram.py b/build/lib/data_algebra/diagram.py
Lines changed: 14 additions & 10 deletions
diff --git a/build/lib/data_algebra/expr_rep.py b/build/lib/data_algebra/expr_rep.py
Lines changed: 2 additions & 0 deletions
diff --git a/build/lib/data_algebra/pandas_model.py b/build/lib/data_algebra/pandas_model.py
Lines changed: 10 additions & 2 deletions
diff --git a/coverage.txt b/coverage.txt
Lines changed: 31 additions & 30 deletions
diff --git a/data_algebra/data_ops.py b/data_algebra/data_ops.py
Lines changed: 2 additions & 2 deletions
@@ -65,7 +65,7 @@ def extend(
     ):
         raise NotImplementedError("base class called")
 
-    def project(self, ops, *, group_by=None, parse_env=None):
+    def project(self, ops=None, *, group_by=None, parse_env=None):
         raise NotImplementedError("base class called")
 
     def natural_join(self, b, *, by=None, jointype="INNER"):
@@ -444,7 +444,7 @@ def extend(
             parse_env=parse_env,
         )
 
-    def project(self, ops, *, group_by=None, parse_env=None):
+    def project(self, ops=None, *, group_by=None, parse_env=None):
         return ProjectNode(source=self, ops=ops, group_by=group_by, parse_env=parse_env)
 
     def natural_join(self, b, *, by=None, jointype="INNER"):
@@ -739,12 +739,12 @@ def eval_implementation(self, *, data_map, eval_env, data_model):
 
 
 class ProjectNode(ViewRepresentation):
-    def __init__(self, source, ops, *, group_by=None, parse_env=None):
+    def __init__(self, source, ops=None, *, group_by=None, parse_env=None):
+        if ops is None:
+            ops = {}
         ops = data_algebra.expr_rep.parse_assignments_in_context(
             ops, source, parse_env=parse_env
         )
-        if len(ops) < 1:
-            raise ValueError("no ops")
         self.ops = ops
         if group_by is None:
             group_by = []
@@ -1203,7 +1203,7 @@ def to_python_implementation(self, *, indent=0, strict=True, print_sources=True)
                 + " " * (indent + 6)
             )
         else:
-            s = s + " _1 "
+            s = s + " _1, "
         s = s + (
             "by=" + self.by.__repr__() + ", jointype=" + self.jointype.__repr__() + ")"
         )
@@ -1414,9 +1414,11 @@ class Project(data_algebra.pipe.PipeStep):
 
     ops: Dict[str, data_algebra.expr_rep.Expression]
 
-    def __init__(self, ops, *, group_by=None):
+    def __init__(self, ops=None, *, group_by=None):
         if isinstance(group_by, str):
             group_by = [group_by]
+        if ops is None:
+            ops = {}
         data_algebra.pipe.PipeStep.__init__(self, name="Project")
         self._ops = ops
         self.group_by = group_by
@@ -1619,7 +1621,7 @@ def apply(self, other, **kwargs):
     def __repr__(self):
         return (
             "NaturalJoin("
-            + ", b="
+            + "b="
             + self._b.__repr__()
             + ", by="
             + self._by.__repr__()
@@ -1733,9 +1735,11 @@ def extend(
         self.ops.append(op)
         return self
 
-    def project(self, ops, *, group_by=None, parse_env=None):
+    def project(self, ops=None, *, group_by=None, parse_env=None):
         if parse_env is not None:
             raise ValueError("Expected parse_env to be None")
+        if ops is None:
+            ops = {}
         op = Project(ops=ops, group_by=group_by)
         self.ops.append(op)
         return self
 
@@ -281,11 +281,10 @@ def project_to_sql(self, project_node, *, using=None, temp_id_source=None):
         if using is None:
             using = project_node.column_set
         subops = {k: op for (k, op) in project_node.ops.items() if k in using}
-        if len(subops) < 1:
-            raise ValueError("must produce at least one column")
         subusing = project_node.columns_used_from_sources(using=using)[0]
-        if len(subusing) < 1:
+        if (len(project_node.group_by) + len(subusing)) < 1:
             raise ValueError("must use at least one column")
+        grouping = [g for g in project_node.group_by]
         derived = [
             self.expr_to_sql(oi) + " AS " + self.quote_identifier(ci)
             for (ci, oi) in subops.items()
@@ -297,7 +296,7 @@ def project_to_sql(self, project_node, *, using=None, temp_id_source=None):
         temp_id_source[0] = temp_id_source[0] + 1
         sql_str = (
             "SELECT "
-            + ", ".join(derived)
+            + ", ".join(grouping + derived)
             + " FROM ( "
             + subsql
             + " ) "
 
@@ -23,21 +23,25 @@
 def _get_op_str(op):
     op_str = op.to_python_implementation(print_sources=False)
     if have_black:
-        black_mode = black.FileMode(line_length=60)
-        op_str = black.format_str(op_str, mode=black_mode)
+        try:
+            black_mode = black.FileMode(line_length=60)
+            op_str = black.format_str(op_str, mode=black_mode)
+        except Exception:
+            pass
     return op_str
 
 
 def _to_digraph_r_nodes(ops, dot, table_keys, nextid, edges):
     if isinstance(ops, data_algebra.data_ops.TableDescription):
-        if ops.key in table_keys:
+        try:
             return table_keys[ops.key]
-        table_keys.add(ops.key)
-        node_id = nextid[0]
-        nextid[0] = node_id + 1
-        dot.attr("node", shape="folder", color="blue")
-        dot.node(str(node_id), _get_op_str(ops))
-        return node_id
+        except KeyError:
+            node_id = nextid[0]
+            table_keys[ops.key] = node_id
+            nextid[0] = node_id + 1
+            dot.attr("node", shape="folder", color="blue")
+            dot.node(str(node_id), _get_op_str(ops))
+            return node_id
     source_ids = [
         _to_digraph_r_nodes(
             ops=op, dot=dot, table_keys=table_keys, nextid=nextid, edges=edges
@@ -63,7 +67,7 @@ def to_digraph(ops):
         raise RuntimeError("graphviz not installed")
     dot = graphviz.Digraph()
     edges = []
-    _to_digraph_r_nodes(ops=ops, dot=dot, table_keys=set(), nextid=[0], edges=edges)
+    _to_digraph_r_nodes(ops=ops, dot=dot, table_keys={}, nextid=[0], edges=edges)
     for (sub_id, node_id, label) in edges:
         if label is None:
             dot.edge(sub_id, node_id)
 
@@ -846,6 +846,8 @@ def parse_assignments_in_context(ops, view, *, parse_env=None):
     :param parse_env map of names to values to add to parsing environment
     :return:
     """
+    if ops is None:
+        ops = {}
     if not isinstance(ops, dict):
         raise TypeError("ops should be a dictionary")
     column_defs = view.column_map.__dict__
 
@@ -88,7 +88,8 @@ def extend_step(self, op, *, data_map, eval_env):
                     #  Groupby preserves the order of rows within each group.
                     # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html
                 else:
-                    opframe = subframe
+                    subframe['_data_algebra_temp_g'] = 1
+                    opframe = subframe.groupby(['_data_algebra_temp_g'])
                 # TODO: document exactly which of these are available
                 if len(opk.args) == 0:
                     if opk.op == "row_number":
@@ -143,7 +144,12 @@ def project_step(self, op, *, data_map, eval_env):
         )
         if len(op.group_by) > 0:
             res = res.groupby(op.group_by)
-        cols = {k: res[str(opk.args[0])].agg(opk.op) for (k, opk) in op.ops.items()}
+        remove_temp_col = False
+        if len(op.ops) > 0:
+            cols = {k: res[str(opk.args[0])].agg(opk.op) for (k, opk) in op.ops.items()}
+        else:
+            cols = {'_data_table_temp_col': res[op.sources[0].column_names[0]].agg('sum') }
+            remove_temp_col = True
 
         # agg can return scalars, which then can't be made into a pandas.DataFrame
         def promote_scalar(v):
@@ -158,6 +164,8 @@ def promote_scalar(v):
         res = self.columns_to_frame(cols).reset_index(
             drop=len(op.group_by) < 1
         )  # grouping variables in the index
+        if remove_temp_col:
+            res = res.drop('_data_table_temp_col', 1)
         return res
 
     def select_rows_step(self, op, *, data_map, eval_env):
 
@@ -2,33 +2,34 @@
 platform darwin -- Python 3.6.9, pytest-5.0.1, py-1.8.0, pluggy-0.12.0
 rootdir: /Users/johnmount/Documents/work/data_algebra
 plugins: cov-2.7.1
-collected 34 items
+collected 36 items
 
 tests/test_R_yaml.py .                                                   [  2%]
 tests/test_apply.py .                                                    [  5%]
 tests/test_cdata1.py .                                                   [  8%]
-tests/test_cdata_example.py ....                                         [ 20%]
-tests/test_cols_used.py .                                                [ 23%]
-tests/test_dask.py ..                                                    [ 29%]
-tests/test_datatable.py .                                                [ 32%]
-tests/test_drop_columns.py .                                             [ 35%]
-tests/test_example_data_ops.py .                                         [ 38%]
-tests/test_exp.py .                                                      [ 41%]
-tests/test_export_neg.py .                                               [ 44%]
-tests/test_free_expr.py .                                                [ 47%]
-tests/test_if_else.py .                                                  [ 50%]
-tests/test_math.py .                                                     [ 52%]
-tests/test_natural_join.py .                                             [ 55%]
-tests/test_neg.py .                                                      [ 58%]
-tests/test_null_bad.py .                                                 [ 61%]
-tests/test_parse.py .                                                    [ 64%]
-tests/test_poject.py .                                                   [ 67%]
-tests/test_project.py .                                                  [ 70%]
-tests/test_scatter_example.py .                                          [ 73%]
-tests/test_scoring_example.py .                                          [ 76%]
-tests/test_select_stacking.py .                                          [ 79%]
-tests/test_simple.py .....                                               [ 94%]
-tests/test_sqlite.py .                                                   [ 97%]
+tests/test_cdata_example.py ....                                         [ 19%]
+tests/test_cols_used.py .                                                [ 22%]
+tests/test_dask.py ..                                                    [ 27%]
+tests/test_datatable.py .                                                [ 30%]
+tests/test_drop_columns.py .                                             [ 33%]
+tests/test_example_data_ops.py .                                         [ 36%]
+tests/test_exp.py .                                                      [ 38%]
+tests/test_export_neg.py .                                               [ 41%]
+tests/test_free_expr.py .                                                [ 44%]
+tests/test_if_else.py .                                                  [ 47%]
+tests/test_math.py .                                                     [ 50%]
+tests/test_natural_join.py .                                             [ 52%]
+tests/test_neg.py .                                                      [ 55%]
+tests/test_null_bad.py .                                                 [ 58%]
+tests/test_parse.py .                                                    [ 61%]
+tests/test_poject.py ..                                                  [ 66%]
+tests/test_project.py .                                                  [ 69%]
+tests/test_scatter_example.py .                                          [ 72%]
+tests/test_scoring_example.py .                                          [ 75%]
+tests/test_select_stacking.py .                                          [ 77%]
+tests/test_simple.py .....                                               [ 91%]
+tests/test_sqlite.py .                                                   [ 94%]
+tests/test_window2.py .                                                  [ 97%]
 tests/test_window_fns.py .                                               [100%]
 
 ---------- coverage: platform darwin, python 3.6.9-final-0 -----------
@@ -42,21 +43,21 @@ data_algebra/cdata.py               105     21    80%
 data_algebra/cdata_impl.py          152     60    61%
 data_algebra/dask_model.py          121     23    81%
 data_algebra/data_model.py           41     15    63%
-data_algebra/data_ops.py           1088    263    76%
+data_algebra/data_ops.py           1092    265    76%
 data_algebra/data_types.py           39     19    51%
 data_algebra/datatable_model.py     131     81    38%
-data_algebra/db_model.py            364     83    77%
-data_algebra/diagram.py              52     52     0%
+data_algebra/db_model.py            363     82    77%
+data_algebra/diagram.py              56     56     0%
 data_algebra/env.py                  48      7    85%
 data_algebra/expr.py                 20      4    80%
-data_algebra/expr_rep.py            554    199    64%
-data_algebra/pandas_model.py        145     25    83%
+data_algebra/expr_rep.py            556    200    64%
+data_algebra/pandas_model.py        152     24    84%
 data_algebra/pending_eval.py         34     34     0%
 data_algebra/pipe.py                 65     19    71%
 data_algebra/util.py                 84      7    92%
 data_algebra/yaml.py                119     15    87%
 -----------------------------------------------------
-TOTAL                              3331    967    71%
+TOTAL                              3347    972    71%
 
 
-========================== 34 passed in 7.43 seconds ===========================
+========================== 36 passed in 8.02 seconds ===========================
@@ -1203,7 +1203,7 @@ def to_python_implementation(self, *, indent=0, strict=True, print_sources=True)
                 + " " * (indent + 6)
             )
         else:
-            s = s + " _1 "
+            s = s + " _1, "
         s = s + (
             "by=" + self.by.__repr__() + ", jointype=" + self.jointype.__repr__() + ")"
         )
@@ -1621,7 +1621,7 @@ def apply(self, other, **kwargs):
     def __repr__(self):
         return (
             "NaturalJoin("
-            + ", b="
+            + "b="
             + self._b.__repr__()
             + ", by="
             + self._by.__repr__()