WinVector
diff --git a/‎Examples/LogisticExample/Logistic2.ipynb‎
Lines changed: 74 additions & 5 deletions b/‎Examples/LogisticExample/Logistic2.ipynb‎
Lines changed: 74 additions & 5 deletions
diff --git a/‎build/lib/data_algebra/data_pipe.py‎
Lines changed: 111 additions & 14 deletions b/‎build/lib/data_algebra/data_pipe.py‎
Lines changed: 111 additions & 14 deletions
diff --git a/‎coverage.txt‎
Lines changed: 3 additions & 3 deletions b/‎coverage.txt‎
Lines changed: 3 additions & 3 deletions
@@ -58,31 +58,100 @@
     {
      "name": "stdout",
      "text": [
-      "TableDescription(table_name='d', column_names=['subjectID', 'surveyCategory', 'assessmentTotal', 'irrelevantCol1', 'irrelevantCol2']) .\\\n   extend({'probability': '(assessmentTotal * 0.237).exp()'}) .\\\n   extend({'total': 'probability.sum()'}, partition_by=['subjectID']) .\\\n   extend({'probability': 'probability / total'}) .\\\n   extend({'sort_key': '-probability'}) .\\\n   extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key']) .\\\n   select_rows('row_number == 1') .\\\n   select_columns(['subjectID', 'surveyCategory', 'probability']) .\\\n   rename_columns({'diagnosis': 'surveyCategory'})\n"
+      "[\n    Extend({'probability': '(assessmentTotal * 0.237).exp()'}, partition_by=None, order_by=None, reverse=None),\n    Extend({'total': 'probability.sum()'}, partition_by='subjectID', order_by=None, reverse=None),\n    Extend({'probability': 'probability/total'}, partition_by=None, order_by=None, reverse=None),\n]\n"
      ],
      "output_type": "stream"
     }
    ],
    "source": [
-    "prob_caclulation = Locum(). \\\n",
+    "prob_calculation = Locum(). \\\n",
     "    extend({'probability': '(assessmentTotal * 0.237).exp()'}). \\\n",
     "    extend({'total': 'probability.sum()'},\n",
     "           partition_by='subjectID'). \\\n",
     "    extend({'probability': 'probability/total'})\n",
     "\n",
+    "print(prob_calculation)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n",
+     "is_executing": false
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "outputs": [
+    {
+     "name": "stdout",
+     "text": [
+      "[\n    Extend({'sort_key': '-probability'}, partition_by=None, order_by=None, reverse=None),\n    Extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key'], reverse=None),\n    SelectRows('row_number == 1'),\n]\n"
+     ],
+     "output_type": "stream"
+    }
+   ],
+   "source": [
     "top_rank = Locum(). \\\n",
     "    extend({'sort_key': '-probability'}). \\\n",
     "    extend({'row_number': '_row_number()'},\n",
     "           partition_by=['subjectID'],\n",
     "           order_by=['sort_key']). \\\n",
     "    select_rows('row_number == 1')\n",
     "\n",
+    "print(top_rank)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n",
+     "is_executing": false
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "outputs": [
+    {
+     "name": "stdout",
+     "text": [
+      "[\n    SelectColumns(['subjectID', 'surveyCategory', 'probability']),\n    RenameColumns({'diagnosis': 'surveyCategory'}),\n]\n"
+     ],
+     "output_type": "stream"
+    }
+   ],
+   "source": [
     "clean_up_columns = Locum(). \\\n",
     "    select_columns(['subjectID', 'surveyCategory', 'probability']). \\\n",
     "    rename_columns({'diagnosis': 'surveyCategory'})\n",
     "\n",
+    "print(clean_up_columns)"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n",
+     "is_executing": false
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "outputs": [
+    {
+     "name": "stdout",
+     "text": [
+      "TableDescription(table_name='d', column_names=['subjectID', 'surveyCategory', 'assessmentTotal', 'irrelevantCol1', 'irrelevantCol2']) .\\\n   extend({'probability': '(assessmentTotal * 0.237).exp()'}) .\\\n   extend({'total': 'probability.sum()'}, partition_by=['subjectID']) .\\\n   extend({'probability': 'probability / total'}) .\\\n   extend({'sort_key': '-probability'}) .\\\n   extend({'row_number': '_row_number()'}, partition_by=['subjectID'], order_by=['sort_key']) .\\\n   select_rows('row_number == 1') .\\\n   select_columns(['subjectID', 'surveyCategory', 'probability']) .\\\n   rename_columns({'diagnosis': 'surveyCategory'})\n"
+     ],
+     "output_type": "stream"
+    }
+   ],
+   "source": [
     "ops =  data_algebra.data_ops.describe_table(d_local, 'd') +\\\n",
-    "    prob_caclulation +\\\n",
+    "    prob_calculation +\\\n",
     "    top_rank +\\\n",
     "    clean_up_columns\n",
     "\n",
@@ -98,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "outputs": [
     {
      "data": {
@@ -107,7 +176,7 @@
      },
      "metadata": {},
      "output_type": "execute_result",
-     "execution_count": 3
+     "execution_count": 6
     }
    ],
    "source": [
 
@@ -50,6 +50,17 @@ def apply(self, other, **kwargs):
             parse_env=parse_env,
         )
 
+    def __repr__(self):
+        """Return text of the form Extend(<ops>, partition_by=..., order_by=..., reverse=...)."""
+        return (
+            f"Extend({self._ops!r}, partition_by={self.partition_by!r}"
+            f", order_by={self.order_by!r}, reverse={self.reverse!r})"
+        )
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class Project(data_algebra.pipe.PipeStep):
     """Class to specify aggregating or summarizing columns."""
@@ -69,6 +80,15 @@ def apply(self, other, **kwargs):
         parse_env = kwargs.get("parse_env", None)
         return other.project(ops=self._ops, group_by=self.group_by, parse_env=parse_env)
 
+    def __repr__(self):
+        """Return text of the form Project(<ops>, group_by=...)."""
+        ops_text = repr(self._ops)
+        return f"Project({ops_text}, group_by={self.group_by!r})"
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class SelectRows(data_algebra.pipe.PipeStep):
     """Class to specify a choice of rows.
@@ -88,6 +108,14 @@ def apply(self, other, **kwargs):
         parse_env = kwargs.get("parse_env", None)
         return other.select_rows(expr=self.expr, parse_env=parse_env)
 
+    def __repr__(self):
+        """Return text of the form SelectRows(<expr>)."""
+        return f"SelectRows({self.expr!r})"
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class SelectColumns(data_algebra.pipe.PipeStep):
     """Class to specify a choice of columns.
@@ -107,6 +135,14 @@ def apply(self, other, **kwargs):
             )
         return other.select_columns(self.column_selection)
 
+    def __repr__(self):
+        """Return text of the form SelectColumns(<column_selection>)."""
+        return f"SelectColumns({self.column_selection!r})"
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class DropColumns(data_algebra.pipe.PipeStep):
     """Class to specify removal of columns.
@@ -126,6 +162,14 @@ def apply(self, other, **kwargs):
             )
         return other.drop_columns(self.column_deletions)
 
+    def __repr__(self):
+        """Return text of the form DropColumns(<column_deletions>)."""
+        return f"DropColumns({self.column_deletions!r})"
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class OrderRows(data_algebra.pipe.PipeStep):
     """Class to specify a columns to determine row order.
@@ -151,6 +195,15 @@ def apply(self, other, **kwargs):
             columns=self.order_columns, reverse=self.reverse, limit=self.limit
         )
 
+    def __repr__(self):
+        """Return text of the form OrderRows(<order_columns>, reverse=..., limit=...)."""
+        return (f"OrderRows({self.order_columns!r}"
+                f", reverse={self.reverse!r}, limit={self.limit!r})")
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class RenameColumns(data_algebra.pipe.PipeStep):
     """Class to rename columns.
@@ -169,6 +222,14 @@ def apply(self, other, **kwargs):
             )
         return other.rename_columns(column_remapping=self.column_remapping)
 
+    def __repr__(self):
+        """Return text of the form RenameColumns(<column_remapping>)."""
+        return f"RenameColumns({self.column_remapping!r})"
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class NaturalJoin(data_algebra.pipe.PipeStep):
     _by: List[str]
@@ -190,6 +251,17 @@ def apply(self, other, **kwargs):
             )
         return other.natural_join(b=self._b, by=self._by, jointype=self._jointype)
 
+    def __repr__(self):
+        """Return text of the form NaturalJoin(b=..., by=..., jointype=...)."""
+        return (
+            f"NaturalJoin(b={self._b!r}"
+            f", by={self._by!r}, jointype={self._jointype!r})"
+        )
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class ConvertRecords(data_algebra.pipe.PipeStep):
     def __init__(self, record_map, *, blocks_out_table=None):
@@ -205,6 +277,16 @@ def apply(self, other, **kwargs):
         return other.convert_records(record_map=self.record_map,
                                      blocks_out_table=self.blocks_out_table)
 
+    def __repr__(self):
+        """Return text mirroring the constructor call: ConvertRecords(<record_map>, blocks_out_table=...)."""
+        record_map_text = repr(self.record_map)
+        return (f"ConvertRecords({record_map_text}"
+                f", blocks_out_table={self.blocks_out_table!r})")
+
+    def __str__(self):
+        """Printing uses the same text as repr()."""
+        return repr(self)
+
 
 class Locum(data_algebra.data_ops.OperatorPlatform):
     """Class to represent future opertions."""
@@ -213,32 +295,47 @@ def __init__(self):
         data_algebra.data_ops.OperatorPlatform.__init__(self)
         self.ops = []
 
-    # noinspection PyPep8Naming
-    def realize(self, X):
-        pipeline = data_algebra.data_ops.describe_table(X, table_name="X")
+    def apply_to(self, pipeline):  # run each pending step, in order, on an existing pipeline; TypeError otherwise
+        if not isinstance(pipeline, data_algebra.data_ops.OperatorPlatform):
+            raise TypeError("Expected pipeline to be a data_algebra.data_ops.OperatorPlatform")
         for s in self.ops:
             # pipeline = pipeline >> s
             pipeline = s.apply(pipeline)
         return pipeline
 
+    def append(self, other):  # add a Locum's steps or a single PipeStep in place; returns self
+        if isinstance(other, Locum):
+            # extend() copes with other being self (an element-wise loop would never end)
+            self.ops.extend(other.ops)
+        elif isinstance(other, data_algebra.pipe.PipeStep):
+            self.ops.append(other)
+        else:
+            raise TypeError("unexpected type for Locum + " + str(type(other)))
+        return self
+
+    def realize(self, x):  # describe x as a table named "x", then apply the pending steps to it
+        described = data_algebra.data_ops.describe_table(x, table_name="x")
+        return self.apply_to(described)
+
     # noinspection PyPep8Naming
     def transform(self, X):
+        if isinstance(X, data_algebra.data_ops.OperatorPlatform):
+            return self.apply_to(X)  # X is already a pipeline: extend it with our steps instead of transforming data
         pipeline = self.realize(X)
         return pipeline.transform(X)
 
     def __rrshift__(self, other):  # override other >> self
         return self.transform(other)
 
-    def __add__(self, other):
-        if not isinstance(other, Locum):
-            raise TypeError("Expected other to be of type data_algebra.data_pipe.Locum")
+    def __add__(self, other):  # override self + other
         res = Locum()
-        for o in self.ops:
-            res.ops.append(o)
-        for o in other.ops:
-            res.ops.append(o)
+        res.append(self)  # copy this Locum's steps into the fresh result; self is not modified
+        res.append(other)  # then add other's step(s); append raises TypeError for unsupported types
         return res
 
+    def __radd__(self, other):  # override other + self: apply the pending steps to the pipeline `other`
+        return self.apply_to(other)
+
     # print
 
     def __repr__(self):
@@ -248,13 +345,13 @@ def __repr__(self):
 
     def __str__(self):
         return '[\n    ' + \
-                '\n    '.join([str(o) + ',' for o in self.ops]) + \
-                '\n]'
+               '\n    '.join([str(o) + ',' for o in self.ops]) + \
+               '\n]'  # one pending step per indented line, each followed by a comma
 
     # implement method chaining collection of pending operations
 
     def extend(
-        self, ops, *, partition_by=None, order_by=None, reverse=None, parse_env=None
+            self, ops, *, partition_by=None, order_by=None, reverse=None, parse_env=None
     ):
         if parse_env is not None:
             raise ValueError("Expected parse_env to be None")
@@ -306,7 +403,7 @@ def order_rows(self, columns, *, reverse=None, limit=None):
         return self
 
     def convert_records(
-        self, record_map, *, blocks_out_table=None
+            self, record_map, *, blocks_out_table=None
     ):
         op = ConvertRecords(record_map=record_map, blocks_out_table=blocks_out_table)
         self.ops.append(op)
 
@@ -40,7 +40,7 @@ data_algebra/cdata_impl.py          152     60    61%
 data_algebra/dask_model.py          121     23    81%
 data_algebra/data_model.py           41     15    63%
 data_algebra/data_ops.py            815    173    79%
-data_algebra/data_pipe.py           183     41    78%
+data_algebra/data_pipe.py           231     64    72%
 data_algebra/data_types.py           39     19    51%
 data_algebra/datatable_model.py     131     81    38%
 data_algebra/db_model.py            364     83    77%
@@ -54,7 +54,7 @@ data_algebra/pipe.py                 65     19    71%
 data_algebra/util.py                 84      7    92%
 data_algebra/yaml.py                120     15    88%
 -----------------------------------------------------
-TOTAL                              2945    802    73%
+TOTAL                              2993    825    72%
 
 
-========================== 31 passed in 7.62 seconds ===========================
+========================== 31 passed in 7.03 seconds ===========================