scikit-learn-contrib
diff --git a/‎.coveragerc‎
Lines changed: 0 additions & 21 deletions b/‎.coveragerc‎
Lines changed: 0 additions & 21 deletions
diff --git a/‎doc/_static/img/cover.png‎
-10.1 KB b/‎doc/_static/img/cover.png‎
-10.1 KB
diff --git a/‎doc/_static/img/dag2.png‎
-55.4 KB b/‎doc/_static/img/dag2.png‎
-55.4 KB
diff --git a/‎doc/_static/img/dag2a.png‎
50.1 KB b/‎doc/_static/img/dag2a.png‎
50.1 KB
diff --git a/‎doc/_static/img/dag3.png‎
-80.2 KB b/‎doc/_static/img/dag3.png‎
-80.2 KB
diff --git a/‎doc/_static/img/dag3a.png‎
62.3 KB b/‎doc/_static/img/dag3a.png‎
62.3 KB
diff --git a/‎doc/quick_start.rst‎
Lines changed: 33 additions & 31 deletions b/‎doc/quick_start.rst‎
Lines changed: 33 additions & 31 deletions
diff --git a/‎doc/user_guide.rst‎
Lines changed: 20 additions & 14 deletions b/‎doc/user_guide.rst‎
Lines changed: 20 additions & 14 deletions
diff --git a/‎setup.cfg‎
Lines changed: 27 additions & 0 deletions b/‎setup.cfg‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎skdag/_version.py‎
Lines changed: 1 addition & 1 deletion b/‎skdag/_version.py‎
Lines changed: 1 addition & 1 deletion
@@ -26,23 +26,26 @@ The simplest DAGs are just a chain of singular dependencies. These DAGs may be
 created from the :meth:`skdag.dag.DAG.from_pipeline` method in the same way as a
 scikit-learn :class:`~sklearn.pipeline.Pipeline`:
 
->>> from sklearn.decomposition import PCA
->>> from sklearn.impute import SimpleImputer
->>> from sklearn.linear_model import LogisticRegression
->>> dag = DAG.from_pipeline(
-...     steps=[
-...         ("impute", SimpleImputer()),
-...         ("pca", PCA()),
-...         ("lr", LogisticRegression())
-...     ]
-... )
->>> dag.draw()
-o    impute
-|
-o    pca
-|
-o    lr
-<BLANKLINE>
+.. code-block:: python
+
+    >>> from skdag import DAGBuilder
+    >>> from sklearn.decomposition import PCA
+    >>> from sklearn.impute import SimpleImputer
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> dag = DAGBuilder().from_pipeline(
+    ...     steps=[
+    ...         ("impute", SimpleImputer()),
+    ...         ("pca", PCA()),
+    ...         ("lr", LogisticRegression())
+    ...     ]
+    ... ).make_dag()
+    >>> dag.show()
+    o    impute
+    |
+    o    pca
+    |
+    o    lr
+    <BLANKLINE>
 
 .. image:: _static/img/dag1.png
 
@@ -52,7 +55,6 @@ estimator:
 
 .. code-block:: python
 
-    >>> from skdag import DAGBuilder
     >>> dag = (
     ...     DAGBuilder(infer_dataframe=True)
     ...     .add_step("impute", SimpleImputer())
@@ -61,15 +63,15 @@ estimator:
     ...     .add_step("lr", LogisticRegression(random_state=0), deps=["blood", "vitals"])
     ...     .make_dag()
     ... )
-    >>> dag.draw()
+    >>> dag.show()
     o    impute
     |\
     o o    blood,vitals
     |/
     o    lr
     <BLANKLINE>
 
-.. image:: _static/img/dag2.png
+.. image:: _static/img/dag2a.png
 
 In the above examples we pass the first four columns directly to a regressor, but
 the remaining columns have dimensionality reduction applied first before being
@@ -82,36 +84,36 @@ on how to control this behaviour, see the `User Guide <user_guide.html>`_.
 The DAG may now be used as an estimator in its own right:
 
 >>> from sklearn import datasets
->>> X, y = datasets.load_diabetes(return_X_y=True)
->>> dag.fit_predict(X, y)
-array([...
+>>> X, y = datasets.load_diabetes(return_X_y=True, as_frame=True)
+>>> type(dag.fit_predict(X, y))
+<class 'pandas.core.series.Series'>
 
 In an extension to the scikit-learn estimator interface, DAGs also support multiple
 inputs and multiple outputs. Let's say we want to compare two different classifiers:
 
 >>> from sklearn.ensemble import RandomForestClassifier
->>> cal = DAG.from_pipeline(
+>>> cal = DAGBuilder(infer_dataframe=True).from_pipeline(
 ...     [("rf", RandomForestClassifier(random_state=0))]
-... )
+... ).make_dag()
 >>> dag2 = dag.join(cal, edges=[("blood", "rf"), ("vitals", "rf")])
->>> dag2.draw()
+>>> dag2.show()
 o    impute
 |\
 o o    blood,vitals
 |x|
 o o    lr,rf
 <BLANKLINE>
 
-.. image:: _static/img/dag3.png
+.. image:: _static/img/dag3a.png
 
 Now our DAG will return two outputs: one from each classifier. Multiple outputs are
 returned as a :class:`Bunch <sklearn.utils.Bunch>`:
 
 >>> y_pred = dag2.fit_predict(X, y)
->>> y_pred.lr
-array([...
->>> y_pred.rf
-array([...
+>>> type(y_pred.lr)
+<class 'pandas.core.series.Series'>
+>>> type(y_pred.rf)
+<class 'pandas.core.series.Series'>
 
 Similarly, multiple inputs are also acceptable and inputs can be provided by
 specifying ``X`` and ``y`` as ``dict``-like objects.
@@ -18,17 +18,17 @@ scikit-learn :class:`~sklearn.pipeline.Pipeline`. These DAGs may be created from
 
 .. code-block:: python
 
+    >>> from skdag import DAGBuilder
     >>> from sklearn.decomposition import PCA
     >>> from sklearn.impute import SimpleImputer
     >>> from sklearn.linear_model import LogisticRegression
-    >>> dag = DAG.from_pipeline(
+    >>> dag = DAGBuilder(infer_dataframe=True).from_pipeline(
     ...     steps=[
     ...         ("impute", SimpleImputer()),
     ...         ("pca", PCA()),
     ...         ("lr", LogisticRegression())
-    ...     ],
-    ...     infer_dataframe=True,
-    ... )
+    ...     ]
+    ... ).make_dag()
 
 You may view a diagram of the DAG with the :meth:`~skdag.dag.DAG.show` method. In a
 notebook environment this will display an image, whereas in a terminal it will generate
@@ -97,19 +97,20 @@ The DAG may now be used as an estimator in its own right:
 .. code-block:: python
 
     >>> from sklearn import datasets
-    >>> X, y = datasets.load_diabetes(return_X_y=True)
-    >>> dag.fit_predict(X, y)
-    array([...
+    >>> X, y = datasets.load_diabetes(return_X_y=True, as_frame=True)
+    >>> y_hat = dag.fit_predict(X, y)
+    >>> type(y_hat)
+    <class 'pandas.core.series.Series'>
 
 In an extension to the scikit-learn estimator interface, DAGs also support multiple
 inputs and multiple outputs. Let's say we want to compare two different classifiers:
 
 .. code-block:: python
 
     >>> from sklearn.ensemble import RandomForestClassifier
-    >>> rf = DAG.from_pipeline(
+    >>> rf = DAGBuilder().from_pipeline(
     ...     [("rf", RandomForestClassifier(random_state=0))]
-    ... )
+    ... ).make_dag()
     >>> dag2 = dag.join(rf, edges=[("blood", "rf"), ("vitals", "rf")])
     >>> dag2.show()
     o    impute
@@ -126,10 +127,14 @@ returned as a :class:`sklearn.utils.Bunch<Bunch>`:
 .. code-block:: python
 
     >>> y_pred = dag2.fit_predict(X, y)
-    >>> y_pred.lr
-    array([...
-    >>> y_pred.rf
-    array([...
+    >>> type(y_pred.lr)
+    <class 'pandas.core.series.Series'>
+    >>> type(y_pred.rf)
+    <class 'numpy.ndarray'>
+
+Note that we have different types of output here because ``LogisticRegression`` natively
+supports dataframe input whereas ``RandomForestClassifier`` does not. We could fix this
+by specifying ``infer_dataframe=True`` when we created our ``rf`` DAG extension.
 
 Similarly, multiple inputs are also acceptable and inputs can be provided by
 specifying ``X`` and ``y`` as ``dict``-like objects.
@@ -174,6 +179,7 @@ the next step(s).
     ...     .make_dag()
     ... )
     >>> stack.fit(X_train, y_train)
+    DAG(...
 
 .. image:: _static/img/stack.png
 
@@ -210,7 +216,7 @@ as a dictionary of step name to column indices instead:
     ...     .add_step("pass", "passthrough")
     ...     .add_step("rf", RandomForestClassifier(), deps=["pass"])
     ...     .add_step("svr", SVC(), deps=["pass"])
-    ...     .add_step("meta", LinearRegression(), deps={"rf": 1, "svc": 1}])
+    ...     .add_step("meta", LinearRegression(), deps={"rf": 1, "svr": 1})
     ...     .make_dag()
     ... )
 
 
@@ -5,11 +5,38 @@ description-file = README.rst
 test = pytest
 
 [tool:pytest]
+doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS
+testpaths = .
 addopts =
     -s
     --doctest-modules
+    --doctest-glob="*.rst"
     --cov=skdag
     --ignore setup.py
     --ignore doc/_build
     --ignore doc/_templates
     --no-cov-on-fail
+
+[coverage:run]
+branch = True
+source = skdag
+include = */skdag/*
+omit =
+    */tests/*
+    *_test.py
+    test_*.py
+    */setup.py
+
+[coverage:report]
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    if self.debug:
+    if settings.DEBUG
+    raise AssertionError
+    raise NotImplementedError
+    if 0:
+    if __name__ == .__main__.:
+    if self.verbose:
+show_missing = True
+
@@ -1 +1 @@
-__version__ = "0.0.4"
+__version__ = "0.0.5"
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.0.4"`
	`1`	`+__version__ = "0.0.5"`