Skip to content

Commit 4a1602e

Browse files
committed
implemented more of sklearn step API
1 parent 0f7a3c8 commit 4a1602e

File tree

8 files changed

+89
-5
lines changed

8 files changed

+89
-5
lines changed

build/lib/data_algebra/data_ops.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ def columns_used_from_sources(self, using=None):
114114
with the using columns (None means all)."""
115115
raise NotImplementedError("base method called")
116116

117+
def columns_produced(self):
118+
return self.column_names.copy()
119+
117120
def _clear_columns_currently_used(self):
118121
self.columns_currently_used = set()
119122
for si in self.sources:
@@ -300,6 +303,7 @@ def eval_pandas(self, data_map, *, eval_env=None, data_model=None, narrow=True):
300303
:param data_map: map from table names to data frames
301304
:param eval_env: environment to evaluate in
302305
:param data_model: adaptor to Pandas dialect
306+
:param narrow: logical, if True don't copy unexpected columns
303307
:return:
304308
"""
305309

@@ -759,6 +763,9 @@ def __init__(self, *, underlying, data_map):
759763
self.data_map = data_map.copy()
760764
self.underlying = underlying
761765

766+
def columns_produced(self):
767+
return self.underlying.columns_produced()
768+
762769
# execution
763770

764771
def ex(self):

build/lib/data_algebra/data_ops_types.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ def transform(self, X, *, eval_env=None, data_model=None, narrow=True):
1818
apply self to data frame X, may or may not commute with composition
1919
2020
:param X: input data frame
21+
:param eval_env: environment to look for symbols in
22+
:param data_model: implementation to use
23+
:param narrow: logical, if True don't copy unexpected columns
2124
:return: transformed dataframe
2225
"""
2326
raise NotImplementedError("base class called")
@@ -28,6 +31,8 @@ def act_on(self, X, *, eval_env=None, data_model=None):
2831
apply self to data frame X, must commute with composition
2932
3033
:param X: input data frame
34+
:param eval_env: environment to look for symbols in
35+
:param data_model: implementation to use
3136
:return: transformed dataframe
3237
"""
3338
return self.transform(X=X, eval_env=eval_env, data_model=data_model, narrow=False)
@@ -81,6 +86,11 @@ def add(self, other):
8186
"""
8287
return other.apply_to(self)
8388

89+
# info
90+
91+
def columns_produced(self):
92+
raise NotImplementedError("base class called")
93+
8494
# query generation
8595

8696
def to_sql_implementation(self, db_model, *, using, temp_id_source):
@@ -130,3 +140,28 @@ def order_rows(self, columns, *, reverse=None, limit=None):
130140

131141
def convert_records(self, record_map):
132142
raise NotImplementedError("base class called")
143+
144+
# sklearn step style interface
145+
146+
# noinspection PyPep8Naming, PyUnusedLocal
147+
def fit(self, X, y=None):
148+
pass
149+
150+
# noinspection PyPep8Naming, PyUnusedLocal
151+
def fit_transform(self, X, y=None):
152+
return self.transform(X)
153+
154+
# noinspection PyUnusedLocal
155+
def get_feature_names(self, input_features=None):
156+
return self.columns_produced()
157+
158+
# noinspection PyUnusedLocal,PyMethodMayBeStatic
159+
def get_params(self, deep=False):
160+
return dict()
161+
162+
def set_params(self, **params):
163+
pass
164+
165+
# noinspection PyPep8Naming
166+
def inverse_transform(self, X):
167+
raise TypeError("data_algebra does not support inverse_transform")

build/lib/data_algebra/near_sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def __init__(self, *, terms, quoted_query_name, temp_tables):
1313
self.temp_tables = temp_tables
1414

1515
def to_sql(self, *, columns=None, force_sql=False, constants=None, db_model):
16-
raise NotImplemented("base method called")
16+
raise NotImplementedError("base method called")
1717

1818
def summary(self):
1919
return {"quoted_query_name": self.quoted_query_name, "is_table": False}

coverage.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ data_algebra/cdata.py 232 75 68%
6363
data_algebra/cdata_impl.py 10 1 90%
6464
data_algebra/connected_components.py 49 1 98%
6565
data_algebra/data_model.py 29 13 55%
66-
data_algebra/data_ops.py 1253 268 79%
67-
data_algebra/data_ops_types.py 56 22 61%
66+
data_algebra/data_ops.py 1257 270 79%
67+
data_algebra/data_ops_types.py 70 29 59%
6868
data_algebra/data_ops_utils.py 34 6 82%
6969
data_algebra/data_pipe.py 189 50 74%
7070
data_algebra/db_model.py 402 70 83%
@@ -79,7 +79,7 @@ data_algebra/test_util.py 126 18 86%
7979
data_algebra/util.py 45 6 87%
8080
data_algebra/yaml.py 95 11 88%
8181
----------------------------------------------------------
82-
TOTAL 3909 879 78%
82+
TOTAL 3927 888 77%
8383

8484

85-
============================== 85 passed in 8.37s ==============================
85+
============================== 85 passed in 7.12s ==============================

data_algebra/data_ops.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ def columns_used_from_sources(self, using=None):
114114
with the using columns (None means all)."""
115115
raise NotImplementedError("base method called")
116116

117+
def columns_produced(self):
118+
return self.column_names.copy()
119+
117120
def _clear_columns_currently_used(self):
118121
self.columns_currently_used = set()
119122
for si in self.sources:
@@ -300,6 +303,7 @@ def eval_pandas(self, data_map, *, eval_env=None, data_model=None, narrow=True):
300303
:param data_map: map from table names to data frames
301304
:param eval_env: environment to evaluate in
302305
:param data_model: adaptor to Pandas dialect
306+
:param narrow: logical, if True don't copy unexpected columns
303307
:return:
304308
"""
305309

@@ -759,6 +763,9 @@ def __init__(self, *, underlying, data_map):
759763
self.data_map = data_map.copy()
760764
self.underlying = underlying
761765

766+
def columns_produced(self):
767+
return self.underlying.columns_produced()
768+
762769
# execution
763770

764771
def ex(self):

data_algebra/data_ops_types.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ def transform(self, X, *, eval_env=None, data_model=None, narrow=True):
1818
apply self to data frame X, may or may not commute with composition
1919
2020
:param X: input data frame
21+
:param eval_env: environment to look for symbols in
22+
:param data_model: implementation to use
23+
:param narrow: logical, if True don't copy unexpected columns
2124
:return: transformed dataframe
2225
"""
2326
raise NotImplementedError("base class called")
@@ -28,6 +31,8 @@ def act_on(self, X, *, eval_env=None, data_model=None):
2831
apply self to data frame X, must commute with composition
2932
3033
:param X: input data frame
34+
:param eval_env: environment to look for symbols in
35+
:param data_model: implementation to use
3136
:return: transformed dataframe
3237
"""
3338
return self.transform(X=X, eval_env=eval_env, data_model=data_model, narrow=False)
@@ -81,6 +86,11 @@ def add(self, other):
8186
"""
8287
return other.apply_to(self)
8388

89+
# info
90+
91+
def columns_produced(self):
92+
raise NotImplementedError("base class called")
93+
8494
# query generation
8595

8696
def to_sql_implementation(self, db_model, *, using, temp_id_source):
@@ -130,3 +140,28 @@ def order_rows(self, columns, *, reverse=None, limit=None):
130140

131141
def convert_records(self, record_map):
132142
raise NotImplementedError("base class called")
143+
144+
# sklearn step style interface
145+
146+
# noinspection PyPep8Naming, PyUnusedLocal
147+
def fit(self, X, y=None):
148+
pass
149+
150+
# noinspection PyPep8Naming, PyUnusedLocal
151+
def fit_transform(self, X, y=None):
152+
return self.transform(X)
153+
154+
# noinspection PyUnusedLocal
155+
def get_feature_names(self, input_features=None):
156+
return self.columns_produced()
157+
158+
# noinspection PyUnusedLocal,PyMethodMayBeStatic
159+
def get_params(self, deep=False):
160+
return dict()
161+
162+
def set_params(self, **params):
163+
pass
164+
165+
# noinspection PyPep8Naming
166+
def inverse_transform(self, X):
167+
raise TypeError("data_algebra does not support inverse_transform")
286 Bytes
Binary file not shown.

dist/data_algebra-0.4.3.tar.gz

268 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)