Skip to content

Commit 10ee7ba

Browse files
committed
retest and rebuild
1 parent 867fd10 commit 10ee7ba

32 files changed

+19058
-19050
lines changed

build/lib/data_algebra/BigQuery.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -249,20 +249,10 @@ def query_to_csv(self, q, *, res_name) -> None:
249249
q = q.to_sql(self.db_model)
250250
else:
251251
q = str(q)
252-
253-
def open_regular():
254-
"""open regular"""
255-
return lambda: open(res_name, "w")
256-
257-
def open_gzip():
258-
"""open gzipped"""
259-
return lambda: gzip.open(res_name, "w")
260-
261252
if res_name.endswith(".gz"):
262-
op = open_gzip
253+
op = lambda: gzip.open(res_name, "wt", encoding="utf-8")  # "wt": gzip.open treats "w" as binary and rejects encoding= there
263254
else:
264-
op = open_regular()
265-
255+
op = lambda: open(res_name, "w", encoding="utf-8")
266256
with op() as res:
267257
res_iter = self.conn.query(q).result().to_dataframe_iterable()
268258
is_first = True

build/lib/data_algebra/__init__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,5 @@
1616
the `rquery`<https://github.com/WinVector/rquery> and `rqdatatable`<https://github.com/WinVector/rqdatatable> packages.
1717
"""
1818

19-
import data_algebra.pandas_model
20-
21-
22-
# set up what pandas supplier we are using
23-
default_data_model = data_algebra.pandas_model.PandasModel()
19+
# import for easy access for package users
20+
from data_algebra.data_ops import TableDescription, SQLNode, describe_table, descr, data, ex

build/lib/data_algebra/arrow.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import abc
22

3-
import data_algebra
43
import data_algebra.data_ops
54
import data_algebra.flow_text
65

build/lib/data_algebra/cdata.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
from typing import Iterable, List, Optional
88

99
import numpy
10-
11-
import data_algebra
10+
import data_algebra.pandas_model
1211
import data_algebra.util
1312

1413

@@ -36,7 +35,7 @@ def __init__(
3635
:param local_data_model: data.frame data model
3736
"""
3837
if local_data_model is None:
39-
local_data_model = data_algebra.default_data_model
38+
local_data_model = data_algebra.pandas_model.default_data_model
4039
control_table = control_table.reset_index(inplace=False, drop=True)
4140
if control_table.shape[0] < 1:
4241
raise ValueError("control table should have at least 1 row")
@@ -194,7 +193,7 @@ def blocks_to_rowrecs(data, *, blocks_in: RecordSpecification, local_data_model=
194193
if len(ck) != len(set(ck)):
195194
raise ValueError("blocks_in can not have duplicate content keys")
196195
if local_data_model is None:
197-
local_data_model = data_algebra.default_data_model
196+
local_data_model = data_algebra.pandas_model.default_data_model
198197
data = data.reset_index(drop=True)
199198
missing_cols = set(blocks_in.control_table_keys).union(blocks_in.record_keys) - set(
200199
data.columns
@@ -281,7 +280,7 @@ def rowrecs_to_blocks(
281280
"""
282281
assert isinstance(blocks_out, RecordSpecification)
283282
if local_data_model is None:
284-
local_data_model = data_algebra.default_data_model
283+
local_data_model = data_algebra.pandas_model.default_data_model
285284
data = data.reset_index(drop=True)
286285
missing_cols = set(blocks_out.record_keys) - set(data.columns)
287286
if len(missing_cols) > 0:
@@ -431,7 +430,7 @@ def example_input(self, *, local_data_model=None):
431430
:return: example result data frame.
432431
"""
433432
if local_data_model is None:
434-
local_data_model = data_algebra.default_data_model
433+
local_data_model = data_algebra.pandas_model.default_data_model
435434
if self.blocks_in is not None:
436435
example = self.blocks_in.control_table.copy()
437436
nrow = example.shape[0]
@@ -461,7 +460,7 @@ def transform(
461460
if len(unknown) > 0:
462461
raise ValueError("missing required columns: " + str(unknown))
463462
if local_data_model is None:
464-
local_data_model = data_algebra.default_data_model
463+
local_data_model = data_algebra.pandas_model.default_data_model
465464
X = X.reset_index(drop=True)
466465
if self.blocks_in is not None:
467466
X = blocks_to_rowrecs(
@@ -651,7 +650,7 @@ def pivot_blocks_to_rowrecs(
651650
"""
652651

653652
if local_data_model is None:
654-
local_data_model = data_algebra.default_data_model
653+
local_data_model = data_algebra.pandas_model.default_data_model
655654
control_table = local_data_model.data_frame(
656655
{
657656
attribute_key_column: record_value_columns,
@@ -687,7 +686,7 @@ def pivot_rowrecs_to_blocks(
687686
"""
688687

689688
if local_data_model is None:
690-
local_data_model = data_algebra.default_data_model
689+
local_data_model = data_algebra.pandas_model.default_data_model
691690
control_table = local_data_model.data_frame(
692691
{
693692
attribute_key_column: record_value_columns,
@@ -730,7 +729,7 @@ def pivot_specification(
730729
assert len(known_cols) == len(set(known_cols))
731730
record_map = RecordMap(
732731
blocks_in=RecordSpecification(
733-
control_table=data_algebra.pandas_model.pd.DataFrame(
732+
control_table=data_algebra.pandas_model.default_data_model.pd.DataFrame(
734733
{
735734
col_name_key: value_cols,
736735
col_value_key: value_cols,
@@ -770,7 +769,7 @@ def unpivot_specification(
770769
assert len(known_cols) == len(set(known_cols))
771770
record_map = RecordMap(
772771
blocks_out=RecordSpecification(
773-
control_table=data_algebra.pandas_model.pd.DataFrame(
772+
control_table=data_algebra.pandas_model.default_data_model.pd.DataFrame(
774773
{
775774
col_name_key: value_cols,
776775
col_value_key: value_cols,

build/lib/data_algebra/data_ops.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111

1212
import numpy
1313

14-
import data_algebra
14+
import data_algebra.pandas_model
1515
import data_algebra.expr_parse
1616
import data_algebra.flow_text
1717
import data_algebra.data_model
18-
import data_algebra.pandas_model
1918
import data_algebra.expr_rep
2019
from data_algebra.data_ops_types import MethodUse, OperatorPlatform
2120
import data_algebra.data_ops_utils
@@ -480,7 +479,7 @@ def eval(
480479
"""
481480
assert isinstance(data_map, dict)
482481
if data_model is None:
483-
data_model = data_algebra.default_data_model
482+
data_model = data_algebra.pandas_model.default_data_model
484483
assert isinstance(data_model, data_algebra.data_model.DataModel)
485484
self.columns_used() # for table consistency check/raise
486485
tables = self.get_tables()
@@ -512,7 +511,7 @@ def transform(
512511
:return: transformed data frame
513512
"""
514513
if data_model is None:
515-
data_model = data_algebra.default_data_model
514+
data_model = data_algebra.pandas_model.default_data_model
516515
assert isinstance(data_model, data_algebra.data_model.DataModel)
517516
self.columns_used() # for table consistency check/raise
518517
tables = self.get_tables()

build/lib/data_algebra/db_model.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111

1212
import pandas.io.sql
1313

14-
import data_algebra
15-
14+
import data_algebra.pandas_model
1615
import data_algebra.near_sql
1716
import data_algebra.expr_rep
1817
import data_algebra.util
@@ -797,7 +796,7 @@ def __init__(
797796
union_all_term_end: str = ")",
798797
):
799798
if local_data_model is None:
800-
local_data_model = data_algebra.default_data_model
799+
local_data_model = data_algebra.pandas_model.default_data_model
801800
self.local_data_model = local_data_model
802801
if sql_formatters is None:
803802
sql_formatters = {}

build/lib/data_algebra/eval_cache.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
44
import hashlib
5-
import data_algebra
5+
import data_algebra.pandas_model
66
import data_algebra.db_model
77

88

@@ -13,9 +13,9 @@ def hash_data_frame(d) -> str:
1313
:param d: data frame
1414
:return: hash code as a string
1515
"""
16-
data_algebra.default_data_model.is_appropriate_data_instance(d)
16+
data_algebra.pandas_model.default_data_model.is_appropriate_data_instance(d)
1717
hash_str = hashlib.sha256(
18-
data_algebra.default_data_model.pd.util.hash_pandas_object(d).values
18+
data_algebra.pandas_model.default_data_model.pd.util.hash_pandas_object(d).values
1919
).hexdigest()
2020
return f"{d.shape}_{list(d.columns)}_{hash_str}"
2121

@@ -44,7 +44,7 @@ def make_cache_key(
4444
data_map_keys.sort()
4545
for k in data_map_keys:
4646
assert isinstance(k, str)
47-
assert data_algebra.default_data_model.is_appropriate_data_instance(data_map[k])
47+
assert data_algebra.pandas_model.default_data_model.is_appropriate_data_instance(data_map[k])
4848
return EvalKey(
4949
db_model_name=str(db_model),
5050
sql=sql,
@@ -74,7 +74,7 @@ def get(
7474
"""get result from cache, raise KeyError if not present"""
7575
k = make_cache_key(db_model=db_model, sql=sql, data_map=data_map)
7676
res = self.result_cache[k]
77-
assert data_algebra.default_data_model.is_appropriate_data_instance(res)
77+
assert data_algebra.pandas_model.default_data_model.is_appropriate_data_instance(res)
7878
return res.copy()
7979

8080
def store(
@@ -86,7 +86,7 @@ def store(
8686
res,
8787
) -> None:
8888
"""Store result to cache, mark dirty if change."""
89-
assert data_algebra.default_data_model.is_appropriate_data_instance(res)
89+
assert data_algebra.pandas_model.default_data_model.is_appropriate_data_instance(res)
9090
op_key = make_cache_key(db_model=db_model, sql=sql, data_map=data_map)
9191
try:
9292
previous = self.result_cache[op_key]

build/lib/data_algebra/expr_rep.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
import numpy
99

10-
import data_algebra
1110
import data_algebra.util
11+
import data_algebra.pandas_model
1212

1313

1414
# for some ideas in capturing expressions in Python see:
@@ -1333,14 +1333,14 @@ def _can_find_method_by_name(op):
13331333
# first check chosen mappings
13341334
try:
13351335
# noinspection PyUnusedLocal
1336-
check_val = data_algebra.default_data_model.user_fun_map[op] # for KeyError
1336+
check_val = data_algebra.pandas_model.default_data_model.user_fun_map[op] # for KeyError
13371337
return True
13381338
except KeyError:
13391339
pass
13401340
# check chosen mappings
13411341
try:
13421342
# noinspection PyUnusedLocal
1343-
check_val = data_algebra.default_data_model.impl_map[op] # for KeyError
1343+
check_val = data_algebra.pandas_model.default_data_model.impl_map[op] # for KeyError
13441344
return True
13451345
except KeyError:
13461346
pass
@@ -1454,13 +1454,13 @@ def evaluate(self, data_frame):
14541454
# check user fns
14551455
# first check chosen mappings
14561456
try:
1457-
method_to_call = data_algebra.default_data_model.user_fun_map[self.op]
1457+
method_to_call = data_algebra.pandas_model.default_data_model.user_fun_map[self.op]
14581458
return method_to_call(*args)
14591459
except KeyError:
14601460
pass
14611461
# check chosen mappings
14621462
try:
1463-
method_to_call = data_algebra.default_data_model.impl_map[self.op]
1463+
method_to_call = data_algebra.pandas_model.default_data_model.impl_map[self.op]
14641464
return method_to_call(*args)
14651465
except KeyError:
14661466
pass

build/lib/data_algebra/op_catalog.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
import data_algebra
21

3-
pd = data_algebra.default_data_model.pd
2+
import data_algebra.pandas_model
3+
4+
pd = data_algebra.pandas_model.default_data_model.pd
45

56

67
methods_table = pd.DataFrame(

0 commit comments

Comments
 (0)