Skip to content

Commit 7e9bbdb

Browse files
committed
rebuild and recheck
1 parent 058990c commit 7e9bbdb

File tree

8 files changed

+120
-65
lines changed

8 files changed

+120
-65
lines changed

build/lib/data_algebra/pandas_base.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ def table_step(self, op, *, data_map, narrow):
7878
res = df.loc[:, columns_using]
7979
res = res.reset_index(drop=True)
8080
# check type compatibility
81-
if (op.column_types is not None) and (len(op.column_types) > 0):
81+
if((op.column_types is not None) and (len(op.column_types) > 0)
82+
and (res.shape[0] > 0) and (res.shape[1] > 0)):
8283
types_seen = data_algebra.util.guess_column_types(res)
8384
for c in columns_using:
8485
assert data_algebra.util.compatible_types(
@@ -465,15 +466,21 @@ def concat_rows_step(self, op, *, data_map, narrow):
465466
data_map=data_map, data_model=self, narrow=narrow
466467
)
467468
if op.id_column is not None:
468-
left[op.id_column] = op.a_name
469-
right[op.id_column] = op.b_name
470-
type_checks = data_algebra.util.check_columns_appear_compatible(left, right)
471-
if type_checks is not None:
472-
raise ValueError(f"concat: incompatible column types: {type_checks}")
469+
if left.shape[0] > 0:
470+
left[op.id_column] = op.a_name
471+
else:
472+
left[op.id_column] = []
473+
if right.shape[0] > 0:
474+
right[op.id_column] = op.b_name
475+
else:
476+
right[op.id_column] = []
473477
if left.shape[0] < 1:
474478
return right
475479
if right.shape[0] < 1:
476480
return left
481+
type_checks = data_algebra.util.check_columns_appear_compatible(left, right)
482+
if type_checks is not None:
483+
raise ValueError(f"concat: incompatible column types: {type_checks}")
477484
# noinspection PyUnresolvedReferences
478485
res = self.pd.concat([left, right], axis=0, ignore_index=True, sort=False)
479486
res = res.reset_index(drop=True)

build/lib/data_algebra/util.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
2+
import datetime
3+
14
import numpy
25

36
import data_algebra
@@ -73,7 +76,8 @@ def guess_carried_scalar_type(col):
7376
:return: type of first non-None entry, if any, else type(None)
7477
"""
7578
ct = map_type_to_canonical(type(col))
76-
if ct in {str, int, float, bool, type(None), numpy.int64, numpy.float64}:
79+
if ct in {str, int, float, bool, type(None), numpy.int64, numpy.float64,
80+
datetime.datetime, datetime.date, datetime.timedelta}:
7781
return ct
7882
if len(col) < 1:
7983
return type(None)
@@ -85,25 +89,27 @@ def guess_carried_scalar_type(col):
8589

8690
def guess_column_types(d, *, columns=None):
8791
"""
88-
Guess column types as type of first non-missing value
92+
Guess column types as type of first non-missing value.
93+
Will not return series types, as some pandas data frames with non-trivial indexing report this type.
8994
9095
:param d: pandas.DataFrame
9196
:param columns: list of columns to check, if None all columns are checked
92-
:return: map of column names to guessed types
97+
:return: map of column names to guessed types; an empty dict if the frame has no rows or no columns, if no columns are requested, or if any column's type guess fails
9398
"""
94-
if d.shape[1] <= 0:
99+
if (d.shape[0] <= 0) or (d.shape[1] <= 0):
95100
return dict()
96101
if columns is None:
97-
columns = d.columns
102+
columns = d.columns.copy()
98103
assert len(set(columns) - set(d.columns)) == 0
99-
if d.shape[0] <= 0:
100-
return {c: type(None) for c in columns}
104+
if len(columns) <= 0:
105+
return dict()
101106
res = dict()
102107
for c in columns:
103-
res[c] = guess_carried_scalar_type(d[c])
104-
if any([str(v).endswith('.Series\'>') for v in res.values()]):
105-
# pandas.concat() poisons types with Series, don't allow that
106-
return dict()
108+
gt = guess_carried_scalar_type(d[c])
109+
if (gt is None) or (not isinstance(gt, type)) or str(gt).endswith('.Series\'>'):
110+
# pandas.concat() poisons types with Series, don't allow that
111+
return dict()
112+
res[c] = gt
107113
return res
108114

109115

@@ -129,7 +135,11 @@ def check_columns_appear_compatible(d_left, d_right, *, columns=None):
129135
assert len(set(columns) - set(d_left.columns)) == 0
130136
assert len(set(columns) - set(d_right.columns)) == 0
131137
left_types = data_algebra.util.guess_column_types(d_left, columns=columns)
138+
if (left_types is None) or (len(left_types) <= 0):
139+
return None
132140
right_types = data_algebra.util.guess_column_types(d_right, columns=columns)
141+
if (right_types is None) or (len(right_types) <= 0):
142+
return None
133143
mismatches = dict()
134144
for c in columns:
135145
if not compatible_types([left_types[c], right_types[c]]):

coverage.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,14 @@ data_algebra/expr_rep.py 562 103 82%
111111
data_algebra/flow_text.py 17 0 100%
112112
data_algebra/near_sql.py 144 1 99%
113113
data_algebra/op_container.py 112 39 65%
114-
data_algebra/pandas_base.py 305 32 90%
114+
data_algebra/pandas_base.py 309 32 90%
115115
data_algebra/pandas_model.py 18 3 83%
116116
data_algebra/parse_by_lark.py 143 26 82%
117117
data_algebra/python3_lark.py 1 0 100%
118118
data_algebra/test_util.py 195 37 81%
119-
data_algebra/util.py 80 6 92%
119+
data_algebra/util.py 86 6 93%
120120
----------------------------------------------------------
121-
TOTAL 4794 853 82%
121+
TOTAL 4804 853 82%
122122

123123

124-
============================= 210 passed in 17.18s =============================
124+
============================= 210 passed in 16.93s =============================
173 Bytes
Binary file not shown.

dist/data_algebra-0.8.0.tar.gz

245 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)