Skip to content

Commit 17d2c5c

Browse files
committed
code clean up and documentation
rebuild and retest
1 parent a1fb430 commit 17d2c5c

File tree

15 files changed

+3517
-702
lines changed

15 files changed

+3517
-702
lines changed

build/lib/data_algebra/db_model.py

Lines changed: 126 additions & 1 deletion
Large diffs are not rendered by default.

build/lib/data_algebra/expr_rep.py

Lines changed: 408 additions & 38 deletions
Large diffs are not rendered by default.

build/lib/data_algebra/pandas_base.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,30 +275,48 @@ def __init__(self, *, pd: types.ModuleType, presentation_model_name: str):
275275
# utils
276276

277277
def data_frame(self, arg=None):
278+
"""
279+
Build a new data frame from arg (empty if arg is None).
280+
"""
278281
if arg is None:
279282
# noinspection PyUnresolvedReferences
280283
return self.pd.DataFrame()
281284
# noinspection PyUnresolvedReferences
282285
return self.pd.DataFrame(arg)
283286

284287
def is_appropriate_data_instance(self, df):
288+
"""
289+
Check if df is our type of data frame.
290+
"""
285291
# noinspection PyUnresolvedReferences
286292
return isinstance(df, self.pd.DataFrame)
287293

288294
def can_convert_col_to_numeric(self, x):
295+
"""
296+
Return True if column or value can be converted to numeric type.
297+
"""
289298
if isinstance(x, numbers.Number):
290299
return True
291300
# noinspection PyUnresolvedReferences
292301
return self.pd.api.types.is_numeric_dtype(x)
293302

294303
def to_numeric(self, x, *, errors="coerce"):
304+
"""
305+
Convert column to numeric.
306+
"""
295307
# noinspection PyUnresolvedReferences
296308
return self.pd.to_numeric(x, errors="coerce")
297309

298310
def isnull(self, x):
311+
"""
312+
Return vector indicating which entries are null (vectorized).
313+
"""
299314
return self.pd.isnull(x)
300315

301316
def bad_column_positions(self, x):
317+
"""
318+
Return vector indicating which entries are bad (null or nan) (vectorized).
319+
"""
302320
if self.can_convert_col_to_numeric(x):
303321
x = numpy.asarray(x + 0, dtype=float)
304322
return numpy.logical_or(
@@ -309,7 +327,10 @@ def bad_column_positions(self, x):
309327
# bigger stuff
310328

311329
# noinspection PyMethodMayBeStatic,PyUnusedLocal
312-
def table_step(self, op, *, data_map, narrow):
330+
def table_step(self, op, *, data_map: dict, narrow: bool):
331+
"""
332+
Return data frame from table description and data_map.
333+
"""
313334
if op.node_name != "TableDescription":
314335
raise TypeError(
315336
"op was supposed to be a data_algebra.data_ops.TableDescription"
@@ -361,6 +382,9 @@ def columns_to_frame_(self, cols, *, target_rows=0):
361382

362383
# agg can return scalars, which then can't be made into a self.pd.DataFrame
363384
def promote_scalar(vi, *, target_len):
385+
"""
386+
Convert a scalar into a vector.
387+
"""
364388
# noinspection PyBroadException
365389
try:
366390
len_v = len(vi)
@@ -379,6 +403,12 @@ def promote_scalar(vi, *, target_len):
379403
return self.pd.DataFrame(cols)
380404

381405
def add_data_frame_columns_to_data_frame_(self, res, transient_new_frame):
406+
"""
407+
Add columns from transient_new_frame to res. Res may be altered, and either of res or
408+
transient_new_frame may be returned.
409+
"""
410+
if transient_new_frame.shape[1] < 1:
411+
return res
382412
if (res.shape[0] == 0) and (transient_new_frame.shape[0] > 0):
383413
# scalars get interpreted as single row items, instead of zero row items
384414
# growing the extension frame
@@ -402,6 +432,9 @@ def add_data_frame_columns_to_data_frame_(self, res, transient_new_frame):
402432
return res
403433

404434
def extend_step(self, op, *, data_map, narrow):
435+
"""
436+
Execute an extend step, returning a data frame.
437+
"""
405438
if op.node_name != "ExtendNode":
406439
raise TypeError("op was supposed to be a data_algebra.data_ops.ExtendNode")
407440
window_situation = (
@@ -514,6 +547,9 @@ def extend_step(self, op, *, data_map, narrow):
514547
return res
515548

516549
def project_step(self, op, *, data_map, narrow):
550+
"""
551+
Execute a project step, returning a data frame.
552+
"""
517553
if op.node_name != "ProjectNode":
518554
raise TypeError("op was supposed to be a data_algebra.data_ops.ProjectNode")
519555
# check these are forms we are prepared to work with, and build an aggregation dictionary
@@ -581,6 +617,9 @@ def project_step(self, op, *, data_map, narrow):
581617
return res
582618

583619
def select_rows_step(self, op, *, data_map, narrow):
620+
"""
621+
Execute a select rows step, returning a data frame.
622+
"""
584623
if op.node_name != "SelectRowsNode":
585624
raise TypeError(
586625
"op was supposed to be a data_algebra.data_ops.SelectRowsNode"
@@ -595,6 +634,9 @@ def select_rows_step(self, op, *, data_map, narrow):
595634
return res
596635

597636
def select_columns_step(self, op, *, data_map, narrow):
637+
"""
638+
Execute a select columns step, returning a data frame.
639+
"""
598640
if op.node_name != "SelectColumnsNode":
599641
raise TypeError(
600642
"op was supposed to be a data_algebra.data_ops.SelectColumnsNode"
@@ -605,6 +647,9 @@ def select_columns_step(self, op, *, data_map, narrow):
605647
return res[op.column_selection]
606648

607649
def drop_columns_step(self, op, *, data_map, narrow):
650+
"""
651+
Execute a drop columns step, returning a data frame.
652+
"""
608653
if op.node_name != "DropColumnsNode":
609654
raise TypeError(
610655
"op was supposed to be a data_algebra.data_ops.DropColumnsNode"
@@ -616,6 +661,9 @@ def drop_columns_step(self, op, *, data_map, narrow):
616661
return res[column_selection]
617662

618663
def order_rows_step(self, op, *, data_map, narrow):
664+
"""
665+
Execute an order rows step, returning a data frame.
666+
"""
619667
if op.node_name != "OrderRowsNode":
620668
raise TypeError(
621669
"op was supposed to be a data_algebra.data_ops.OrderRowsNode"
@@ -635,6 +683,9 @@ def order_rows_step(self, op, *, data_map, narrow):
635683
return res
636684

637685
def rename_columns_step(self, op, *, data_map, narrow):
686+
"""
687+
Execute a rename columns step, returning a data frame.
688+
"""
638689
if op.node_name != "RenameColumnsNode":
639690
raise TypeError(
640691
"op was supposed to be a data_algebra.data_ops.RenameColumnsNode"
@@ -646,6 +697,9 @@ def rename_columns_step(self, op, *, data_map, narrow):
646697

647698
# noinspection PyMethodMayBeStatic
648699
def standardize_join_code(self, jointype):
700+
"""
701+
Map join names to Pandas names.
702+
"""
649703
assert isinstance(jointype, str)
650704
jointype = jointype.lower()
651705
mp = {
@@ -659,6 +713,9 @@ def standardize_join_code(self, jointype):
659713
return jointype
660714

661715
def natural_join_step(self, op, *, data_map, narrow):
716+
"""
717+
Execute a natural join step, returning a data frame.
718+
"""
662719
if op.node_name != "NaturalJoinNode":
663720
raise TypeError(
664721
"op was supposed to be a data_algebra.data_ops.NaturalJoinNode"
@@ -707,6 +764,9 @@ def natural_join_step(self, op, *, data_map, narrow):
707764
return res
708765

709766
def concat_rows_step(self, op, *, data_map, narrow):
767+
"""
768+
Execute a concat rows step, returning a data frame.
769+
"""
710770
if op.node_name != "ConcatRowsNode":
711771
raise TypeError(
712772
"op was supposed to be a data_algebra.data_ops.ConcatRowsNode"
@@ -739,6 +799,9 @@ def concat_rows_step(self, op, *, data_map, narrow):
739799
return res
740800

741801
def convert_records_step(self, op, *, data_map, narrow):
802+
"""
803+
Execute a record conversion step, returning a data frame.
804+
"""
742805
if op.node_name != "ConvertRecordsNode":
743806
raise TypeError(
744807
"op was supposed to be a data_algebra.data_ops.ConvertRecordsNode"

coverage.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,18 +115,18 @@ data_algebra/data_ops_types.py 82 34 59%
115115
data_algebra/data_ops_utils.py 49 6 88%
116116
data_algebra/db_model.py 855 91 89%
117117
data_algebra/expr_parse.py 35 0 100%
118-
data_algebra/expr_rep.py 642 113 82%
118+
data_algebra/expr_rep.py 620 102 84%
119119
data_algebra/flow_text.py 17 0 100%
120120
data_algebra/near_sql.py 179 4 98%
121121
data_algebra/op_container.py 118 45 62%
122-
data_algebra/pandas_base.py 416 40 90%
122+
data_algebra/pandas_base.py 418 41 90%
123123
data_algebra/pandas_model.py 5 0 100%
124124
data_algebra/parse_by_lark.py 165 27 84%
125125
data_algebra/python3_lark.py 1 0 100%
126126
data_algebra/test_util.py 247 44 82%
127127
data_algebra/util.py 139 29 79%
128128
----------------------------------------------------------
129-
TOTAL 5137 915 82%
129+
TOTAL 5117 905 82%
130130

131131

132-
============================= 229 passed in 21.08s =============================
132+
============================= 229 passed in 21.31s =============================

0 commit comments

Comments
 (0)