@@ -275,30 +275,48 @@ def __init__(self, *, pd: types.ModuleType, presentation_model_name: str):
275275 # utils
276276
def data_frame(self, arg=None):
    """
    Build a new data frame.

    With no argument (arg is None) an empty data frame is returned;
    otherwise arg is handed to the DataFrame constructor of self.pd.
    """
    # noinspection PyUnresolvedReferences
    frame_type = self.pd.DataFrame
    return frame_type() if arg is None else frame_type(arg)
283286
def is_appropriate_data_instance(self, df):
    """
    Report whether df is an instance of the DataFrame class of self.pd.
    """
    # noinspection PyUnresolvedReferences
    frame_type = self.pd.DataFrame
    return isinstance(df, frame_type)
287293
def can_convert_col_to_numeric(self, x):
    """
    Return True if x is a numbers.Number instance; otherwise return the
    result of self.pd.api.types.is_numeric_dtype(x).
    """
    # Plain Python numbers short-circuit; everything else is left to the
    # data frame library's dtype check.
    # noinspection PyUnresolvedReferences
    return isinstance(x, numbers.Number) or self.pd.api.types.is_numeric_dtype(x)
293302
def to_numeric(self, x, *, errors="coerce"):
    """
    Convert column to numeric.

    :param x: value(s) to convert; passed unchanged to self.pd.to_numeric.
    :param errors: error handling mode forwarded to self.pd.to_numeric
                   (default "coerce").
    :return: result of self.pd.to_numeric(x, errors=errors).
    """
    # Forward the caller's choice; previously "coerce" was hard-coded here,
    # so the errors argument was silently ignored.
    # noinspection PyUnresolvedReferences
    return self.pd.to_numeric(x, errors=errors)
297309
def isnull(self, x):
    """
    Return the result of self.pd.isnull(x): which entries of x are null
    (vectorized).
    """
    null_check = self.pd.isnull
    return null_check(x)
300315
301316 def bad_column_positions (self , x ):
317+ """
318+ Return vector indicating which entries are bad (null or nan) (vectorized).
319+ """
302320 if self .can_convert_col_to_numeric (x ):
303321 x = numpy .asarray (x + 0 , dtype = float )
304322 return numpy .logical_or (
@@ -309,7 +327,10 @@ def bad_column_positions(self, x):
309327 # bigger stuff
310328
311329 # noinspection PyMethodMayBeStatic,PyUnusedLocal
312- def table_step (self , op , * , data_map , narrow ):
330+ def table_step (self , op , * , data_map : dict , narrow : bool ):
331+ """
332+ Return data frame from table description and data_map.
333+ """
313334 if op .node_name != "TableDescription" :
314335 raise TypeError (
315336 "op was supposed to be a data_algebra.data_ops.TableDescription"
@@ -361,6 +382,9 @@ def columns_to_frame_(self, cols, *, target_rows=0):
361382
362383 # agg can return scalars, which then can't be made into a self.pd.DataFrame
363384 def promote_scalar (vi , * , target_len ):
385+ """
386+ Convert a scalar into a vector.
387+ """
364388 # noinspection PyBroadException
365389 try :
366390 len_v = len (vi )
@@ -379,6 +403,12 @@ def promote_scalar(vi, *, target_len):
379403 return self .pd .DataFrame (cols )
380404
381405 def add_data_frame_columns_to_data_frame_ (self , res , transient_new_frame ):
406+ """
407+ Add columns from transient_new_frame to res. Res may be altered, and either of res or
408+ transient_new_frame may be returned.
409+ """
410+ if transient_new_frame .shape [1 ] < 1 :
411+ return res
382412 if (res .shape [0 ] == 0 ) and (transient_new_frame .shape [0 ] > 0 ):
383413 # scalars get interpreted as single row items, instead of zero row items
384414 # growing the extension frame
@@ -402,6 +432,9 @@ def add_data_frame_columns_to_data_frame_(self, res, transient_new_frame):
402432 return res
403433
404434 def extend_step (self , op , * , data_map , narrow ):
435+ """
436+ Execute an extend step, returning a data frame.
437+ """
405438 if op .node_name != "ExtendNode" :
406439 raise TypeError ("op was supposed to be a data_algebra.data_ops.ExtendNode" )
407440 window_situation = (
@@ -514,6 +547,9 @@ def extend_step(self, op, *, data_map, narrow):
514547 return res
515548
516549 def project_step (self , op , * , data_map , narrow ):
550+ """
551+ Execute a project step, returning a data frame.
552+ """
517553 if op .node_name != "ProjectNode" :
518554 raise TypeError ("op was supposed to be a data_algebra.data_ops.ProjectNode" )
519555 # check these are forms we are prepared to work with, and build an aggregation dictionary
@@ -581,6 +617,9 @@ def project_step(self, op, *, data_map, narrow):
581617 return res
582618
583619 def select_rows_step (self , op , * , data_map , narrow ):
620+ """
621+ Execute a select rows step, returning a data frame.
622+ """
584623 if op .node_name != "SelectRowsNode" :
585624 raise TypeError (
586625 "op was supposed to be a data_algebra.data_ops.SelectRowsNode"
@@ -595,6 +634,9 @@ def select_rows_step(self, op, *, data_map, narrow):
595634 return res
596635
597636 def select_columns_step (self , op , * , data_map , narrow ):
637+ """
638+ Execute a select columns step, returning a data frame.
639+ """
598640 if op .node_name != "SelectColumnsNode" :
599641 raise TypeError (
600642 "op was supposed to be a data_algebra.data_ops.SelectColumnsNode"
@@ -605,6 +647,9 @@ def select_columns_step(self, op, *, data_map, narrow):
605647 return res [op .column_selection ]
606648
607649 def drop_columns_step (self , op , * , data_map , narrow ):
650+ """
651+ Execute a drop columns step, returning a data frame.
652+ """
608653 if op .node_name != "DropColumnsNode" :
609654 raise TypeError (
610655 "op was supposed to be a data_algebra.data_ops.DropColumnsNode"
@@ -616,6 +661,9 @@ def drop_columns_step(self, op, *, data_map, narrow):
616661 return res [column_selection ]
617662
618663 def order_rows_step (self , op , * , data_map , narrow ):
664+ """
665+ Execute an order rows step, returning a data frame.
666+ """
619667 if op .node_name != "OrderRowsNode" :
620668 raise TypeError (
621669 "op was supposed to be a data_algebra.data_ops.OrderRowsNode"
@@ -635,6 +683,9 @@ def order_rows_step(self, op, *, data_map, narrow):
635683 return res
636684
637685 def rename_columns_step (self , op , * , data_map , narrow ):
686+ """
687+ Execute a rename columns step, returning a data frame.
688+ """
638689 if op .node_name != "RenameColumnsNode" :
639690 raise TypeError (
640691 "op was supposed to be a data_algebra.data_ops.RenameColumnsNode"
@@ -646,6 +697,9 @@ def rename_columns_step(self, op, *, data_map, narrow):
646697
647698 # noinspection PyMethodMayBeStatic
648699 def standardize_join_code (self , jointype ):
700+ """
701+ Map join names to Pandas names.
702+ """
649703 assert isinstance (jointype , str )
650704 jointype = jointype .lower ()
651705 mp = {
@@ -659,6 +713,9 @@ def standardize_join_code(self, jointype):
659713 return jointype
660714
661715 def natural_join_step (self , op , * , data_map , narrow ):
716+ """
717+ Execute a natural join step, returning a data frame.
718+ """
662719 if op .node_name != "NaturalJoinNode" :
663720 raise TypeError (
664721 "op was supposed to be a data_algebra.data_ops.NaturalJoinNode"
@@ -707,6 +764,9 @@ def natural_join_step(self, op, *, data_map, narrow):
707764 return res
708765
709766 def concat_rows_step (self , op , * , data_map , narrow ):
767+ """
768+ Execute a concat rows step, returning a data frame.
769+ """
710770 if op .node_name != "ConcatRowsNode" :
711771 raise TypeError (
712772 "op was supposed to be a data_algebra.data_ops.ConcatRowsNode"
@@ -739,6 +799,9 @@ def concat_rows_step(self, op, *, data_map, narrow):
739799 return res
740800
741801 def convert_records_step (self , op , * , data_map , narrow ):
802+ """
803+ Execute record conversion step, returning a data frame.
804+ """
742805 if op .node_name != "ConvertRecordsNode" :
743806 raise TypeError (
744807 "op was supposed to be a data_algebra.data_ops.ConvertRecordsNode"
0 commit comments