@@ -731,3 +731,120 @@ def _reverse_transform(self, data):
731731
732732 data [:, 0 ] = reversed_values
733733 return super ()._reverse_transform (data )
734+
735+
class LogScaler(FloatFormatter):
    """Transformer for numerical data using log.

    This transformer scales numerical values using log and an optional constant.
    With ``invert=False`` every value ``x`` is mapped to ``log(x - constant)``; with
    ``invert=True`` it is mapped to ``log(constant - x)``. The reverse transform applies
    the matching exponential. Null values are replaced using a ``NullTransformer``.

    Args:
        missing_value_replacement (object):
            Indicate what to replace the null values with. If an integer or float is given,
            replace them with the given value. If the strings ``'mean'`` or ``'mode'``
            are given, replace them with the corresponding aggregation and if ``'random'``
            replace each null value with a random value in the data range. Defaults to ``mean``.
        missing_value_generation (str or None):
            The way missing values are being handled. There are three strategies:

                * ``random``: Randomly generates missing values based on the percentage of
                  missing values.
                * ``from_column``: Creates a binary column that describes whether the original
                  value was missing. Then use it to recreate missing values.
                * ``None``: Do nothing with the missing values on the reverse transform. Simply
                  pass whatever data we get through.
        constant (float):
            The constant to set as the 0-value for the log-based transform. Defaults to 0
            (do not modify the 0-value of the data).
        invert (bool):
            Whether to invert the data with respect to the constant value. If False, do not
            invert the data (all values will be greater than the constant value). If True,
            invert the data (all the values will be less than the constant value).
            Defaults to False.
        learn_rounding_scheme (bool):
            Whether or not to learn what place to round to based on the data seen during ``fit``.
            If ``True``, the data returned by ``reverse_transform`` will be rounded to that place.
            Defaults to ``False``.

    Raises:
        ValueError:
            If ``constant`` is not an ``int``/``float`` or ``invert`` is not a ``bool``.
    """

    def __init__(
        self,
        missing_value_replacement='mean',
        missing_value_generation='random',
        constant: float = 0.0,
        invert: bool = False,
        learn_rounding_scheme: bool = False,
    ):
        # Validate the log-specific options before delegating to the parent constructor.
        if not isinstance(constant, (int, float)):
            raise ValueError('The constant parameter must be a float or int.')
        if not isinstance(invert, bool):
            raise ValueError('The invert parameter must be a bool.')

        self.constant = constant
        self.invert = invert

        super().__init__(
            missing_value_replacement=missing_value_replacement,
            missing_value_generation=missing_value_generation,
            learn_rounding_scheme=learn_rounding_scheme,
        )

    def _validate_data(self, data: pd.Series):
        """Ensure every value lies strictly on the valid side of ``constant``.

        Args:
            data (pandas.Series or numpy.ndarray):
                The values that are about to be log-transformed.

        Raises:
            InvalidDataError:
                If ``invert`` is True and some value is not strictly less than
                ``constant``, or ``invert`` is False and some value is not strictly
                greater than ``constant``. NaN values fail either comparison and are
                therefore rejected as well.
        """
        column_name = self.get_input_column()

        # A single vectorised reduction instead of iterating the values in Python.
        if self.invert:
            within_domain = np.all(data < self.constant)
            direction = 'small'
        else:
            within_domain = np.all(data > self.constant)
            direction = 'large'

        if not within_domain:
            raise InvalidDataError(
                f"Unable to apply a log transform to column '{column_name}' due to constant"
                f' being too {direction}.'
            )

    def _fit(self, data):
        """Fit the parent transformer, then verify the data is valid for the log.

        Args:
            data (pandas.Series):
                Data to fit.

        Raises:
            InvalidDataError:
                If the parent-transformed values are not all on the valid side of
                ``constant``.
        """
        super()._fit(data)
        transformed = super()._transform(data)

        # Only the first column is validated when the parent returns a 2-D array.
        values = transformed[:, 0] if transformed.ndim > 1 else transformed
        self._validate_data(values)

    def _log_transform(self, data):
        """Validate ``data`` and return its log relative to ``constant``.

        Args:
            data (numpy.ndarray or pandas.Series):
                Values to transform.

        Returns:
            ``log(constant - data)`` when ``invert`` is True, otherwise
            ``log(data - constant)``.

        Raises:
            InvalidDataError:
                If the values are not all on the valid side of ``constant``.
        """
        self._validate_data(data)

        shifted = self.constant - data if self.invert else data - self.constant
        return np.log(shifted)

    def _transform(self, data):
        """Apply the parent transform followed by the log transform.

        Args:
            data (pandas.Series):
                Data to transform.

        Returns:
            numpy.ndarray:
                The parent's output with the log applied to the values. When that output
                is 2-D only its first column is log-transformed; the remaining columns
                are passed through unchanged.
        """
        transformed = super()._transform(data)

        if transformed.ndim > 1:
            # NOTE(review): writes into the array returned by the parent ``_transform``;
            # presumably that is a fresh array rather than a view of ``data`` - confirm.
            transformed[:, 0] = self._log_transform(transformed[:, 0])
        else:
            transformed = self._log_transform(transformed)

        return transformed

    def _reverse_log(self, data):
        """Invert ``_log_transform``.

        Args:
            data (numpy.ndarray):
                Log-scaled values.

        Returns:
            ``constant - exp(data)`` when ``invert`` is True, otherwise
            ``exp(data) + constant``.
        """
        restored = np.exp(data)
        if self.invert:
            return self.constant - restored

        return restored + self.constant

    def _reverse_transform(self, data):
        """Undo the log scaling and then apply the parent reverse transform.

        The input is never modified: the exponential is written into a private copy.

        Args:
            data (pandas.Series, pandas.DataFrame or numpy.ndarray):
                Data to reverse. When 2-D, only the first column holds log-scaled values.

        Returns:
            Whatever the parent ``_reverse_transform`` returns for the un-logged data.
        """
        # Work on a copy: ``to_numpy()`` may hand back a view of the caller's data (which is
        # read-only under pandas Copy-on-Write), and a caller-owned ndarray must not be
        # overwritten in place.
        if isinstance(data, np.ndarray):
            values = data.copy()
        else:
            values = data.to_numpy(copy=True)

        if values.ndim > 1:
            # Assigning floats into an integer array would silently truncate them.
            if np.issubdtype(values.dtype, np.integer):
                values = values.astype(float)

            values[:, 0] = self._reverse_log(values[:, 0])
        else:
            values = self._reverse_log(values)

        return super()._reverse_transform(values)
0 commit comments