File tree Expand file tree Collapse file tree 7 files changed +21
-13
lines changed
app/models/easy_ml/dataset/learner/lazy Expand file tree Collapse file tree 7 files changed +21
-13
lines changed Original file line number Diff line number Diff line change 11PATH
22 remote: .
33 specs:
4- easy_ml (0.2.0.pre.rc104 )
4+ easy_ml (0.2.0.pre.rc105 )
55 activerecord
66 activerecord-import (~> 1.8.1 )
77 activesupport
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ def full_dataset_query
1111
1212 def unique_count
1313 Polars . col ( column . name )
14- . cast ( column . polars_datatype )
14+ . cast ( datatype )
1515 . n_unique . alias ( "#{ column . name } __unique_count" )
1616 end
1717 end
Original file line number Diff line number Diff line change @@ -6,27 +6,27 @@ class Numeric < Query
66 def train_query
77 super . concat ( [
88 Polars . col ( column . name )
9- . cast ( column . polars_datatype )
9+ . cast ( datatype )
1010 . mean
1111 . alias ( "#{ column . name } __mean" ) ,
1212
1313 Polars . col ( column . name )
14- . cast ( column . polars_datatype )
14+ . cast ( datatype )
1515 . median
1616 . alias ( "#{ column . name } __median" ) ,
1717
1818 Polars . col ( column . name )
19- . cast ( column . polars_datatype )
19+ . cast ( datatype )
2020 . min
2121 . alias ( "#{ column . name } __min" ) ,
2222
2323 Polars . col ( column . name )
24- . cast ( column . polars_datatype )
24+ . cast ( datatype )
2525 . max
2626 . alias ( "#{ column . name } __max" ) ,
2727
2828 Polars . col ( column . name )
29- . cast ( column . polars_datatype )
29+ . cast ( datatype )
3030 . std
3131 . alias ( "#{ column . name } __std" ) ,
3232 ] )
Original file line number Diff line number Diff line change @@ -33,6 +33,14 @@ def execute(split)
3333 end
3434 end
3535
36+ def datatype
37+ case column . polars_datatype . to_s
38+ when /Polars::Categorical/ then Polars ::String
39+ else
40+ column . polars_datatype
41+ end
42+ end
43+
3644 private
3745
3846 def full_dataset_query
@@ -45,21 +53,21 @@ def train_query
4553
4654 def null_count
4755 Polars . col ( column . name )
48- . cast ( column . polars_datatype )
56+ . cast ( datatype )
4957 . null_count
5058 . alias ( "#{ column . name } __null_count" )
5159 end
5260
5361 def num_rows
5462 Polars . col ( column . name )
55- . cast ( column . polars_datatype )
63+ . cast ( datatype )
5664 . len
5765 . alias ( "#{ column . name } __num_rows" )
5866 end
5967
6068 def most_frequent_value
6169 Polars . col ( column . name )
62- . cast ( column . polars_datatype )
70+ . cast ( datatype )
6371 . filter ( Polars . col ( column . name ) . is_not_null )
6472 . mode
6573 . first
@@ -70,7 +78,7 @@ def last_value
7078 return unless dataset . date_column . present?
7179
7280 Polars . col ( column . name )
73- . cast ( column . polars_datatype )
81+ . cast ( datatype )
7482 . sort_by ( dataset . date_column . name , reverse : true , nulls_last : true )
7583 . filter ( Polars . col ( column . name ) . is_not_null )
7684 . first
Original file line number Diff line number Diff line change 11# frozen_string_literal: true
22
33module EasyML
4- VERSION = "0.2.0-rc104 "
4+ VERSION = "0.2.0-rc105 "
55
66 module Version
77 end
Original file line number Diff line number Diff line change 400400 ] )
401401
402402 column . update ( datatype : "null" )
403- expect ( column . errors . map ( & :message ) ) . to include ( "Can't cast from float to null" )
403+ expect ( column . raw . data [ "Age" ] . dtype ) . to eq ( Polars :: Null )
404404 end
405405 end
406406 end
You can’t perform that action at this time.
0 commit comments