Skip to content

Commit c70aa15

Browse files
rc105
1 parent c1f92fb commit c70aa15

File tree

7 files changed

+21
-13
lines changed

7 files changed

+21
-13
lines changed

Gemfile.lock

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
PATH
22
remote: .
33
specs:
4-
easy_ml (0.2.0.pre.rc104)
4+
easy_ml (0.2.0.pre.rc105)
55
activerecord
66
activerecord-import (~> 1.8.1)
77
activesupport

app/models/easy_ml/dataset/learner/lazy/datetime.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ def full_dataset_query
1111

1212
def unique_count
1313
Polars.col(column.name)
14-
.cast(column.polars_datatype)
14+
.cast(datatype)
1515
.n_unique.alias("#{column.name}__unique_count")
1616
end
1717
end

app/models/easy_ml/dataset/learner/lazy/numeric.rb

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,27 +6,27 @@ class Numeric < Query
66
def train_query
77
super.concat([
88
Polars.col(column.name)
9-
.cast(column.polars_datatype)
9+
.cast(datatype)
1010
.mean
1111
.alias("#{column.name}__mean"),
1212

1313
Polars.col(column.name)
14-
.cast(column.polars_datatype)
14+
.cast(datatype)
1515
.median
1616
.alias("#{column.name}__median"),
1717

1818
Polars.col(column.name)
19-
.cast(column.polars_datatype)
19+
.cast(datatype)
2020
.min
2121
.alias("#{column.name}__min"),
2222

2323
Polars.col(column.name)
24-
.cast(column.polars_datatype)
24+
.cast(datatype)
2525
.max
2626
.alias("#{column.name}__max"),
2727

2828
Polars.col(column.name)
29-
.cast(column.polars_datatype)
29+
.cast(datatype)
3030
.std
3131
.alias("#{column.name}__std"),
3232
])

app/models/easy_ml/dataset/learner/lazy/query.rb

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,14 @@ def execute(split)
3333
end
3434
end
3535

36+
def datatype
37+
case column.polars_datatype.to_s
38+
when /Polars::Categorical/ then Polars::String
39+
else
40+
column.polars_datatype
41+
end
42+
end
43+
3644
private
3745

3846
def full_dataset_query
@@ -45,21 +53,21 @@ def train_query
4553

4654
def null_count
4755
Polars.col(column.name)
48-
.cast(column.polars_datatype)
56+
.cast(datatype)
4957
.null_count
5058
.alias("#{column.name}__null_count")
5159
end
5260

5361
def num_rows
5462
Polars.col(column.name)
55-
.cast(column.polars_datatype)
63+
.cast(datatype)
5664
.len
5765
.alias("#{column.name}__num_rows")
5866
end
5967

6068
def most_frequent_value
6169
Polars.col(column.name)
62-
.cast(column.polars_datatype)
70+
.cast(datatype)
6371
.filter(Polars.col(column.name).is_not_null)
6472
.mode
6573
.first
@@ -70,7 +78,7 @@ def last_value
7078
return unless dataset.date_column.present?
7179

7280
Polars.col(column.name)
73-
.cast(column.polars_datatype)
81+
.cast(datatype)
7482
.sort_by(dataset.date_column.name, reverse: true, nulls_last: true)
7583
.filter(Polars.col(column.name).is_not_null)
7684
.first

easy_ml-0.2.0.pre.rc105.gem

934 KB
Binary file not shown.

lib/easy_ml/version.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# frozen_string_literal: true
22

33
module EasyML
4-
VERSION = "0.2.0-rc104"
4+
VERSION = "0.2.0-rc105"
55

66
module Version
77
end

spec/app/models/easy_ml/column_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -400,7 +400,7 @@
400400
])
401401

402402
column.update(datatype: "null")
403-
expect(column.errors.map(&:message)).to include("Can't cast from float to null")
403+
expect(column.raw.data["Age"].dtype).to eq(Polars::Null)
404404
end
405405
end
406406
end

0 commit comments

Comments
 (0)