Skip to content

Commit 5c31db7

Browse files
Issue #222 - Provide inverse transform function for both MinMax and Std scalers
1 parent 838cb5a commit 5c31db7

File tree

6 files changed

+220
-5
lines changed

6 files changed

+220
-5
lines changed

danfojs-browser/src/preprocessing/scalers.js

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const utils = new Utils();
88
export class MinMaxScaler {
99
/**
1010
* Fit minmax scaler on data, to obtain their min and max value
11-
* @param {data} data [DataRame | Series | Array]
11+
* @param {data} data [DataFrame | Series | Array]
1212
* @returns Array
1313
*/
1414
fit(data) {
@@ -77,15 +77,58 @@ export class MinMaxScaler {
7777
.arraySync();
7878
return new DataFrame(output_data);
7979
} else {
80-
throw Error("Value Error: Data type not supoorted");
80+
throw Error("Value Error: Data type not supported");
81+
}
82+
}
83+
84+
/**
85+
* Restore transformed data to its original values,
86+
* using the min and max generated from the fitting on data
87+
* @param {Series|Array|DataFrame} data
88+
* @returns Series|DataFrame
89+
*/
90+
inverse_transform(data) {
91+
if (data instanceof Series) {
92+
if (data.dtypes.includes("string")) {
93+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
94+
}
95+
let tensor_data = tensor(data.values);
96+
let output_data = tensor_data
97+
.mul(this.max.sub(this.min))
98+
.add(this.min)
99+
.arraySync();
100+
return new Series(output_data);
101+
} else if (Array.isArray(data)) {
102+
let tensor_data = tensor(data);
103+
let output_data = tensor_data
104+
.mul(this.max.sub(this.min))
105+
.add(this.min)
106+
.arraySync();
107+
if (utils.__is_1D_array(data)) {
108+
return new Series(output_data);
109+
} else {
110+
return new DataFrame(output_data);
111+
}
112+
} else if (data instanceof DataFrame) {
113+
if (data.dtypes.includes("string")) {
114+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
115+
}
116+
let tensor_data = tensor(data.values);
117+
let output_data = tensor_data
118+
.mul(this.max.sub(this.min))
119+
.add(this.min)
120+
.arraySync();
121+
return new DataFrame(output_data);
122+
} else {
123+
throw Error("Value Error: Data type not supported");
81124
}
82125
}
83126
}
84127

85128
export class StandardScaler {
86129
/**
87130
*
88-
* @param {data} data [DataRame | Series | Array]
131+
* @param {data} data [DataFrame | Series | Array]
89132
* @returns Array
90133
*/
91134
fit(data) {
@@ -140,7 +183,41 @@ export class StandardScaler {
140183
let output_data = tensor_data.sub(this.mean).div(this.std).arraySync();
141184
return new DataFrame(output_data);
142185
} else {
143-
throw Error("Value Error: Data type not supoorted");
186+
throw Error("Value Error: Data type not supported");
187+
}
188+
}
189+
190+
/**
191+
* Restore transformed data to its original values,
192+
* using the mean and std generated from the fitting on data
193+
* @param {Series|Array|DataFrame} data
194+
* @returns Series|DataFrame
195+
*/
196+
inverse_transform(data) {
197+
if (data instanceof Series) {
198+
if (data.dtypes.includes("string")) {
199+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
200+
}
201+
let tensor_data = tensor(data.values);
202+
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
203+
return new Series(output_data);
204+
} else if (Array.isArray(data)) {
205+
let tensor_data = tensor(data);
206+
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
207+
if (utils.__is_1D_array(data)) {
208+
return new Series(output_data);
209+
} else {
210+
return new DataFrame(output_data);
211+
}
212+
} else if (data instanceof DataFrame) {
213+
if (data.dtypes.includes("string")) {
214+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
215+
}
216+
let tensor_data = tensor(data.values);
217+
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
218+
return new DataFrame(output_data);
219+
} else {
220+
throw Error("Value Error: Data type not supported");
144221
}
145222
}
146223
}
@@ -237,7 +314,7 @@ export class StandardScaler {
237314

238315
// /**
239316
// * Fit robust scaler on data to obtain the first quantile and third quantile
240-
// * @param {data} data [DataRame | Series | Array]
317+
// * @param {data} data [DataFrame | Series | Array]
241318
// * @returns Array
242319
// */
243320
// fit(data){

danfojs-browser/tests/preprocessing/scaler.js

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,20 @@ describe("Preprocessing", function(){
2020
assert.deepEqual(scaler.transform(new dfd.Series(data)).values, result);
2121
assert.deepEqual(scaler.transform([ 2, 2 ]).values, transform_data);
2222
});
23+
it("should be able to inverse the normalization of a Series", function() {
24+
let data = [ -1, 2, -0.5, 60, 101, 18 ];
25+
let result = [ 0, 0.029411764815449715, 0.0049019609577953815, 0.5980392098426819, 1, 0.18627451360225677 ];
26+
let scaler = new dfd.MinMaxScaler();
27+
scaler.fit(new dfd.Series(data));
28+
assert.deepEqual(scaler.inverse_transform(new dfd.Series(result)).values, data);
29+
});
30+
it("should be able to inverse the normalization of a DataFrame", function(){
31+
let data = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
32+
let result = [ [ 0, 0 ], [ 0.25, 0.25 ], [ 0.5, 0.5 ], [ 1, 1 ] ];
33+
let scaler = new dfd.MinMaxScaler();
34+
scaler.fit(new dfd.DataFrame(data));
35+
assert.deepEqual(scaler.inverse_transform(new dfd.DataFrame(result)).values, data);
36+
});
2337
});
2438

2539
describe("StandardScaler", function(){
@@ -34,6 +48,20 @@ describe("Preprocessing", function(){
3448
assert.deepEqual(scaler.fit(new dfd.DataFrame(data)).round().values, fit_data);
3549
assert.deepEqual(scaler.transform([ [ 2, 2 ] ]).round().values, transform_data);
3650
});
51+
it("should be able to inverse the normalization of a Series", function() {
52+
let data = [ -1, 2, -0.5, 60, 101, 18 ];
53+
let result = [ 0, 0.029411764815449715, 0.0049019609577953815, 0.5980392098426819, 1, 0.18627451360225677 ];
54+
let scaler = new dfd.MinMaxScaler();
55+
scaler.fit(new dfd.Series(data));
56+
assert.deepEqual(scaler.inverse_transform(new dfd.Series(result)).values, data);
57+
});
58+
it("should be able to inverse the normalization of a DataFrame", function(){
59+
let data = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
60+
let result = [ [ 0, 0 ], [ 0.25, 0.25 ], [ 0.5, 0.5 ], [ 1, 1 ] ];
61+
let scaler = new dfd.MinMaxScaler();
62+
scaler.fit(new dfd.DataFrame(data));
63+
assert.deepEqual(scaler.inverse_transform(new dfd.DataFrame(result)).values, data);
64+
});
3765
});
3866

3967

danfojs-browser/types/preprocessing/scalers.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export class MinMaxScaler {
1313
* @returns array
1414
*/
1515
transform(data?: any): Series | DataFrame;
16+
inverse_transform(data?: any): Series | DataFrame;
1617
}
1718
export class StandardScaler {
1819
/**
@@ -24,6 +25,7 @@ export class StandardScaler {
2425
std?: any;
2526
mean?: any;
2627
transform(data?: any): Series | DataFrame;
28+
inverse_transform(data?: any): Series | DataFrame;
2729
}
2830
import { Series } from "../core/series";
2931
import { DataFrame } from "../core/frame";

danfojs-node/src/preprocessing/scalers.js

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,50 @@ export class MinMaxScaler {
8080
throw Error("Value Error: Data type not supoorted");
8181
}
8282
}
83+
84+
85+
/**
86+
* Restore transformed data to its original values,
87+
* using the min and max generated from the fitting on data
88+
* @param {Series|Array|DataFrame} data
89+
* @returns Series|DataFrame
90+
*/
91+
inverse_transform(data) {
92+
if (data instanceof Series) {
93+
if (data.dtypes.includes("string")) {
94+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
95+
}
96+
let tensor_data = tf.tensor(data.values);
97+
let output_data = tensor_data
98+
.mul(this.max.sub(this.min))
99+
.add(this.min)
100+
.arraySync();
101+
return new Series(output_data);
102+
} else if (Array.isArray(data)) {
103+
let tensor_data = tf.tensor(data);
104+
let output_data = tensor_data
105+
.mul(this.max.sub(this.min))
106+
.add(this.min)
107+
.arraySync();
108+
if (utils.__is_1D_array(data)) {
109+
return new Series(output_data);
110+
} else {
111+
return new DataFrame(output_data);
112+
}
113+
} else if (data instanceof DataFrame) {
114+
if (data.dtypes.includes("string")) {
115+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
116+
}
117+
let tensor_data = tf.tensor(data.values);
118+
let output_data = tensor_data
119+
.mul(this.max.sub(this.min))
120+
.add(this.min)
121+
.arraySync();
122+
return new DataFrame(output_data);
123+
} else {
124+
throw Error("Value Error: Data type not supoorted");
125+
}
126+
}
83127
}
84128

85129
export class StandardScaler {
@@ -143,6 +187,40 @@ export class StandardScaler {
143187
throw Error("Value Error: Data type not supoorted");
144188
}
145189
}
190+
191+
/**
192+
* Restore transformed data to its original values,
193+
* using the mean and std generated from the fitting on data
194+
* @param {Series|Array|DataFrame} data
195+
* @returns Series|DataFrame
196+
*/
197+
inverse_transform(data) {
198+
if (data instanceof Series) {
199+
if (data.dtypes.includes("string")) {
200+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
201+
}
202+
let tensor_data = tf.tensor(data.values);
203+
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
204+
return new Series(output_data);
205+
} else if (Array.isArray(data)) {
206+
let tensor_data = tf.tensor(data);
207+
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
208+
if (utils.__is_1D_array(data)) {
209+
return new Series(output_data);
210+
} else {
211+
return new DataFrame(output_data);
212+
}
213+
} else if (data instanceof DataFrame) {
214+
if (data.dtypes.includes("string")) {
215+
throw Error("Dtype Error: Cannot perform operation on string dtypes");
216+
}
217+
let tensor_data = tf.tensor(data.values);
218+
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
219+
return new DataFrame(output_data);
220+
} else {
221+
throw Error("Value Error: Data type not supoorted");
222+
}
223+
}
146224
}
147225

148226
// export class RobustScaler{

danfojs-node/tests/preprocessing/scaler.js

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,20 @@ describe("Preprocessing", function(){
2626
assert.deepEqual(scaler.transform(new Series(data)).values, result);
2727
assert.deepEqual(scaler.transform([ 2, 2 ]).values, transform_data);
2828
});
29+
it("should be able to inverse the normalization of a Series", function() {
30+
let data = [ -1, 2, -0.5, 60, 101, 18 ];
31+
let result = [ 0, 0.029411764815449715, 0.0049019609577953815, 0.5980392098426819, 1, 0.18627451360225677 ];
32+
let scaler = new MinMaxScaler();
33+
scaler.fit(new Series(data));
34+
assert.deepEqual(scaler.inverse_transform(new Series(result)).values, data);
35+
});
36+
it("should be able to inverse the normalization of a DataFrame", function(){
37+
let data = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
38+
let result = [ [ 0, 0 ], [ 0.25, 0.25 ], [ 0.5, 0.5 ], [ 1, 1 ] ];
39+
let scaler = new MinMaxScaler();
40+
scaler.fit(new DataFrame(data));
41+
assert.deepEqual(scaler.inverse_transform(new DataFrame(result)).values, data);
42+
});
2943
});
3044

3145
describe("StandardScaler", function(){
@@ -40,6 +54,20 @@ describe("Preprocessing", function(){
4054
assert.deepEqual(scaler.fit(new DataFrame(data)).round().values, fit_data);
4155
assert.deepEqual(scaler.transform([ [ 2, 2 ] ]).round().values, transform_data);
4256
});
57+
it("should be able to inverse the normalization of a Series", function() {
58+
let data = [ -1, 2, -0.5, 60, 101, 18 ];
59+
let result = [ 0, 0.029411764815449715, 0.0049019609577953815, 0.5980392098426819, 1, 0.18627451360225677 ];
60+
let scaler = new MinMaxScaler();
61+
scaler.fit(new Series(data));
62+
assert.deepEqual(scaler.inverse_transform(new Series(result)).values, data);
63+
});
64+
it("should be able to inverse the normalization of a DataFrame", function(){
65+
let data = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
66+
let result = [ [ 0, 0 ], [ 0.25, 0.25 ], [ 0.5, 0.5 ], [ 1, 1 ] ];
67+
let scaler = new MinMaxScaler();
68+
scaler.fit(new DataFrame(data));
69+
assert.deepEqual(scaler.inverse_transform(new DataFrame(result)).values, data);
70+
});
4371
});
4472

4573

danfojs-node/types/preprocessing/scalers.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export class MinMaxScaler {
1313
* @returns array
1414
*/
1515
transform(data?: any): Series | DataFrame;
16+
inverse_transform(data?: any): Series | DataFrame;
1617
}
1718
export class StandardScaler {
1819
/**
@@ -24,6 +25,7 @@ export class StandardScaler {
2425
std?: any;
2526
mean?: any;
2627
transform(data?: any): Series | DataFrame;
28+
inverse_transform(data?: any): Series | DataFrame;
2729
}
2830
import { Series } from "../core/series";
2931
import { DataFrame } from "../core/frame";

0 commit comments

Comments
 (0)