"""Min-max scaling of numeric data onto the range [0, 100], with unit tests.

Reconstructed from a patch that added ``pandas/io/percentile_scaling.py``
and ``pandas/tests/io/test_percentile_scaling.py``.  In that patch the tests
lived in their own file and imported ``percentile_scaling`` from
``pandas.io.percentile_scaling``; here both parts share one module, so the
tests use the function defined below directly.
"""
import unittest

import numpy as np


def percentile_scaling(data) -> list:
    """Linearly rescale *data* so its minimum maps to 0 and its maximum to 100.

    Despite the name, this is min-max scaling, not a percentile rank: each
    value ``x`` becomes ``100 * (x - min) / (max - min)``.

    Parameters
    ----------
    data : array-like of numbers
        Values to rescale.  Anything ``numpy.asarray`` can convert to
        ``float64`` is accepted.

    Returns
    -------
    list
        The rescaled values as Python floats, with the same nesting as the
        input (``ndarray.tolist()``).

    Raises
    ------
    ValueError
        If *data* is empty, or if all values are identical (the range is
        zero, so the scaling is undefined).

    Notes
    -----
    NaN is not treated specially: a NaN in the input makes both the minimum
    and the maximum NaN, so every output element is NaN.
    """
    # Convert to float64 *before* any arithmetic.  Keeping the input's integer
    # dtype lets ``100 * (data - min)`` wrap around for narrow types (uint8,
    # int16, ...) and lets ``data - min`` overflow int64 for wide ranges.
    values = np.asarray(data, dtype=np.float64)

    # Explicit check: otherwise the failure comes from inside np.min with a
    # message about "zero-size array to reduction operation".
    if values.size == 0:
        raise ValueError("Cannot scale empty data.")

    lowest = values.min()
    highest = values.max()
    if highest == lowest:
        raise ValueError("Cannot scale data with identical values.")

    scaled = 100 * (values - lowest) / (highest - lowest)
    return scaled.tolist()


class TestPercentileScaling(unittest.TestCase):
    """Unit tests for ``percentile_scaling``."""

    def test_scaling(self):
        data = [10, 20, 30, 40, 50]
        expected = [0.0, 25.0, 50.0, 75.0, 100.0]
        result = percentile_scaling(data)
        # zip() alone would silently accept a result of the wrong length.
        self.assertEqual(len(result), len(expected))
        for r, e in zip(result, expected):
            self.assertAlmostEqual(r, e)

    def test_narrow_integer_dtype(self):
        # 100 * 5 does not fit in uint8; the result must still be exact.
        data = np.array([0, 5, 10], dtype=np.uint8)
        self.assertEqual(percentile_scaling(data), [0.0, 50.0, 100.0])

    def test_identical_values(self):
        with self.assertRaises(ValueError):
            percentile_scaling([5, 5, 5])

    def test_empty(self):
        with self.assertRaises(ValueError):
            percentile_scaling([])


if __name__ == "__main__":
    unittest.main()