|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding:utf-8 -*- |
| 3 | +# @Filename: InstanceSelection.py |
| 4 | +# @Author: Daniel Puente Ramírez |
| 5 | +# @Time: 15/4/22 16:20 |
| 6 | + |
| 7 | +import random |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +import pandas as pd |
| 11 | +import pytest |
| 12 | +from sklearn.datasets import load_iris |
| 13 | + |
| 14 | +from instance_selection import ENN, CNN, RNN, ICF, MSS, DROP3, LSSm, LSBo |
| 15 | + |
| 16 | + |
def to_dataframe(y):
    """Return ``y`` as a ``pandas.DataFrame``.

    An object that already is a DataFrame is handed back unchanged (the very
    same object); anything else is passed to the ``pd.DataFrame``
    constructor.
    """
    return y if isinstance(y, pd.DataFrame) else pd.DataFrame(y)
| 21 | + |
| 22 | + |
@pytest.fixture
def iris_dataset():
    """Provide the Iris data as a ``(features, labels)`` pair.

    The labels returned by ``load_iris`` are passed through
    ``to_dataframe`` so that they are a DataFrame as well.
    """
    features, target = load_iris(return_X_y=True, as_frame=True)
    return features, to_dataframe(target)
| 28 | + |
| 29 | + |
@pytest.fixture
def iris_dataset_ss():
    """Provide an Iris split for the semi-supervised tests.

    30 % of the row numbers (rounded down) are drawn at random and treated
    as unlabeled; the remaining rows form the labeled subset.

    Returns:
        tuple: ``(original, original_labels, complete, complete_labels)``.
        ``original`` / ``original_labels`` contain only the labeled rows,
        ``complete`` / ``complete_labels`` contain every row.
    """
    x, y = load_iris(return_X_y=True, as_frame=True)
    y = to_dataframe(y)
    n_samples = x.shape[0]

    # Kept as a set so the membership test below is O(1) per row.
    # NOTE: the draw is not seeded inside this fixture, so the split may
    # differ from one run to the next.
    unlabeled = set(random.sample(range(n_samples), int(n_samples * 0.3)))

    # The loop variable is deliberately not called ``x`` so that it cannot
    # be confused with the feature frame.
    labeled = [idx for idx in range(n_samples) if idx not in unlabeled]

    # Label-based selection: assumes the frames carry the default
    # 0..n-1 index, so row numbers and index labels coincide.
    original = x.loc[labeled]
    original_labels = y.loc[labeled]

    return original, original_labels, x, y
| 46 | + |
| 47 | + |
def base(x, y, algorithm, params=None):
    """Run ``algorithm`` on ``(x, y)`` and sanity-check the filtered output.

    Args:
        x: feature DataFrame.
        y: label DataFrame.
        algorithm: callable that builds an object exposing ``filter(x, y)``.
        params: optional dict of keyword arguments for ``algorithm``; when
            ``None`` the algorithm is built without arguments.

    Raises:
        AssertionError: if ``x`` or ``y`` is not a DataFrame, if the number
            of columns of either output differs from its input, if the two
            outputs have a different number of rows, or if the output does
            not have fewer rows than ``x``.
    """
    assert isinstance(x, pd.DataFrame) and isinstance(y, pd.DataFrame)

    if params is None:
        selector = algorithm()
    else:
        selector = algorithm(**params)
    x_kept, y_kept = selector.filter(x, y)

    # Filtering may only drop rows, never columns.
    assert x_kept.shape[1] == x.shape[1]
    assert y_kept.shape[1] == y.shape[1]

    # Features and labels must stay aligned, and something must be removed.
    n_kept = x_kept.shape[0]
    assert n_kept == y_kept.shape[0]
    assert n_kept < x.shape[0]
| 58 | + |
| 59 | + |
def test_enn_original(iris_dataset):
    """Run the ``base`` checks on ENN built with explicit parameters."""
    features, labels = iris_dataset
    enn_params = dict(nearest_neighbors=3, power_parameter=2)
    base(features, labels, ENN, enn_params)
| 63 | + |
| 64 | + |
def test_cnn(iris_dataset):
    """Run the ``base`` checks on CNN built with its default arguments."""
    features, labels = iris_dataset
    base(features, labels, algorithm=CNN)
| 68 | + |
| 69 | + |
def test_rnn(iris_dataset):
    """Run the ``base`` checks on RNN built with its default arguments."""
    features, labels = iris_dataset
    base(features, labels, algorithm=RNN)
| 73 | + |
| 74 | + |
def test_icf(iris_dataset):
    """Run the ``base`` checks on ICF built with explicit parameters."""
    features, labels = iris_dataset
    icf_params = dict(nearest_neighbors=3, power_parameter=2)
    base(features, labels, ICF, icf_params)
| 78 | + |
| 79 | + |
def test_mss(iris_dataset):
    """Run the ``base`` checks on MSS built with its default arguments."""
    features, labels = iris_dataset
    base(features, labels, algorithm=MSS)
| 83 | + |
| 84 | + |
def test_drop3(iris_dataset):
    """Run the ``base`` checks on DROP3 built with explicit parameters."""
    features, labels = iris_dataset
    drop3_params = dict(nearest_neighbors=3, power_parameter=2)
    base(features, labels, DROP3, drop3_params)
| 88 | + |
| 89 | + |
def test_local_sets_lssm(iris_dataset):
    """Run the ``base`` checks on LSSm built with its default arguments."""
    features, labels = iris_dataset
    base(features, labels, algorithm=LSSm)
| 93 | + |
| 94 | + |
def test_local_sets_lsbo(iris_dataset):
    """Run the ``base`` checks on LSBo built with its default arguments."""
    features, labels = iris_dataset
    base(features, labels, algorithm=LSBo)
| 98 | + |
| 99 | + |
def test_enn_ss(iris_dataset_ss):
    """Check the output of ``ENN.filter_original_complete``.

    Every row of the labeled subset is looked up in the filtered features;
    the labels found at those positions must equal the labeled subset's own
    labels. The filtered features must also keep the column count of the
    complete set and must not have more rows than it.
    """
    original, original_labels, complete, complete_labels = iris_dataset_ss

    model = ENN()
    x, y = model.filter_original_complete(original, original_labels,
                                          complete, complete_labels)

    # Converted once up front instead of once per labeled row.
    filtered_rows = x.to_numpy()

    # Row position, inside the filtered output, of the first row equal to
    # each labeled sample.
    new_orig = []
    for ori in original.to_numpy():
        for position, x_sample in enumerate(filtered_rows):
            if np.array_equal(ori, x_sample):
                new_orig.append(position)
                break

    # ``new_orig`` holds row positions (they come from ``enumerate``), so
    # the labels are selected with ``iloc``; ``loc`` would interpret the
    # numbers as index labels.
    a = np.ravel(y.iloc[new_orig].to_numpy())
    o = np.ravel(original_labels.to_numpy())
    assert np.array_equal(o, a)
    assert complete.shape[1] == x.shape[1]
    assert complete.shape[0] >= x.shape[0]
| 119 | + |
| 120 | + |
def test_different_len(iris_dataset):
    """``LSSm`` and ``LSBo`` must raise ``ValueError`` on mismatched lengths.

    The labels are shortened by one row so that ``x`` and ``y`` no longer
    have the same number of rows before ``filter`` is called.
    """
    x, y = iris_dataset
    # Drop the last row by position. A label slice (``y.loc[:-1]``) would
    # select the labels up to -1 instead, which, assuming the default
    # 0..n-1 index, leaves an empty frame rather than removing one row.
    y = y.iloc[:-1]
    model1 = LSSm()
    with pytest.raises(ValueError):
        model1.filter(x, y)
    model2 = LSBo()
    with pytest.raises(ValueError):
        model2.filter(x, y)
0 commit comments