|
5 | 5 |
|
6 | 6 | import numpy as np |
7 | 7 | from scipy.stats import chisqprob |
| 8 | +import bottlechest as bn |
8 | 9 | import Orange |
9 | 10 | from Orange.classification import Learner, Model |
10 | 11 |
|
@@ -48,7 +49,8 @@ def argmaxrnd(a, random_seed=None): |
48 | 49 | random = (np.random if random_seed is None |
49 | 50 | else np.random.RandomState(random_seed)) |
50 | 51 |
|
51 | | - def f(x): return random.choice((x == np.nanmax(x)).nonzero()[0]) |
| 52 | + def f(x): return random.choice((x == bn.nanmax(x)).nonzero()[0]) |
| 53 | + |
52 | 54 | return f(a) if a.ndim == 1 else np.apply_along_axis(f, axis=1, arr=a) |
53 | 55 |
|
54 | 56 |
|
@@ -727,21 +729,24 @@ def __init__(self, preprocessors=None, base_rules=None): |
727 | 729 |
|
@abstractmethod
def fit(self, X, Y, W=None):
    """Induce a rule list from the data and wrap it in a RuleClassifier.

    Rows of `X` containing any undefined (NaN) attribute value are
    discarded (together with their labels) before rule induction.

    NOTE(review): `W` (instance weights) is accepted for interface
    compatibility but is not subset or used here — presumably handled
    by overriding subclasses; TODO confirm.
    """
    # Keep only fully-defined rows.
    keep = np.logical_not(np.isnan(X).any(axis=1))
    X, Y = X[keep], Y[keep]
    rule_list = self.find_rules(X, Y, None, self.base_rules, self.domain)
    return RuleClassifier(domain=self.domain, rule_list=rule_list)
732 | 737 |
|
def find_rules(self, X, Y, target_class, base_rules, domain):
    """Sequential-covering loop: induce rules until the data runs out.

    Repeatedly asks the rule finder for the next rule; halts when the
    data-stopping criterion fires, or early when the rule-stopping
    criterion rejects the freshly found rule. Covered examples are
    removed from (X, Y) after each accepted rule.
    """
    induced = []
    while True:
        if self.data_stopping(X, Y, target_class):
            break
        candidate = self.rule_finder(X, Y, target_class, base_rules, domain)
        if self.rule_stopping(candidate):
            break
        X, Y = self.cover_and_remove(X, Y, candidate)
        induced.append(candidate)
    return induced
742 | 747 |
|
@staticmethod
def rule_stopping(new_rule, alpha=1.0):
    # Default rule-stopping criterion: never reject a rule, so induction
    # halts only via the data-stopping criterion.  `alpha` is presumably a
    # significance level for overriding subclasses (e.g. likelihood-ratio
    # based stopping) — TODO confirm; both parameters are unused here.
    return False
746 | 751 |
|
747 | 752 | @staticmethod |
@@ -793,6 +798,7 @@ class CN2Learner(RuleLearner): |
793 | 798 | .. [1] "The CN2 Induction Algorithm", Peter Clark and Tim Niblett, |
794 | 799 | Machine Learning Journal, 3 (4), pp261-283, (1989) |
795 | 800 | """ |
| 801 | + name = 'CN2 inducer' |
796 | 802 |
|
797 | 803 | def __init__(self, preprocessors=None, base_rules=None): |
798 | 804 | super().__init__(preprocessors, base_rules) |
@@ -849,6 +855,8 @@ class CN2UnorderedLearner(RuleLearner): |
849 | 855 | Clark and Robin Boswell, Machine Learning - Proceedings of |
850 | 856 | the 5th European Conference (EWSL-91), pp151-163, 1991 |
851 | 857 | """ |
| 858 | + name = 'CN2 Unordered inducer' |
| 859 | + |
852 | 860 | def __init__(self, preprocessors=None, base_rules=None): |
853 | 861 | super().__init__(preprocessors, base_rules) |
854 | 862 | self.rule_finder.search_algorithm.beam_width = 10 |
@@ -924,13 +932,14 @@ def main(): |
924 | 932 | classifier = learner(data) |
925 | 933 | for rule in classifier.rule_list: |
926 | 934 | print(rule.curr_class_dist.tolist(), rule) |
| 935 | + print(len(classifier.rule_list)) |
927 | 936 |
|
928 | 937 | print() |
929 | 938 |
|
930 | | - learner = CN2UnorderedLearner() |
931 | | - classifier = learner(data) |
932 | | - for rule in classifier.rule_list: |
933 | | - print(rule, rule.curr_class_dist.tolist()) |
| 939 | + # learner = CN2UnorderedLearner() |
| 940 | + # classifier = learner(data) |
| 941 | + # for rule in classifier.rule_list: |
| 942 | + # print(rule, rule.curr_class_dist.tolist()) |
934 | 943 |
|
# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
0 commit comments