33from Orange .classification import Learner , Model
44from Orange .data import Instance , Storage
55from Orange .statistics import contingency
6- from Orange .preprocess import Discretize
6+ from Orange .preprocess import Discretize , RemoveNaNColumns
77
88__all__ = ["NaiveBayesLearner" ]
99
@@ -19,7 +19,8 @@ class NaiveBayesLearner(Learner):
1919 An ordered list of preprocessors applied to data before training
2020 or testing.
2121 """
22- preprocessors = [Discretize ()]
22+ preprocessors = [RemoveNaNColumns (), Discretize ()]
23+ name = 'naive bayes'
2324
2425 def fit_storage (self , table ):
2526 if not isinstance (table , Storage ):
@@ -31,33 +32,35 @@ def fit_storage(self, table):
3132 cont = contingency .get_contingencies (table )
3233 class_freq = np .array (np .diag (
3334 contingency .get_contingency (table , table .domain .class_var )))
34- return NaiveBayesModel (cont , class_freq , table .domain )
35+ class_prob = (class_freq + 1 ) / (np .sum (class_freq ) + len (class_freq ))
36+ log_cont_prob = [np .log (
37+ (np .array (c ) + 1 ) / (np .sum (np .array (c ), axis = 0 )[None , :] +
38+ c .shape [0 ]) / class_prob [:, None ])
39+ for c in cont ]
40+ return NaiveBayesModel (log_cont_prob , class_prob , table .domain )
3541
3642
class NaiveBayesModel(Model):
    """Naive Bayes classifier that predicts from precomputed log-probabilities.

    Parameters
    ----------
    log_cont_prob : sequence of 2-D arrays
        One array per attribute, indexed as ``[class, attribute value]``.
        Each column is added to the per-class log score when an instance
        has that attribute value.  The learner in this file builds the
        entries as ``log(P(class | value) / P(class))`` with add-one
        smoothing.
    class_prob : 1-D array
        Prior probability of each class; its logarithm is the starting
        score for every instance.
    domain
        Passed unchanged to the ``Model`` base class.
    """

    def __init__(self, log_cont_prob, class_prob, domain):
        super().__init__(domain)
        self.log_cont_prob = log_cont_prob
        self.class_prob = class_prob

    def predict_storage(self, data):
        """Predict classes and class probabilities for ``data``.

        Parameters
        ----------
        data
            Either a single ``Instance`` or an iterable of instances that
            supports ``len()``.  Attribute values are used as integer
            column indices into ``log_cont_prob``; NaN values are treated
            as missing and contribute nothing to the score.

        Returns
        -------
        values : 1-D integer array
            Index of the most probable class for each instance.
        probs : 2-D array
            Row-normalised class probabilities, one row per instance.
        """
        if isinstance(data, Instance):
            # A bare list has no ``.domain``; nothing below relies on it.
            data = [data]

        # Every row starts at the log prior.  With no attributes in the
        # model (empty ``log_cont_prob``), or with all values missing, the
        # result is therefore just the normalised class prior.
        log_prior = np.log(self.class_prob)
        log_scores = np.tile(log_prior, (len(data), 1))

        for row, ins in enumerate(data):
            # zip() stops after the last attribute table, so any trailing
            # values of the instance are ignored.
            for attr_val, attr_log_prob in zip(ins, self.log_cont_prob):
                if not np.isnan(attr_val):
                    log_scores[row] += attr_log_prob[:, int(attr_val)]

        # Shift each row so its maximum is 0 before exponentiating; this
        # leaves the normalised result unchanged but prevents every class
        # from underflowing to 0 (and the division below from producing NaN).
        log_scores -= log_scores.max(axis=1)[:, None]
        probs = np.exp(log_scores)
        probs /= probs.sum(axis=1)[:, None]
        values = probs.argmax(axis=1)
        return values, probs
6264
65+
6366NaiveBayesLearner .__returns__ = NaiveBayesModel
0 commit comments