Skip to content

Commit 75de2c8

Browse files
authored
Merge pull request #1936 from VesnaT/nomogram
OWNomogram: Add a new widget
2 parents 633bfc4 + 5f781ce commit 75de2c8

File tree

8 files changed: +1391 / -14 lines changed

8 files changed: +1391 / -14 lines changed

Orange/base.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ def __call__(self, data):
106106

107107
if isinstance(data, Instance):
108108
data = Table(data.domain, [data])
109+
origdata = data
109110
data = self.preprocess(data)
110111

111112
if len(data.domain.class_vars) > 1 and not self.supports_multiclass:
@@ -123,6 +124,7 @@ def __call__(self, data):
123124
model.supports_multiclass = self.supports_multiclass
124125
model.name = self.name
125126
model.original_domain = origdomain
127+
model.original_data = origdata
126128
return model
127129

128130
def preprocess(self, data):

Orange/classification/naive_bayes.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from Orange.classification import Learner, Model
44
from Orange.data import Instance, Storage
55
from Orange.statistics import contingency
6-
from Orange.preprocess import Discretize
6+
from Orange.preprocess import Discretize, RemoveNaNColumns
77

88
__all__ = ["NaiveBayesLearner"]
99

@@ -19,7 +19,8 @@ class NaiveBayesLearner(Learner):
1919
An ordered list of preprocessors applied to data before training
2020
or testing.
2121
"""
22-
preprocessors = [Discretize()]
22+
preprocessors = [RemoveNaNColumns(), Discretize()]
23+
name = 'naive bayes'
2324

2425
def fit_storage(self, table):
2526
if not isinstance(table, Storage):
@@ -31,33 +32,35 @@ def fit_storage(self, table):
3132
cont = contingency.get_contingencies(table)
3233
class_freq = np.array(np.diag(
3334
contingency.get_contingency(table, table.domain.class_var)))
34-
return NaiveBayesModel(cont, class_freq, table.domain)
35+
class_prob = (class_freq + 1) / (np.sum(class_freq) + len(class_freq))
36+
log_cont_prob = [np.log(
37+
(np.array(c) + 1) / (np.sum(np.array(c), axis=0)[None, :] +
38+
c.shape[0]) / class_prob[:, None])
39+
for c in cont]
40+
return NaiveBayesModel(log_cont_prob, class_prob, table.domain)
3541

3642

3743
class NaiveBayesModel(Model):
38-
def __init__(self, cont, class_freq, domain):
44+
def __init__(self, log_cont_prob, class_prob, domain):
3945
super().__init__(domain)
40-
self.cont = cont
41-
self.class_freq = class_freq
46+
self.log_cont_prob = log_cont_prob
47+
self.class_prob = class_prob
4248

4349
def predict_storage(self, data):
4450
if isinstance(data, Instance):
4551
data = [data]
46-
n_cls = len(self.class_freq)
47-
class_prob = (self.class_freq + 1) / (np.sum(self.class_freq) + n_cls)
4852
if len(data.domain.attributes) == 0:
49-
probs = np.tile(class_prob, (len(data), 1))
53+
probs = np.tile(self.class_prob, (len(data), 1))
5054
else:
51-
log_cont_prob = [np.log(np.divide(np.array(c) + 1,
52-
self.class_freq.reshape((n_cls, 1)) +
53-
c.shape[1])) for c in self.cont]
5455
probs = np.exp(np.array([np.sum(attr_prob[:, int(attr_val)]
5556
for attr_val, attr_prob
56-
in zip(ins, log_cont_prob)
57+
in zip(ins, self.log_cont_prob)
5758
if not np.isnan(attr_val))
58-
for ins in data]) + np.log(class_prob))
59+
for ins in data]) + np.log(
60+
self.class_prob))
5961
probs /= probs.sum(axis=1)[:, None]
6062
values = probs.argmax(axis=1)
6163
return values, probs
6264

65+
6366
NaiveBayesLearner.__returns__ = NaiveBayesModel
Lines changed: 16 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)