Skip to content

Commit fe61735

Browse files
authored
Merge pull request #3406 from irgolic/smart-suggestions-laplace-correction
Smart Suggestions: Default values using collected data
2 parents 429753f + 8219183 commit fe61735

File tree

3 files changed

+91
-33
lines changed
Orange/canvas/document/default_suggestion_weights.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Default suggestions generated from usage statistics.
2+
# See: https://gitlab.fri.uni-lj.si/irgolic/orange-usage-statistics-analysis
3+
# pylint: disable=line-too-long
4+
default_suggestions = [{'Source': 'Corpus Viewer', 'Sink': 'Word Cloud', 'Direction': 1, 'Value': 3.0394218134034166}, {'Source': 'Bag of Words', 'Sink': 'Word Cloud', 'Direction': 1, 'Value': 3.0657030223390276}, {'Source': 'Preprocess Text', 'Sink': 'Word Cloud', 'Direction': 1, 'Value': 3.354796320630749}, {'Source': 'File', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 5.83180026281209}, {'Source': 'File', 'Sink': 'Data Table', 'Direction': 1, 'Value': 6}, {'Source': 'File', 'Sink': 'Distributions', 'Direction': 1, 'Value': 3.683311432325887}, {'Source': 'Scatter Plot', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.8081471747700393}, {'Source': 'Bag of Words', 'Sink': 'Distances', 'Direction': 1, 'Value': 3.3350854139290407}, {'Source': 'Distances', 'Sink': 'Hierarchical Clustering', 'Direction': 1, 'Value': 4.977660972404731}, {'Source': 'Import Documents', 'Sink': 'Preprocess Text', 'Direction': 1, 'Value': 3.105124835742444}, {'Source': 'Preprocess Text', 'Sink': 'Bag of Words', 'Direction': 1, 'Value': 3.8869908015768724}, {'Source': 'Bag of Words', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.2825229960578186}, {'Source': 'Bag of Words', 'Sink': 'Logistic Regression', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'Logistic Regression', 'Sink': 'Nomogram', 'Direction': 1, 'Value': 3.052562417871222}, {'Source': 'Bag of Words', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.1839684625492772}, {'Source': 'Datasets', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.466491458607096}, {'Source': 'Datasets', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.059132720105125}, {'Source': 'Logistic Regression', 'Sink': 'Test & Score', 'Direction': 2, 'Value': 3.131406044678055}, {'Source': 'File', 'Sink': 'Box Plot', 'Direction': 1, 'Value': 3.2036793692509855}, {'Source': 'File', 'Sink': 'Tree', 'Direction': 1, 'Value': 4.3994743758212875}, {'Source': 'Tree', 'Sink': 'Tree Viewer', 'Direction': 1, 'Value': 4.793692509855454}, {'Source': 
'Data Table', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.946123521681997}, {'Source': 'File', 'Sink': 'Rank', 'Direction': 1, 'Value': 3.5584756898817345}, {'Source': 'File', 'Sink': 'Select Columns', 'Direction': 1, 'Value': 4.064388961892247}, {'Source': 'Select Columns', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.407358738501971}, {'Source': 'Tree', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.0131406044678055}, {'Source': 'Predictions', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.131406044678055}, {'Source': 'Test & Score', 'Sink': 'Confusion Matrix', 'Direction': 1, 'Value': 4.4257555847568995}, {'Source': 'Select Columns', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.6373193166885676}, {'Source': 'File', 'Sink': 'Distances', 'Direction': 1, 'Value': 3.459921156373193}, {'Source': 'Data Table', 'Sink': 'Tree', 'Direction': 1, 'Value': 3.1182654402102497}, {'Source': 'File', 'Sink': 'Logistic Regression', 'Direction': 1, 'Value': 3.440210249671485}, {'Source': 'File', 'Sink': 'Predictions', 'Direction': 1, 'Value': 3.6438896189224703}, {'Source': 'Predictions', 'Sink': 'Confusion Matrix', 'Direction': 1, 'Value': 3.321944809461235}, {'Source': 'File', 'Sink': 'SVM', 'Direction': 1, 'Value': 3.0394218134034166}, {'Source': 'File', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 4.40604467805519}, {'Source': 'Test & Score', 'Sink': 'ROC Analysis', 'Direction': 1, 'Value': 3.2365308804204993}, {'Source': 'Rank', 'Sink': 'Box Plot', 'Direction': 1, 'Value': 3.0197109067017083}, {'Source': 'Rank', 'Sink': 'Distributions', 'Direction': 1, 'Value': 3.085413929040736}, {'Source': 'File', 'Sink': 'k-Means', 'Direction': 1, 'Value': 3.249671484888305}, {'Source': 'k-Means', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.8015768725361365}, {'Source': 'File', 'Sink': 'PCA', 'Direction': 1, 'Value': 3.131406044678055}, {'Source': 'PCA', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.9067017082785807}, {'Source': 'PCA', 'Sink': 
'Scatter Plot', 'Direction': 1, 'Value': 3.4467805519053876}, {'Source': 'Data Table', 'Sink': 'Distributions', 'Direction': 1, 'Value': 3.4139290407358738}, {'Source': 'Test & Score', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.7818659658344282}, {'Source': 'Import Images', 'Sink': 'Image Viewer', 'Direction': 1, 'Value': 3.0197109067017083}, {'Source': 'Import Images', 'Sink': 'Image Embedding', 'Direction': 1, 'Value': 3.164257555847569}, {'Source': 'Image Embedding', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.2102496714848883}, {'Source': 'Image Embedding', 'Sink': 'Distances', 'Direction': 1, 'Value': 3.052562417871222}, {'Source': 'Hierarchical Clustering', 'Sink': 'Image Viewer', 'Direction': 1, 'Value': 3.0657030223390276}, {'Source': 'Confusion Matrix', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.3088042049934296}, {'Source': 'Random Forest', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.052562417871222}, {'Source': 'Logistic Regression', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.1839684625492772}, {'Source': 'Confusion Matrix', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.269382391590013}, {'Source': 'Predictions', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.6044678055190538}, {'Source': 'Paint Data', 'Sink': 'k-Means', 'Direction': 1, 'Value': 3.111695137976347}, {'Source': 'Naive Bayes', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.0197109067017083}, {'Source': 'File', 'Sink': 'Corpus', 'Direction': 1, 'Value': 3.111695137976347}, {'Source': 'Corpus', 'Sink': 'Corpus Viewer', 'Direction': 1, 'Value': 3.630749014454665}, {'Source': 'Data Table', 'Sink': 'Save Data', 'Direction': 1, 'Value': 3.3153745072273324}, {'Source': 'File', 'Sink': 'Naive Bayes', 'Direction': 1, 'Value': 3.0197109067017083}, {'Source': 'Test & Score', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.1971090670170828}, {'Source': 'Python Script', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.0}, {'Source': 'Corpus Viewer', 
'Sink': 'Preprocess Text', 'Direction': 1, 'Value': 3.0065703022339028}, {'Source': 'Concatenate', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'Neural Network', 'Sink': 'Predictions', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'Hierarchical Clustering', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.275952693823916}, {'Source': 'Corpus', 'Sink': 'Word Cloud', 'Direction': 1, 'Value': 3.157687253613666}, {'Source': 'Corpus', 'Sink': 'Preprocess Text', 'Direction': 1, 'Value': 3.486202365308804}, {'Source': 'Hierarchical Clustering', 'Sink': 'Box Plot', 'Direction': 1, 'Value': 3.223390275952694}, {'Source': 'Distances', 'Sink': 'Distance Map', 'Direction': 1, 'Value': 3.0459921156373193}, {'Source': 'Distances', 'Sink': 'Distance Matrix', 'Direction': 1, 'Value': 3.0459921156373193}, {'Source': 'Polynomial Regression', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.0065703022339028}, {'Source': 'Select Columns', 'Sink': 'k-Means', 'Direction': 1, 'Value': 3.059132720105125}, {'Source': 'Hierarchical Clustering', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.249671484888305}, {'Source': 'Outliers', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'File', 'Sink': 'Linear Regression', 'Direction': 1, 'Value': 3.6110381077529565}, {'Source': 'Predictions', 'Sink': 'Data Table', 'Direction': 1, 'Value': 4.202365308804205}, {'Source': 'Data Table', 'Sink': 'Select Columns', 'Direction': 1, 'Value': 3.2036793692509855}, {'Source': 'Linear Regression', 'Sink': 'Test & Score', 'Direction': 2, 'Value': 3.0131406044678055}, {'Source': 'Logistic Regression', 'Sink': 'Predictions', 'Direction': 1, 'Value': 3.0919842312746386}, {'Source': 'Linear Regression', 'Sink': 'Predictions', 'Direction': 1, 'Value': 3.354796320630749}, {'Source': 'Select Columns', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.0065703022339028}, {'Source': 'Test & Score', 'Sink': 'Predictions', 'Direction': 1, 
'Value': 3.900131406044678}, {'Source': 'Feature Constructor', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.078843626806833}, {'Source': 'Data Table', 'Sink': 'Data Sampler', 'Direction': 1, 'Value': 3.0394218134034166}, {'Source': 'Import Documents', 'Sink': 'Corpus Viewer', 'Direction': 1, 'Value': 3.1182654402102497}, {'Source': 'Select Columns', 'Sink': 'Distances', 'Direction': 1, 'Value': 3.1445466491458607}, {'Source': 'File', 'Sink': 'FreeViz', 'Direction': 1, 'Value': 3.0065703022339028}, {'Source': 'Corpus', 'Sink': 'Select Columns', 'Direction': 1, 'Value': 3.111695137976347}, {'Source': 'File', 'Sink': 'Preprocess', 'Direction': 1, 'Value': 3.38107752956636}, {'Source': 'Data Table', 'Sink': 'Logistic Regression', 'Direction': 1, 'Value': 3.0394218134034166}, {'Source': 'Data Sampler', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.164257555847569}, {'Source': 'Select Columns', 'Sink': 'PCA', 'Direction': 1, 'Value': 3.059132720105125}, {'Source': 'k-Means', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.0657030223390276}, {'Source': 'Select Columns', 'Sink': 'Linear Regression', 'Direction': 1, 'Value': 3.078843626806833}, {'Source': 'Linear Regression', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.354796320630749}, {'Source': 'Python Script', 'Sink': 'Python Script', 'Direction': 1, 'Value': 3.1773981603153745}, {'Source': 'SVM', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.1511169513797634}, {'Source': 'Rank', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.2102496714848883}, {'Source': 'Linear Regression', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'File', 'Sink': 'As Timeseries', 'Direction': 1, 'Value': 3.157687253613666}, {'Source': 'Merge Data', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'File', 'Sink': 'Data Sampler', 'Direction': 1, 'Value': 3.032851511169514}, {'Source': 'Data Table', 'Sink': 'Feature Constructor', 'Direction': 1, 'Value': 3.0}, 
{'Source': 'CN2 Rule Induction', 'Sink': 'CN2 Rule Viewer', 'Direction': 1, 'Value': 3.026281208935611}, {'Source': 'kNN', 'Sink': 'Test & Score', 'Direction': 2, 'Value': 3.0197109067017083}, {'Source': 't-SNE', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.0131406044678055}, {'Source': 'File', 'Sink': 'Correlations', 'Direction': 1, 'Value': 3.0065703022339028}, {'Source': 'Preprocess', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.269382391590013}, {'Source': 'Select Columns', 'Sink': 'Select Rows', 'Direction': 1, 'Value': 3.032851511169514}, {'Source': 'Select Rows', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.3153745072273324}, {'Source': 'Preprocess Text', 'Sink': 'Louvain Clustering', 'Direction': 1, 'Value': 3.0919842312746386}, {'Source': 'Similarity Hashing', 'Sink': 'k-Means', 'Direction': 1, 'Value': 3.0919842312746386}, {'Source': 'Linear Regression', 'Sink': 'Data Table', 'Direction': 1, 'Value': 4.281208935611038}, {'Source': 'Select Columns', 'Sink': 'Python Script', 'Direction': 1, 'Value': 3.0}, {'Source': 'Process Profiles', 'Sink': 'Data Table', 'Direction': 1, 'Value': 3.0131406044678055}, {'Source': 'Preprocess', 'Sink': 'Test & Score', 'Direction': 1, 'Value': 3.1248357424441524}, {'Source': 'Feature Constructor', 'Sink': 'Scatter Plot', 'Direction': 1, 'Value': 3.026281208935611}, {'Source': 'File', 'Sink': 'Polynomial Regression', 'Direction': 1, 'Value': 3.0131406044678055}]

Orange/canvas/document/suggestions.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from Orange.canvas import config
77
from .interactions import NewLinkAction
88

9+
from .default_suggestion_weights import default_suggestions
10+
911
log = logging.getLogger(__name__)
1012

1113

@@ -20,40 +22,42 @@ def __init__(self):
2022

2123
self.__scheme = None
2224
self.__direction = None
23-
self.link_frequencies = defaultdict(int)
24-
self.source_probability = defaultdict(lambda: defaultdict(float))
25-
self.sink_probability = defaultdict(lambda: defaultdict(float))
25+
self.__link_frequencies = defaultdict(int)
26+
self.__source_probability = defaultdict(lambda: defaultdict(float))
27+
self.__sink_probability = defaultdict(lambda: defaultdict(float))
2628

27-
if not self.load_link_frequency():
28-
self.default_link_frequency()
29+
try:
30+
self.__load_default_suggestions()
31+
except OSError:
32+
log.warning("Failed to load default suggestions from file.")
33+
self.__load_link_frequencies()
2934

30-
def load_link_frequency(self):
35+
def __load_link_frequencies(self):
3136
if not os.path.isfile(self.__frequencies_path):
32-
return False
37+
return
3338

3439
try:
3540
with open(self.__frequencies_path, "rb") as f:
3641
imported_freq = pickle.load(f)
3742
except OSError:
3843
log.warning("Failed to open widget link frequencies.")
39-
return False
44+
return
4045

4146
for k, v in imported_freq.items():
4247
imported_freq[k] = self.__import_factor * v
4348

44-
self.link_frequencies = imported_freq
45-
self.overwrite_probabilities_with_frequencies()
46-
return True
47-
48-
def default_link_frequency(self):
49-
self.link_frequencies[("File", "Data Table", NewLinkAction.FROM_SOURCE)] = 3
50-
self.overwrite_probabilities_with_frequencies()
49+
self.__link_frequencies = imported_freq
50+
for link, count in self.__link_frequencies.items():
51+
self.__increment_probability(link[0], link[1], link[2], count)
5152

52-
def overwrite_probabilities_with_frequencies(self):
53-
for link, count in self.link_frequencies.items():
54-
self.increment_probability(link[0], link[1], link[2], count)
53+
def __load_default_suggestions(self):
54+
for link in default_suggestions:
55+
self.__increment_probability(link["Source"],
56+
link["Sink"],
57+
link["Direction"],
58+
link["Value"])
5559

56-
def new_link(self, link):
60+
def log_new_link(self, link):
5761
# direction is none when a widget was not added+linked via quick menu
5862
if self.__direction is None:
5963
return
@@ -62,25 +66,23 @@ def new_link(self, link):
6266
sink_id = link.sink_node.description.name
6367

6468
link_key = (source_id, sink_id, self.__direction)
65-
self.link_frequencies[link_key] += 1
69+
self.__link_frequencies[link_key] += 1
6670

67-
self.increment_probability(source_id, sink_id, self.__direction, 1)
68-
self.write_link_frequency()
71+
self.__increment_probability(source_id, sink_id, self.__direction, 1)
72+
self.__save_link_frequency()
6973

7074
self.__direction = None
7175

72-
def increment_probability(self, source_id, sink_id, direction, factor):
76+
def __increment_probability(self, source_id, sink_id, direction, factor):
7377
if direction == NewLinkAction.FROM_SOURCE:
74-
self.source_probability[source_id][sink_id] += factor
75-
self.sink_probability[sink_id][source_id] += factor * 0.5
78+
self.__source_probability[source_id][sink_id] += factor
7679
else: # FROM_SINK
77-
self.source_probability[source_id][sink_id] += factor * 0.5
78-
self.sink_probability[sink_id][source_id] += factor
80+
self.__sink_probability[sink_id][source_id] += factor
7981

80-
def write_link_frequency(self):
82+
def __save_link_frequency(self):
8183
try:
8284
with open(self.__frequencies_path, "wb") as f:
83-
pickle.dump(self.link_frequencies, f)
85+
pickle.dump(self.__link_frequencies, f)
8486
except OSError:
8587
log.warning("Failed to write widget link frequencies.")
8688
return
@@ -94,16 +96,16 @@ def set_direction(self, direction):
9496

9597
def set_scheme(self, scheme):
9698
self.__scheme = scheme
97-
scheme.onNewLink(self.new_link)
99+
scheme.onNewLink(self.log_new_link)
98100

99101
def get_sink_suggestions(self, source_id):
100-
return self.source_probability[source_id]
102+
return self.__source_probability[source_id]
101103

102104
def get_source_suggestions(self, sink_id):
103-
return self.sink_probability[sink_id]
105+
return self.__sink_probability[sink_id]
104106

105107
def get_default_suggestions(self):
106-
return self.source_probability
108+
return self.__source_probability
107109

108110
instance = None
109111

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# pylint: disable=protected-access
2+
from Orange.canvas.document import SchemeEditWidget
3+
from Orange.canvas.document.interactions import NewLinkAction
4+
from Orange.canvas.document.suggestions import Suggestions
5+
from Orange.canvas.gui.test import QAppTestCase
6+
from Orange.canvas.registry import global_registry
7+
from Orange.canvas.registry.qt import QtWidgetRegistry
8+
from Orange.canvas.scheme import SchemeLink, Scheme, SchemeNode
9+
10+
11+
class TestSuggestions(QAppTestCase):
    """Exercise the link bookkeeping kept by ``Suggestions``."""

    def test_load_default(self):
        """Smoke test: loading the bundled defaults must not raise."""
        Suggestions()._Suggestions__load_default_suggestions()

    def test_log_link(self):
        """A link added after ``set_direction`` raises both counters by 1."""
        tracker = Suggestions()

        registry = QtWidgetRegistry(global_registry())

        editor = SchemeEditWidget()
        workflow = Scheme()
        editor.setScheme(workflow)

        widgets = "Orange.widgets."
        file_desc = registry.widget(widgets + "data.owfile.OWFile")
        disc_desc = registry.widget(widgets + "data.owdiscretize.OWDiscretize")

        file_node = SchemeNode(file_desc, title="title1", position=(100, 100))
        editor.addNode(file_node)
        disc_node = SchemeNode(disc_desc, title="title2", position=(300, 100))
        editor.addNode(disc_node)

        link = SchemeLink(file_node, "Data", disc_node, "Data")
        src_name = link.source_node.description.name
        sink_name = link.sink_node.description.name
        link_key = (src_name, sink_name, NewLinkAction.FROM_SOURCE)

        tracker.set_direction(NewLinkAction.FROM_SOURCE)

        def snapshot():
            # Both attributes are private to Suggestions, hence the
            # name-mangled access (see the protected-access pylint disable).
            return (
                tracker._Suggestions__link_frequencies[link_key],
                tracker._Suggestions__source_probability[src_name][sink_name],
            )

        freq_before, prob_before = snapshot()
        editor.addLink(link)
        freq_after, prob_after = snapshot()

        self.assertEqual(freq_before + 1, freq_after)
        self.assertEqual(prob_before + 1, prob_after)

0 commit comments

Comments (0)