Skip to content

Commit 8632579

Browse files
Merge pull request #18 from dataiku/feature/color-palette-select-predefined
Tweak color palette and chart title
2 parents 34b253e + e30c69f commit 8632579

File tree

10 files changed

+250
-159
lines changed

10 files changed

+250
-159
lines changed
Lines changed: 147 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -1,129 +1,149 @@
11
{
2-
"meta": {
3-
"label": "Word clouds",
4-
"description": "Generate word clouds from your text data",
5-
"icon": "icon-comment"
6-
},
7-
"kind": "PYTHON",
8-
"selectableFromDataset": "input_dataset",
9-
"inputRoles": [
10-
{
11-
"name": "input_dataset",
12-
"label": "Input dataset",
13-
"description": "Dataset with a text column",
14-
"arity": "UNARY",
15-
"required": true,
16-
"acceptsDataset": true,
17-
"acceptsManagedFolder": false
18-
}
19-
],
20-
"outputRoles": [
21-
{
22-
"name": "output_folder",
23-
"label": "Word cloud folder",
24-
"description": "Folder where the word clouds will be saved as images",
25-
"arity": "UNARY",
26-
"required": true,
27-
"acceptsDataset": false,
28-
"acceptsManagedFolder": true
29-
}
30-
],
31-
"paramsPythonSetup": "get_language_list.py",
32-
"params": [
33-
{
34-
"name": "separator_params",
35-
"label": "Input parameters",
36-
"type": "SEPARATOR"
2+
"meta": {
3+
"label": "Word clouds",
4+
"description": "Generate word clouds from your text data",
5+
"icon": "icon-comment"
376
},
38-
{
39-
"name": "text_column",
40-
"type": "COLUMN",
41-
"columnRole": "input_dataset",
42-
"allowedColumnTypes": ["string"],
43-
"label": "Text column",
44-
"mandatory": true
45-
},
46-
{
47-
"name": "language",
48-
"type": "SELECT",
49-
"getChoicesFromPython": true,
50-
"label": "Language",
51-
"mandatory": true,
52-
"defaultValue": "en"
53-
},
54-
{
55-
"name": "language_column",
56-
"label": "Language column",
57-
"type": "COLUMN",
58-
"description": "Column containing ISO 639-1 language codes",
59-
"columnRole": "input_dataset",
60-
"allowedColumnTypes": ["string"],
61-
"visibilityCondition": "model.language == 'language_column'",
62-
"mandatory": false
63-
},
64-
{
65-
"name": "separator_text_handling",
66-
"label": "Text simplification",
67-
"type": "SEPARATOR"
68-
},
69-
{
70-
"name": "remove_stopwords",
71-
"label": "Clear stopwords",
72-
"description": "Remove so-called 'stop words' (the, I, a, of, ...). This transformation is language-specific.",
73-
"type": "BOOLEAN",
74-
"defaultValue": true,
75-
"mandatory": true
76-
},
77-
{
78-
"name": "remove_punctuation",
79-
"label": "Clear punctuation",
80-
"description": "Remove punctuation characters e.g., ! ? ( ) . This transformation is language-specific.",
81-
"type": "BOOLEAN",
82-
"defaultValue": true,
83-
"mandatory": true
84-
},
85-
{
86-
"name": "case_insensitive",
87-
"label": "Case-insensitive",
88-
"description": "Normalize case to count 'You' and 'you' as the same word. The most common case will be displayed.",
89-
"type": "BOOLEAN",
90-
"defaultValue": false,
91-
"mandatory": true
92-
},
93-
{
94-
"name": "separator_display",
95-
"label": "Display parameters",
96-
"type": "SEPARATOR"
97-
},
98-
{
99-
"type": "INT",
100-
"name": "max_words",
101-
"label": "Maximum number of words",
102-
"minI": 1,
103-
"defaultValue": 100
104-
},
105-
{
106-
"type": "STRINGS",
107-
"name": "color_list",
108-
"label": "Color palette",
109-
"description": "List of colors to randomly color each word. Supported formats are hexadecimal color codes and matplotlib color names.",
110-
"allowDuplicates": false,
111-
"defaultValue": ["#1F75B3", "#FF7F0F", "#2CA02B"]
112-
},
113-
{
114-
"name": "separator_subcharts",
115-
"label": "Subcharts",
116-
"type": "SEPARATOR"
117-
},
118-
{
119-
"name": "subchart_column",
120-
"type": "COLUMN",
121-
"columnRole": "input_dataset",
122-
"allowedColumnTypes": ["string"],
123-
"label": "Split by column",
124-
"description": "Optional column to generate one word cloud per category",
125-
"mandatory": false
126-
}
127-
],
128-
"resourceKeys": []
129-
}
7+
"kind": "PYTHON",
8+
"selectableFromDataset": "input_dataset",
9+
"inputRoles": [
10+
{
11+
"name": "input_dataset",
12+
"label": "Input dataset",
13+
"description": "Dataset with a text column",
14+
"arity": "UNARY",
15+
"required": true,
16+
"acceptsDataset": true,
17+
"acceptsManagedFolder": false
18+
}
19+
],
20+
"outputRoles": [
21+
{
22+
"name": "output_folder",
23+
"label": "Word cloud folder",
24+
"description": "Folder where the word clouds will be saved as images",
25+
"arity": "UNARY",
26+
"required": true,
27+
"acceptsDataset": false,
28+
"acceptsManagedFolder": true
29+
}
30+
],
31+
"paramsPythonSetup": "dynamic_select.py",
32+
"params": [
33+
{
34+
"name": "separator_params",
35+
"label": "Input parameters",
36+
"type": "SEPARATOR"
37+
},
38+
{
39+
"name": "text_column",
40+
"type": "COLUMN",
41+
"columnRole": "input_dataset",
42+
"allowedColumnTypes": [
43+
"string"
44+
],
45+
"label": "Text column",
46+
"mandatory": true
47+
},
48+
{
49+
"name": "language",
50+
"type": "SELECT",
51+
"getChoicesFromPython": true,
52+
"label": "Language",
53+
"mandatory": true,
54+
"defaultValue": "en"
55+
},
56+
{
57+
"name": "language_column",
58+
"label": " ↳ Language column",
59+
"type": "COLUMN",
60+
"description": "Column containing ISO 639-1 language codes",
61+
"columnRole": "input_dataset",
62+
"allowedColumnTypes": [
63+
"string"
64+
],
65+
"visibilityCondition": "model.language == 'language_column'",
66+
"mandatory": false
67+
},
68+
{
69+
"name": "separator_text_handling",
70+
"label": "Text simplification",
71+
"type": "SEPARATOR"
72+
},
73+
{
74+
"name": "remove_stopwords",
75+
"label": "Clear stopwords",
76+
"description": "Remove so-called 'stop words' (the, I, a, of, ...). This transformation is language-specific.",
77+
"type": "BOOLEAN",
78+
"defaultValue": true,
79+
"mandatory": true
80+
},
81+
{
82+
"name": "remove_punctuation",
83+
"label": "Clear punctuation",
84+
"description": "Remove punctuation characters e.g., ! ? ( ) . This transformation is language-specific.",
85+
"type": "BOOLEAN",
86+
"defaultValue": true,
87+
"mandatory": true
88+
},
89+
{
90+
"name": "case_insensitive",
91+
"label": "Case-insensitive",
92+
"description": "Normalize case to count 'You' and 'you' as the same word. The most common case will be displayed.",
93+
"type": "BOOLEAN",
94+
"defaultValue": false,
95+
"mandatory": true
96+
},
97+
{
98+
"name": "separator_display",
99+
"label": "Display parameters",
100+
"type": "SEPARATOR"
101+
},
102+
{
103+
"type": "INT",
104+
"name": "max_words",
105+
"label": "Maximum number of words",
106+
"minI": 1,
107+
"defaultValue": 100
108+
},
109+
{
110+
"type": "SELECT",
111+
"name": "color_palette",
112+
"label": "Color palette",
113+
"description": "Select a built-in DSS palette or set a custom one to randomly color each word",
114+
"mandatory": true,
115+
"getChoicesFromPython": true,
116+
"defaultValue": "default"
117+
},
118+
{
119+
"type": "STRINGS",
120+
"name": "color_list",
121+
"label": " ↳ Custom palette",
122+
"description": "List of hexadecimal color codes",
123+
"allowDuplicates": false,
124+
"defaultValue": [
125+
"#1F77B4",
126+
"#FF7F0E",
127+
"#2CA02C"
128+
],
129+
"visibilityCondition": "model.color_palette == 'custom'"
130+
},
131+
{
132+
"name": "separator_subcharts",
133+
"label": "Subcharts",
134+
"type": "SEPARATOR"
135+
},
136+
{
137+
"name": "subchart_column",
138+
"type": "COLUMN",
139+
"columnRole": "input_dataset",
140+
"allowedColumnTypes": [
141+
"string"
142+
],
143+
"label": "Split by column",
144+
"description": "Optional column to generate one word cloud per category",
145+
"mandatory": false
146+
}
147+
],
148+
"resourceKeys": []
149+
}

python-lib/color_palettes.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
# -*- coding: utf-8 -*-
2+
"""Module with built-in DSS color palettes"""
3+
4+
5+
DSS_BUILTIN_COLOR_PALETTES = [
6+
{
7+
"id": "default",
8+
"name": "Default",
9+
"colors": ["#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F"],
10+
},
11+
{
12+
"id": "dku_dss_next",
13+
"name": "DSS Next",
14+
"colors": ["#00AEDB", "#8CC63F", "#FFC425", "#F37735", "#D11141", "#91268F", "#194BA3", "#00B159"],
15+
},
16+
{
17+
"id": "dku_pastel1",
18+
"name": "Pastel",
19+
"colors": ["#EC6547", "#FDC665", "#95C37B", "#75C2CC", "#694A82", "#538BC8", "#65B890", "#A874A0"],
20+
},
21+
{
22+
"id": "dku_corpo1",
23+
"name": "Corporate",
24+
"colors": ["#0075B2", "#818991", "#EA9423", "#A4C2DB", "#EF3C39", "#009D4B", "#CFD6D3", "#231F20"],
25+
},
26+
{
27+
"id": "dku_deuteranopia1",
28+
"name": "Deuteranopia",
29+
"colors": ["#193C81", "#7EA0F9", "#211924", "#757A8D", "#D6C222", "#776A37", "#AE963A", "#655E5D"],
30+
},
31+
{
32+
"id": "dku_tritanopia1",
33+
"name": "Tritanopia",
34+
"colors": ["#CA0849", "#0B4D61", "#E4B2BF", "#3F6279", "#F24576", "#7D8E98", "#9C4259", "#2B2A2E"],
35+
},
36+
{
37+
"id": "dku_pastel2",
38+
"name": "Pastel 2",
39+
"colors": ["#F06548", "#FDC766", "#7BC9A6", "#4EC5DA", "#548ECB", "#97668F", "#5E2974"],
40+
},
41+
]
42+
"""list: Constant with DSS built-in color palette identifiers, names and lists of colors
43+
44+
This is an amended version of what is in the DSS code base, without the faded colors and truncated to the first 8 colors
45+
"""

python-lib/plugin_config_loading.py

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@
1414
get_recipe_resource,
1515
)
1616

17-
from language_dict import SUPPORTED_LANGUAGES_SPACY
17+
from language_support import SUPPORTED_LANGUAGES_SPACY
18+
from color_palettes import DSS_BUILTIN_COLOR_PALETTES
1819
from partitions_handling import get_folder_partition_root
1920

2021

@@ -114,13 +115,26 @@ def load_plugin_config_wordcloud() -> Dict:
114115
raise PluginParamValidationError("Maximum number of words is not a positive integer")
115116
logging.info(f"Max number of words: {params['max_words']}")
116117

117-
params["color_list"] = recipe_config.get("color_list")
118-
if not (isinstance(params["color_list"], list) & (len(params["color_list"]) >= 1)):
119-
raise PluginParamValidationError("Empty color palette")
120-
if not all([matplotlib.colors.is_color_like(color) for color in params["color_list"]]):
121-
raise PluginParamValidationError(f"Invalid color palette: {params['color_list']}")
122-
123-
params["color_list"] = [matplotlib.colors.to_hex(color) for color in params["color_list"]]
124-
logging.info(f"Color list: {params['color_list']}")
118+
color_palette = recipe_config.get("color_palette")
119+
if not color_palette:
120+
raise PluginParamValidationError("Empty color palette selection")
121+
if color_palette == "custom":
122+
params["color_list"] = recipe_config.get("color_list")
123+
if not (isinstance(params["color_list"], list) & (len(params["color_list"]) >= 1)):
124+
raise PluginParamValidationError("Empty custom palette")
125+
if not all([matplotlib.colors.is_color_like(color) for color in params["color_list"]]):
126+
raise PluginParamValidationError(f"Invalid custom palette: {params['color_list']}")
127+
params["color_list"] = [matplotlib.colors.to_hex(color) for color in params["color_list"]]
128+
logging.info(f"Custom palette: {params['color_list']}")
129+
else:
130+
if color_palette not in {builtin_palette["id"] for builtin_palette in DSS_BUILTIN_COLOR_PALETTES}:
131+
raise PluginParamValidationError(f"Unsupported color palette: {color_palette}")
132+
selected_palette_dict = [
133+
builtin_palette for builtin_palette in DSS_BUILTIN_COLOR_PALETTES if builtin_palette["id"] == color_palette
134+
][0]
135+
params["color_list"] = selected_palette_dict["colors"]
136+
logging.info(
137+
f"Using built-in DSS palette: '{selected_palette_dict['name']}' with colors: {params['color_list']}"
138+
)
125139

126140
return params

python-lib/spacy_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from emoji import UNICODE_EMOJI
1818
from fastcore.utils import store_attr
1919

20-
from language_dict import SUPPORTED_LANGUAGES_SPACY, SPACY_LANGUAGE_MODELS
20+
from language_support import SUPPORTED_LANGUAGES_SPACY, SPACY_LANGUAGE_MODELS
2121
from plugin_io_utils import generate_unique, truncate_text_list
2222

2323

0 commit comments

Comments
 (0)