dataiku
diff --git a/‎custom-recipes/nlp-visualization-wordcloud/recipe.json‎
Lines changed: 147 additions & 127 deletions b/‎custom-recipes/nlp-visualization-wordcloud/recipe.json‎
Lines changed: 147 additions & 127 deletions
diff --git a/‎python-lib/color_palettes.py‎
Lines changed: 45 additions & 0 deletions b/‎python-lib/color_palettes.py‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎python-lib/language_dict.py‎ ‎python-lib/language_support.py‎python-lib/language_dict.py renamed to python-lib/language_support.py b/‎python-lib/language_dict.py‎ ‎python-lib/language_support.py‎python-lib/language_dict.py renamed to python-lib/language_support.py
diff --git a/‎python-lib/plugin_config_loading.py‎
Lines changed: 23 additions & 9 deletions b/‎python-lib/plugin_config_loading.py‎
Lines changed: 23 additions & 9 deletions
diff --git a/‎python-lib/spacy_tokenizer.py‎
Lines changed: 1 addition & 1 deletion b/‎python-lib/spacy_tokenizer.py‎
Lines changed: 1 addition & 1 deletion
@@ -1,129 +1,149 @@
 {
-  "meta": {
-    "label": "Word clouds",
-    "description": "Generate word clouds from your text data",
-    "icon": "icon-comment"
-  },
-  "kind": "PYTHON",
-  "selectableFromDataset": "input_dataset",
-  "inputRoles": [
-    {
-      "name": "input_dataset",
-      "label": "Input dataset",
-      "description": "Dataset with a text column",
-      "arity": "UNARY",
-      "required": true,
-      "acceptsDataset": true,
-      "acceptsManagedFolder": false
-    }
-  ],
-  "outputRoles": [
-    {
-      "name": "output_folder",
-      "label": "Word cloud folder",
-      "description": "Folder where the word clouds will be saved as images",
-      "arity": "UNARY",
-      "required": true,
-      "acceptsDataset": false,
-      "acceptsManagedFolder": true
-    }
-  ],
-  "paramsPythonSetup": "get_language_list.py",
-  "params": [
-    {
-      "name": "separator_params",
-      "label": "Input parameters",
-      "type": "SEPARATOR"
+    "meta": {
+        "label": "Word clouds",
+        "description": "Generate word clouds from your text data",
+        "icon": "icon-comment"
     },
-    {
-      "name": "text_column",
-      "type": "COLUMN",
-      "columnRole": "input_dataset",
-      "allowedColumnTypes": ["string"],
-      "label": "Text column",
-      "mandatory": true
-    },
-    {
-      "name": "language",
-      "type": "SELECT",
-      "getChoicesFromPython": true,
-      "label": "Language",
-      "mandatory": true,
-      "defaultValue": "en"
-    },
-    {
-      "name": "language_column",
-      "label": "Language column",
-      "type": "COLUMN",
-      "description": "Column containing ISO 639-1 language codes",
-      "columnRole": "input_dataset",
-      "allowedColumnTypes": ["string"],
-      "visibilityCondition": "model.language == 'language_column'",
-      "mandatory": false
-    },
-    {
-      "name": "separator_text_handling",
-      "label": "Text simplification",
-      "type": "SEPARATOR"
-    },
-    {
-      "name": "remove_stopwords",
-      "label": "Clear stopwords",
-      "description": "Remove so-called 'stop words' (the, I, a, of, ...). This transformation is language-specific.",
-      "type": "BOOLEAN",
-      "defaultValue": true,
-      "mandatory": true
-    },
-    {
-      "name": "remove_punctuation",
-      "label": "Clear punctuation",
-      "description": "Remove punctuation characters e.g., ! ? ( ) . This transformation is language-specific.",
-      "type": "BOOLEAN",
-      "defaultValue": true,
-      "mandatory": true
-    },
-    {
-      "name": "case_insensitive",
-      "label": "Case-insensitive",
-      "description": "Normalize case to count 'You' and 'you' as the same word. The most common case will be displayed.",
-      "type": "BOOLEAN",
-      "defaultValue": false,
-      "mandatory": true
-    },
-    {
-      "name": "separator_display",
-      "label": "Display parameters",
-      "type": "SEPARATOR"
-    },
-    {
-      "type": "INT",
-      "name": "max_words",
-      "label": "Maximum number of words",
-      "minI": 1,
-      "defaultValue": 100
-    },
-    {
-      "type": "STRINGS",
-      "name": "color_list",
-      "label": "Color palette",
-      "description": "List of colors to randomly color each word. Supported formats are hexadecimal color codes and matplotlib color names.",
-      "allowDuplicates": false,
-      "defaultValue": ["#1F75B3", "#FF7F0F", "#2CA02B"]
-    },
-    {
-      "name": "separator_subcharts",
-      "label": "Subcharts",
-      "type": "SEPARATOR"
-    },
-    {
-      "name": "subchart_column",
-      "type": "COLUMN",
-      "columnRole": "input_dataset",
-      "allowedColumnTypes": ["string"],
-      "label": "Split by column",
-      "description": "Optional column to generate one word cloud per category",
-      "mandatory": false
-    }
-  ],
-  "resourceKeys": []
-}
+    "kind": "PYTHON",
+    "selectableFromDataset": "input_dataset",
+    "inputRoles": [
+        {
+            "name": "input_dataset",
+            "label": "Input dataset",
+            "description": "Dataset with a text column",
+            "arity": "UNARY",
+            "required": true,
+            "acceptsDataset": true,
+            "acceptsManagedFolder": false
+        }
+    ],
+    "outputRoles": [
+        {
+            "name": "output_folder",
+            "label": "Word cloud folder",
+            "description": "Folder where the word clouds will be saved as images",
+            "arity": "UNARY",
+            "required": true,
+            "acceptsDataset": false,
+            "acceptsManagedFolder": true
+        }
+    ],
+    "paramsPythonSetup": "dynamic_select.py",
+    "params": [
+        {
+            "name": "separator_params",
+            "label": "Input parameters",
+            "type": "SEPARATOR"
+        },
+        {
+            "name": "text_column",
+            "type": "COLUMN",
+            "columnRole": "input_dataset",
+            "allowedColumnTypes": [
+                "string"
+            ],
+            "label": "Text column",
+            "mandatory": true
+        },
+        {
+            "name": "language",
+            "type": "SELECT",
+            "getChoicesFromPython": true,
+            "label": "Language",
+            "mandatory": true,
+            "defaultValue": "en"
+        },
+        {
+            "name": "language_column",
+            "label": "  ↳ Language column",
+            "type": "COLUMN",
+            "description": "Column containing ISO 639-1 language codes",
+            "columnRole": "input_dataset",
+            "allowedColumnTypes": [
+                "string"
+            ],
+            "visibilityCondition": "model.language == 'language_column'",
+            "mandatory": false
+        },
+        {
+            "name": "separator_text_handling",
+            "label": "Text simplification",
+            "type": "SEPARATOR"
+        },
+        {
+            "name": "remove_stopwords",
+            "label": "Clear stopwords",
+            "description": "Remove so-called 'stop words' (the, I, a, of, ...). This transformation is language-specific.",
+            "type": "BOOLEAN",
+            "defaultValue": true,
+            "mandatory": true
+        },
+        {
+            "name": "remove_punctuation",
+            "label": "Clear punctuation",
+            "description": "Remove punctuation characters e.g., ! ? ( ) . This transformation is language-specific.",
+            "type": "BOOLEAN",
+            "defaultValue": true,
+            "mandatory": true
+        },
+        {
+            "name": "case_insensitive",
+            "label": "Case-insensitive",
+            "description": "Normalize case to count 'You' and 'you' as the same word. The most common case will be displayed.",
+            "type": "BOOLEAN",
+            "defaultValue": false,
+            "mandatory": true
+        },
+        {
+            "name": "separator_display",
+            "label": "Display parameters",
+            "type": "SEPARATOR"
+        },
+        {
+            "type": "INT",
+            "name": "max_words",
+            "label": "Maximum number of words",
+            "minI": 1,
+            "defaultValue": 100
+        },
+        {
+            "type": "SELECT",
+            "name": "color_palette",
+            "label": "Color palette",
+            "description": "Select a built-in DSS palette or set a custom one to randomly color each word",
+            "mandatory": true,
+            "getChoicesFromPython": true,
+            "defaultValue": "default"
+        },
+        {
+            "type": "STRINGS",
+            "name": "color_list",
+            "label": "  ↳ Custom palette",
+            "description": "List of hexadecimal color codes",
+            "allowDuplicates": false,
+            "defaultValue": [
+                "#1F77B4",
+                "#FF7F0E",
+                "#2CA02C"
+            ],
+            "visibilityCondition": "model.color_palette == 'custom'"
+        },
+        {
+            "name": "separator_subcharts",
+            "label": "Subcharts",
+            "type": "SEPARATOR"
+        },
+        {
+            "name": "subchart_column",
+            "type": "COLUMN",
+            "columnRole": "input_dataset",
+            "allowedColumnTypes": [
+                "string"
+            ],
+            "label": "Split by column",
+            "description": "Optional column to generate one word cloud per category",
+            "mandatory": false
+        }
+    ],
+    "resourceKeys": []
+}
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+"""Module with built-in DSS color palettes"""
+
+
+DSS_BUILTIN_COLOR_PALETTES = [
+    {
+        "id": "default",
+        "name": "Default",
+        "colors": ["#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F"],
+    },
+    {
+        "id": "dku_dss_next",
+        "name": "DSS Next",
+        "colors": ["#00AEDB", "#8CC63F", "#FFC425", "#F37735", "#D11141", "#91268F", "#194BA3", "#00B159"],
+    },
+    {
+        "id": "dku_pastel1",
+        "name": "Pastel",
+        "colors": ["#EC6547", "#FDC665", "#95C37B", "#75C2CC", "#694A82", "#538BC8", "#65B890", "#A874A0"],
+    },
+    {
+        "id": "dku_corpo1",
+        "name": "Corporate",
+        "colors": ["#0075B2", "#818991", "#EA9423", "#A4C2DB", "#EF3C39", "#009D4B", "#CFD6D3", "#231F20"],
+    },
+    {
+        "id": "dku_deuteranopia1",
+        "name": "Deuteranopia",
+        "colors": ["#193C81", "#7EA0F9", "#211924", "#757A8D", "#D6C222", "#776A37", "#AE963A", "#655E5D"],
+    },
+    {
+        "id": "dku_tritanopia1",
+        "name": "Tritanopia",
+        "colors": ["#CA0849", "#0B4D61", "#E4B2BF", "#3F6279", "#F24576", "#7D8E98", "#9C4259", "#2B2A2E"],
+    },
+    {
+        "id": "dku_pastel2",
+        "name": "Pastel 2",
+        "colors": ["#F06548", "#FDC766", "#7BC9A6", "#4EC5DA", "#548ECB", "#97668F", "#5E2974"],
+    },
+]
+"""list: Built-in DSS color palettes, each a dict with an 'id', a display 'name' and a list of hex 'colors'
+
+This is an amended version of what is in the DSS code base, without the faded colors and truncated to at most
+the first 8 colors ('Pastel 2' only has 7)."""
@@ -14,7 +14,8 @@
     get_recipe_resource,
 )
 
-from language_dict import SUPPORTED_LANGUAGES_SPACY
+from language_support import SUPPORTED_LANGUAGES_SPACY
+from color_palettes import DSS_BUILTIN_COLOR_PALETTES
 from partitions_handling import get_folder_partition_root
 
 
@@ -114,13 +115,26 @@ def load_plugin_config_wordcloud() -> Dict:
         raise PluginParamValidationError("Maximum number of words is not a positive integer")
     logging.info(f"Max number of words: {params['max_words']}")
 
-    params["color_list"] = recipe_config.get("color_list")
-    if not (isinstance(params["color_list"], list) & (len(params["color_list"]) >= 1)):
-        raise PluginParamValidationError("Empty color palette")
-    if not all([matplotlib.colors.is_color_like(color) for color in params["color_list"]]):
-        raise PluginParamValidationError(f"Invalid color palette: {params['color_list']}")
-
-    params["color_list"] = [matplotlib.colors.to_hex(color) for color in params["color_list"]]
-    logging.info(f"Color list: {params['color_list']}")
+    color_palette = recipe_config.get("color_palette")
+    if not color_palette:
+        raise PluginParamValidationError("Empty color palette selection")
+    if color_palette == "custom":
+        params["color_list"] = recipe_config.get("color_list")
+        if not (isinstance(params["color_list"], list) & (len(params["color_list"]) >= 1)):
+            raise PluginParamValidationError("Empty custom palette")
+        if not all([matplotlib.colors.is_color_like(color) for color in params["color_list"]]):
+            raise PluginParamValidationError(f"Invalid custom palette: {params['color_list']}")
+        params["color_list"] = [matplotlib.colors.to_hex(color) for color in params["color_list"]]
+        logging.info(f"Custom palette: {params['color_list']}")
+    else:
+        if color_palette not in {builtin_palette["id"] for builtin_palette in DSS_BUILTIN_COLOR_PALETTES}:
+            raise PluginParamValidationError(f"Unsupported color palette: {color_palette}")
+        selected_palette_dict = [
+            builtin_palette for builtin_palette in DSS_BUILTIN_COLOR_PALETTES if builtin_palette["id"] == color_palette
+        ][0]
+        params["color_list"] = selected_palette_dict["colors"]
+        logging.info(
+            f"Using built-in DSS palette: '{selected_palette_dict['name']}' with colors: {params['color_list']}"
+        )
 
     return params
@@ -17,7 +17,7 @@
 from emoji import UNICODE_EMOJI
 from fastcore.utils import store_attr
 
-from language_dict import SUPPORTED_LANGUAGES_SPACY, SPACY_LANGUAGE_MODELS
+from language_support import SUPPORTED_LANGUAGES_SPACY, SPACY_LANGUAGE_MODELS
 from plugin_io_utils import generate_unique, truncate_text_list