sapientml · tashiro-akira · Oct 25, 2023 · Oct 25, 2023 · Oct 26, 2023 · Nov 8, 2023
@@ -271,30 +271,10 @@ def generate_code(self, dataset: Dataset, task: Task) -> Tuple[Dataset, Code]:
             df = df.drop(col, axis=1)
         if cols_numeric_and_string:
             tpl = template_env.get_template("handle_mixed_typed_columns.py.jinja")
-            code.validation += _render(
-                tpl,
-                training=True,
-                test=True,
-                cols_numeric_and_string=cols_numeric_and_string
-            )
-            code.test += _render(
-                tpl,
-                training=True,
-                test=True,
-                cols_numeric_and_string=cols_numeric_and_string
-            )
-            code.train += _render(
-                tpl,
-                training=True,
-                test=False,
-                cols_numeric_and_string=cols_numeric_and_string
-            )
-            code.predict += _render(
-                tpl,
-                training=False,
-                test=True,
-                cols_numeric_and_string=cols_numeric_and_string
-            )
+            code.validation += _render(tpl, training=True, test=True, cols_numeric_and_string=cols_numeric_and_string)
+            code.test += _render(tpl, training=True, test=True, cols_numeric_and_string=cols_numeric_and_string)
+            code.train += _render(tpl, training=True, test=False, cols_numeric_and_string=cols_numeric_and_string)
+            code.predict += _render(tpl, training=False, test=True, cols_numeric_and_string=cols_numeric_and_string)
 
         # meta features must be calculated after replacing inf with nan,
         # because the replaced nan must be preprocessed in the generated code.

@@ -3,6 +3,8 @@ import re
 cols_has_symbols = {{ cols_has_symbols }}
 inhibited_symbol_pattern = re.compile(r"[\{\}\[\]\",:<'\\]+")
 {% if training %}
+# Reverse lookup: sanitized column name -> original column name (on a collision the last original wins).
+rename_symbol_cols = {inhibited_symbol_pattern.sub("", col): col for col in cols_has_symbols}
 train_dataset = train_dataset.rename(columns=lambda col: inhibited_symbol_pattern.sub("", col) if col in cols_has_symbols else col)
 {% endif %}
 {% if test %}