sapientml · tashiro-akira · Oct 24, 2023 · Oct 24, 2023 · Oct 27, 2023 · Oct 31, 2023
@@ -221,6 +221,29 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
         for pipeline in sapientml_results:
             pipeline.validation = code_block.validation + pipeline.validation
             pipeline.test = code_block.test + pipeline.test
+            # The rendered templates read `feature_train_csv` and `TARGET_COLUMNS_csv`;
+            # define each right before the statement that first uses it. Names are
+            # cleaned only when the generated script mentions `cols_has_symbols`.
+            if "cols_has_symbols" in pipeline.test:
+                feature_expr = "feature_train.rename(columns=rename_symbol_cols)"
+                # Map every target, not just the first; assumes `rename_symbol_cols` is a dict (TODO confirm).
+                target_expr = "[rename_symbol_cols.get(c, c) for c in TARGET_COLUMNS]"
+            else:
+                feature_expr = "feature_train"
+                target_expr = "list(TARGET_COLUMNS)"
+            csv_definitions = (
+                # The trailing indent re-aligns `perm_df`, which sits inside an `if` body.
+                ("perm_df = pd.DataFrame", f"feature_train_csv = {feature_expr}\n    "),
+                ("prediction = pd.DataFrame", f"TARGET_COLUMNS_csv = {target_expr}\n"),
+            )
+            for marker, definition in csv_definitions:
+                # `find`, not `index`: a marker may be missing if its template was not
+                # rendered, and that should not abort pipeline generation.
+                addindex = pipeline.test.find(marker)
+                if addindex == -1:
+                    continue
+                pipeline.test = pipeline.test[:addindex] + definition + pipeline.test[addindex:]
+
             pipeline.train = code_block.train + pipeline.train
             pipeline.predict = code_block.predict + pipeline.predict
             result_pipelines.append(pipeline)

diff --git a/sapientml_core/templates/other_templates/permutation_importance.py.jinja b/sapientml_core/templates/other_templates/permutation_importance.py.jinja
@@ -13,13 +13,13 @@ if len(feature_train.columns) <= 100:
     perm = permutation_importance(model, feature_train.sparse.to_dense(), {{ TARGET_TRAIN }},
                                     n_repeats=5,
                                     random_state=0)
-    perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
+    perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
     perm_df.to_csv("./permutation_importance.csv", index=False)
 {% else %}
 if len(feature_train.columns) <= 100:
     perm = permutation_importance(model, feature_train, {{ TARGET_TRAIN }},
                                     n_repeats=5,
                                     random_state=0)
-    perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
+    perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
     perm_df.to_csv("./permutation_importance.csv", index=False)
 {% endif %}
diff --git a/sapientml_core/templates/other_templates/prediction_result.py.jinja b/sapientml_core/templates/other_templates/prediction_result.py.jinja
@@ -5,17 +5,17 @@ prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.
 {% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
 {% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
-prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
 {% elif pipeline.config.predict_option is none and model_name == xgbclassifier and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.classes_), index=feature_test.index)
 {% elif pipeline.config.predict_option is none and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
 prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
 {% elif pipeline.config.predict_option is none and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
-prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
 {% elif pipeline.adaptation_metric.startswith("MAP_") %}
 {% set k = pipeline.adaptation_metric.split("_")[1] %}
-prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
+prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS_csv[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
 {% else %}
-prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS, index=feature_test.index)
+prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS_csv, index=feature_test.index)
 {% endif %}
 prediction.to_csv("./prediction_result.csv")