Skip to content
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5121800
Fixed an error when there were many missing bool columns in the input…
Oct 24, 2023
829c3c2
Merge branch 'main' of https://github.com/tashiro-akira/core
Oct 24, 2023
96ecff3
Merge branch 'sapientml:main' into main
tashiro-akira Oct 27, 2023
d9e4704
Merge branch 'sapientml:main' into main
tashiro-akira Oct 31, 2023
b684d5c
fix:Add action to return column names
Nov 7, 2023
e47bebc
Merge branch 'sapientml:main' into main
tashiro-akira Nov 8, 2023
5572a54
fix:Reflects the findings of the review
Nov 14, 2023
334f690
fix:Fixed an error with mixed target column types.
Nov 20, 2023
5264b21
Merge branch 'sapientml:main' into main
tashiro-akira Nov 21, 2023
83438c0
fix: Return changes to move modifications to another branch.
Nov 21, 2023
8358cee
Merge branch 'sapientml:main' into main
tashiro-akira Dec 7, 2023
acabc47
Merge branch 'sapientml:main' into main
tashiro-akira Dec 11, 2023
5244dae
fix: Merged master modifications.
Dec 11, 2023
2f43a68
fix:Reflected review results
Dec 12, 2023
e678461
style:Removed unnecessary blank lines.
Dec 12, 2023
5254cfe
fix:Fixed to return column names in csv file
Dec 22, 2023
a64f3d3
Merge branch 'sapientml:main' into #8_jinja
tashiro-akira Jan 11, 2024
8381704
fix:Reflected the content of the review
Jan 11, 2024
bfb31c1
Merge branch 'main' into #8_jinja
kimusaku Feb 6, 2024
e1e4644
fix:Save Changes Temporarily
Feb 27, 2024
0a91bba
Merge branch 'sapientml:main' into #8_jinja
tashiro-akira Mar 11, 2024
1619b10
fix:Reflect Modifications
Mar 11, 2024
440db93
Merge branch '#8_jinja' of https://github.com/tashiro-akira/core into…
Mar 11, 2024
1cee451
fix:Reflected review results
Mar 22, 2024
78d1aad
fix:Remove Unnecessary Modifications
Mar 22, 2024
7724cd1
fix:The format has been modified.
Mar 22, 2024
969ded3
fix:Reflect Review Results
tashiro-akira Apr 25, 2024
426a32d
fix:Fixed error in running lint
tashiro-akira Apr 25, 2024
9f2bea0
Merge branch 'main' into #8_jinja
AkiraUra May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions sapientml_core/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,29 @@ def generate_pipeline(self, dataset: Dataset, task: Task):
for pipeline in sapientml_results:
pipeline.validation = code_block.validation + pipeline.validation
pipeline.test = code_block.test + pipeline.test
if "cols_has_symbols" in pipeline.test:
addindex = pipeline.test.index("perm_df = pd.DataFrame")
pipeline.test = (
pipeline.test[:addindex]
+ "feature_train_csv = feature_train.rename(columns=rename_symbol_cols)\n "
+ pipeline.test[addindex:]
)
addindex = pipeline.test.index("prediction = pd.DataFrame")
pipeline.test = (
pipeline.test[:addindex]
+ "TARGET_COLUMNS_csv = [rename_symbol_cols[TARGET_COLUMNS[0]]]\n"
+ pipeline.test[addindex:]
)
else:
addindex = pipeline.test.index("perm_df = pd.DataFrame")
pipeline.test = (
pipeline.test[:addindex] + "feature_train_csv = feature_train\n " + pipeline.test[addindex:]
)
addindex = pipeline.test.index("prediction = pd.DataFrame")
pipeline.test = (
pipeline.test[:addindex] + "TARGET_COLUMNS_csv = [TARGET_COLUMNS[0]]\n" + pipeline.test[addindex:]
)

pipeline.train = code_block.train + pipeline.train
pipeline.predict = code_block.predict + pipeline.predict
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about the following code?

            if "cols_has_symbols" in pipeline.test:
                # Local import keeps this snippet self-contained.
                import re

                # Rename the permutation-importance frame's columns right before it is written to CSV.
                pipeline.test = pipeline.test.replace('perm_df.to_csv', 'perm_df.rename(columns=rename_symbol_cols).to_csv')

                def replace_targets(match_obj):
                    """Return the matched line with TARGET_COLUMNS mapped through rename_symbol_cols."""
                    return match_obj[0].replace('TARGET_COLUMNS', "[rename_symbol_cols.get(v, v) for v in TARGET_COLUMNS]")

                # The pattern must be compiled: a plain str has no .sub() method,
                # so calling pat.sub(...) on the raw string raises AttributeError.
                # NOTE(review): only the `y_prob` form of the prediction line is matched;
                # confirm whether the `y_pred` form should be rewritten as well.
                pat = re.compile(r"prediction = pd.DataFrame\(y_prob, columns=.?TARGET_COLUMNS.*, index=feature_test.index\)")
                pipeline.test = pat.sub(replace_targets, pipeline.test)
                pipeline.predict = pat.sub(replace_targets, pipeline.predict)
            pipeline.validation = code_block.validation + pipeline.validation
            pipeline.test = code_block.test + pipeline.test
            pipeline.train = code_block.train + pipeline.train
            pipeline.predict = code_block.predict + pipeline.predict

Copy link
Contributor Author

@tashiro-akira tashiro-akira Jan 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AkiraUra
When I modified the code as described in the comment, the column names in the output CSV were not restored from their modified state. (I also modified preprocess.)

The string-substitution approach in the suggested code is shorter than my previous code, so I will try to fix it with reference to this suggestion.

result_pipelines.append(pipeline)
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these changes are necessary.

Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ if len(feature_train.columns) <= 100:
perm = permutation_importance(model, feature_train.sparse.to_dense(), {{ TARGET_TRAIN }},
n_repeats=5,
random_state=0)
perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
perm_df.to_csv("./permutation_importance.csv", index=False)
{% else %}
if len(feature_train.columns) <= 100:
perm = permutation_importance(model, feature_train, {{ TARGET_TRAIN }},
n_repeats=5,
random_state=0)
perm_df = pd.DataFrame({"feature": feature_train.columns, "importance": perm.importances_mean})
perm_df = pd.DataFrame({"feature": feature_train_csv.columns, "importance": perm.importances_mean})
perm_df.to_csv("./permutation_importance.csv", index=False)
{% endif %}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these changes are necessary.

Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.
{% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
{% elif pipeline.config.predict_option == macros.PRED_PROBABILITY and (pipeline.adaptation_metric in macros.metrics_for_classification) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
{% elif pipeline.config.predict_option is none and model_name == xgbclassifier and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
prediction = pd.DataFrame(y_prob, columns=label_encoder.inverse_transform(model.classes_), index=feature_test.index)
{% elif pipeline.config.predict_option is none and pipeline.task.is_multiclass == True and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
prediction = pd.DataFrame(y_prob, columns=model.classes_, index=feature_test.index)
{% elif pipeline.config.predict_option is none and (pipeline.adaptation_metric in macros.metric_needing_predict_proba) and (not pipeline.adaptation_metric.startswith("MAP_"))%}
prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS, index=feature_test.index)
prediction = pd.DataFrame(y_prob, columns=TARGET_COLUMNS_csv, index=feature_test.index)
{% elif pipeline.adaptation_metric.startswith("MAP_") %}
{% set k = pipeline.adaptation_metric.split("_")[1] %}
prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
prediction = pd.DataFrame(y_prob, columns=[TARGET_COLUMNS_csv[0] + "_" +str(i) for i in range(1, y_prob.shape[1] + 1)], index=feature_test.index)
{% else %}
prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS, index=feature_test.index)
prediction = pd.DataFrame(y_pred, columns=TARGET_COLUMNS_csv, index=feature_test.index)
{% endif %}
prediction.to_csv("./prediction_result.csv")