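update analysis: add --get_result flag to save the imputation result as CSV, and skip sweep runs whose run_kwargs are not listed in the pipeline planner config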

xingzhongyu · xingzhongyu · commit c6f60dfa8009 · 2025-02-21T14:55:48.000+08:00
diff --git a/examples/tuning/imputation_deepimpute/main.py b/examples/tuning/imputation_deepimpute/main.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 
 import numpy as np
+import pandas as pd
 import torch
 import wandb
 
@@ -40,22 +41,30 @@
     parser.add_argument("--sweep_id", type=str, default=None)
     parser.add_argument("--summary_file_path", default="results/pipeline/best_test_acc.csv", type=str)
     parser.add_argument("--root_path", default=str(Path(__file__).resolve().parent), type=str)
-
+    parser.add_argument("--get_result", action="store_true",help="save imputation result")
     params = parser.parse_args()
     print(vars(params))
     file_root_path = Path(params.root_path, params.dataset).resolve()
     logger.info(f"\n files is saved in {file_root_path}")
     pipeline_planer = PipelinePlaner.from_config_file(f"{file_root_path}/{params.tune_mode}_tuning_config.yaml")
     os.environ["WANDB_AGENT_MAX_INITIAL_FAILURES"] = "2000"
-
+    logger.info(params.tune_mode)
     def evaluate_pipeline(tune_mode=params.tune_mode, pipeline_planer=pipeline_planer):
         wandb.init(settings=wandb.Settings(start_method='thread'))
         set_seed(params.seed)
 
         data = ImputationDataset(data_dir=params.data_dir, dataset=params.dataset,
                                  train_size=params.train_size).load_data()
         # Prepare preprocessing pipeline and apply it to data
-        kwargs = {tune_mode: dict(wandb.config)}
+        wandb_config = wandb.config
+        if "run_kwargs" in pipeline_planer.config:
+            if any(d == dict(wandb.config["run_kwargs"]) for d in pipeline_planer.config.run_kwargs):
+                wandb_config = wandb_config["run_kwargs"]
+            else:
+                wandb.log({"skip": 1})
+                wandb.finish()
+                return
+        kwargs = {tune_mode: dict(wandb_config)}
         preprocessing_pipeline = pipeline_planer.generate(**kwargs)
         print(f"Pipeline config:\n{preprocessing_pipeline.to_yaml()}")
         preprocessing_pipeline(data)
@@ -77,10 +86,20 @@ def evaluate_pipeline(tune_mode=params.tune_mode, pipeline_planer=pipeline_plane
         pcc = model.score(X, imputed_data, mask, "PCC")
         mre = model.score(X, imputed_data, mask, metric="MRE")
         wandb.log({"RMSE": score, "PCC": pcc, "MRE": mre})
-
+        if params.get_result:
+            result=model.predict(X,None)
+            array = result.detach().cpu().numpy()
+            df = pd.DataFrame(
+                data=array,
+                index=data.data.obs_names,
+                columns=data.data.var_names
+            )
+            df.to_csv(f"{params.dataset}/result.csv")
     entity, project, sweep_id = pipeline_planer.wandb_sweep_agent(
         evaluate_pipeline, sweep_id=params.sweep_id, count=params.count)  #Score can be recorded for each epoch
     save_summary_data(entity, project, sweep_id, summary_file_path=params.summary_file_path, root_path=file_root_path)
+    if params.get_result:
+        sys.exit(0)
     if params.tune_mode == "pipeline" or params.tune_mode == "pipeline_params":
         get_step3_yaml(result_load_path=f"{params.summary_file_path}", step2_pipeline_planer=pipeline_planer,
                        conf_load_path=f"{Path(params.root_path).resolve().parent}/step3_default_params.yaml",