Skip to content

Commit 230bc15

Browse files
committed
var name changes for consistency
1 parent 0670e55 commit 230bc15

File tree

5 files changed

+40
-90
lines changed

5 files changed

+40
-90
lines changed

examples/tartan_federer_attack/README.md

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,14 @@
22

33
This example runs a Tartan–Federer membership inference attack using trained TabDDPM models. The pipeline optionally performs a data processing step to prepare population datasets for training and validating the attack and then executes the attack using the trained classifier.
44

5-
##
65

76
## Data Processing
87

9-
#TODO: Train 30 target models with real data and synthetic data in the same way of the MIDST # competition and store them under attack_config.models_base_dir. Upload them to a google # drive and add the link here.
10-
The data processing step constructs population datasets resembling the real data available to the attacker. A selected subset of `train_with_id.csv` files is collected from `tabddpm_1` to `tabddpm_6` located under:
8+
#TODO: Train 30 target models with real data and synthetic data in the same way as the MIDST competition. Upload them to a Google Drive and add the link here. Currently, we only have 6.
119

12-
```
13-
examples/tartan_federer_attack/tabddpm_trained_with_20k/tabddpm_white_box
14-
```
10+
Download the folder from `https://drive.google.com/uc?export=download&id=12gzxNzFzKCF13IzJjZdk3Ba5XTaIrLjO` and store them under `data_paths.midst_data_path`. The data processing step constructs population datasets used for training the attacks, resembling the real data available to the attacker, using the training data corresponding to each available target model.
1511

16-
For each selected model, both `train_with_id.csv` and `challenge_with_id.csv` are loaded. All training datasets are merged into a single dataframe and all challenge datasets are merged into a single dataframe. Any training samples that also appear in the challenge dataset are removed, and duplicate samples are dropped based on configured identifier columns.
12+
For each selected folder, both `train_with_id.csv` and `challenge_with_id.csv` are loaded. All training datasets are merged into a single dataframe and all challenge datasets are merged into a single dataframe. Any training samples that also appear in the challenge dataset are removed, and duplicate samples are dropped based on configured identifier columns.
1713

1814
The model indices used to build the population datasets for training and validation are specified in the configuration file:
1915

examples/tartan_federer_attack/configs/experiment_config.yaml

Lines changed: 0 additions & 47 deletions
This file was deleted.

examples/tartan_federer_attack/run_attack.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,13 @@ def run_data_processing(config: dict[str, Any]) -> None:
8080
log(INFO, "Running data processing pipeline...")
8181

8282
population_data_path = Path(config["data_paths"]["population_data_path"])
83+
midst_data_path = Path(config["data_paths"]["midst_data_path"])
8384
population_data_path.mkdir(parents=True, exist_ok=True)
8485

8586
population_data_for_training_attack = prepare_population_dataset_for_attack(
8687
model_indices=config["data_processing_config"]["population_attack_indices_to_collect_for_training"],
8788
model_type=config["data_processing_config"]["model_type"],
88-
models_base_dir=Path(config["data_paths"]["midst_data_path"]),
89+
models_base_dir=midst_data_path,
8990
columns_for_deduplication=config["data_processing_config"]["columns_for_deduplication"],
9091
)
9192

@@ -97,7 +98,7 @@ def run_data_processing(config: dict[str, Any]) -> None:
9798
population_data_for_validating_attack = prepare_population_dataset_for_attack(
9899
model_indices=config["data_processing_config"]["population_attack_indices_to_collect_for_validation"],
99100
model_type=config["data_processing_config"]["model_type"],
100-
models_base_dir=Path(config["data_paths"]["midst_data_path"]),
101+
models_base_dir=midst_data_path,
101102
columns_for_deduplication=config["data_processing_config"]["columns_for_deduplication"],
102103
)
103104

@@ -134,26 +135,26 @@ def run_attack(config: DictConfig) -> None:
134135
attack_cfg = cfg["attack_config"]
135136
classifier_cfg = cfg["classifier_config"]
136137

137-
mia_performance_train, mia_performance_val, mia_performance_test = tartan_federer_attack(
138-
model_type=attack_cfg["model_type"],
139-
model_data_dir=Path(attack_cfg["models_base_dir"]),
140-
target_model_subdir=Path(attack_cfg["target_shadow_model_subdir"]),
141-
samples_per_train_model=attack_cfg["samples_per_train_model"],
142-
sample_per_val_model=attack_cfg["samples_per_val_model"],
143-
num_noise_per_time_step=attack_cfg["num_noise_per_time_step"],
144-
timesteps=attack_cfg["timesteps"],
145-
additional_timesteps=attack_cfg["additional_timesteps"],
146-
predictions_file_format=attack_cfg["predictions_file_name"],
147-
results_path=Path(attack_cfg["results_path"]),
148-
test_indices=attack_cfg["test_indices"],
138+
_mia_performance_train, _mia_performance_val, _mia_performance_test = tartan_federer_attack(
149139
train_indices=attack_cfg["train_indices"],
150140
val_indices=attack_cfg["val_indices"],
141+
test_indices=attack_cfg["test_indices"],
151142
columns_for_deduplication=attack_cfg["columns_for_deduplication"],
152-
classifier_hidden_dim=classifier_cfg["hidden_dim"],
143+
timesteps=attack_cfg["timesteps"],
144+
additional_timesteps=attack_cfg["additional_timesteps"],
145+
num_noise_per_time_step=attack_cfg["num_noise_per_time_step"],
146+
samples_per_train_model=attack_cfg["samples_per_train_model"],
147+
samples_per_val_model=attack_cfg["samples_per_val_model"],
153148
classifier_num_epochs=classifier_cfg["num_epochs"],
149+
classifier_hidden_dim=classifier_cfg["hidden_dim"],
154150
classifier_learning_rate=classifier_cfg["learning_rate"],
155-
meta_dir=Path(config["data_paths"]["metadata_dir"]),
151+
model_type=attack_cfg["model_type"],
152+
predictions_file_name=attack_cfg["predictions_file_name"],
156153
population_data_dir=Path(data_cfg["population_data_path"]),
154+
model_data_dir=Path(config["data_paths"]["midst_data_path"]),
155+
meta_dir=Path(config["data_paths"]["metadata_dir"]),
156+
target_model_subdir=Path(attack_cfg["target_shadow_model_subdir"]),
157+
results_path=Path(attack_cfg["results_path"]),
157158
)
158159

159160
unset_all_random_seeds()

src/midst_toolkit/attacks/tartan_federer/tartan_federer_attack.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def train_tartan_federer_attack_classifier(
402402
additional_timesteps: list[int],
403403
num_noise_per_time_step: int,
404404
samples_per_train_model: int,
405-
sample_per_val_model: int,
405+
samples_per_val_model: int,
406406
classifier_num_epochs: int,
407407
classifier_hidden_dim: int,
408408
classifier_learning_rate: float,
@@ -427,7 +427,7 @@ def train_tartan_federer_attack_classifier(
427427
num_noise_per_time_step: Number of Gaussian noise samples to be used for each timestep in the loss computation.
428428
samples_per_train_model: Number of samples drawn from the training data (members) of train indices and
429429
non-members for training the binary classifier.
430-
sample_per_val_model: Number of samples drawn from the training data (members) of validation indices and
430+
samples_per_val_model: Number of samples drawn from the training data (members) of validation indices and
431431
non-members for validating the binary classifier.
432432
classifier_num_epochs: Number of epochs used to train the MLP as the binary classifier.
433433
classifier_hidden_dim: The width of the 3-layer MLP trained as the binary classifier.
@@ -457,7 +457,7 @@ def train_tartan_federer_attack_classifier(
457457
y_train = np.zeros([total_data_num_for_train])
458458

459459
if val_indices is not None:
460-
total_data_num_for_validation = sample_per_val_model * 2 * len(val_indices)
460+
total_data_num_for_validation = samples_per_val_model * 2 * len(val_indices)
461461
x_val = np.zeros([total_data_num_for_validation, input_dimension])
462462
y_val = np.zeros([total_data_num_for_validation])
463463
else:
@@ -488,7 +488,7 @@ def train_tartan_federer_attack_classifier(
488488
model_dir,
489489
population_df_for_validation,
490490
columns_for_deduplication,
491-
sample_per_val_model,
491+
samples_per_val_model,
492492
"data_for_validating_MIA.csv",
493493
)
494494

@@ -528,7 +528,7 @@ def train_tartan_federer_attack_classifier(
528528
timestep_count += 1
529529

530530
elif val_indices is not None and model_number in val_indices:
531-
batch_size = sample_per_val_model * 2
531+
batch_size = samples_per_val_model * 2
532532
predictions = get_score(
533533
model_dir,
534534
model_path,
@@ -543,12 +543,12 @@ def train_tartan_federer_attack_classifier(
543543
)
544544
assert x_val is not None and y_val is not None
545545
x_val[
546-
sample_per_val_model * 2 * val_count : sample_per_val_model * 2 * (val_count + 1),
546+
samples_per_val_model * 2 * val_count : samples_per_val_model * 2 * (val_count + 1),
547547
timestep_count * num_noise_per_time_step : (timestep_count + 1) * num_noise_per_time_step,
548548
] = predictions.detach().squeeze().cpu().numpy()
549549

550-
y_val[sample_per_val_model * 2 * val_count : sample_per_val_model * 2 * (val_count + 1)] = (
551-
np.concatenate([np.zeros(sample_per_val_model), np.ones(sample_per_val_model)])
550+
y_val[samples_per_val_model * 2 * val_count : samples_per_val_model * 2 * (val_count + 1)] = (
551+
np.concatenate([np.zeros(samples_per_val_model), np.ones(samples_per_val_model)])
552552
)
553553

554554
timestep_count += 1
@@ -579,12 +579,12 @@ def tartan_federer_attack(
579579
additional_timesteps: list[int],
580580
num_noise_per_time_step: int,
581581
samples_per_train_model: int,
582-
sample_per_val_model: int,
582+
samples_per_val_model: int,
583583
classifier_num_epochs: int,
584584
classifier_hidden_dim: int,
585585
classifier_learning_rate: float,
586586
model_type: str,
587-
predictions_file_format: str,
587+
predictions_file_name: str,
588588
population_data_dir: Path,
589589
model_data_dir: Path,
590590
meta_dir: Path,
@@ -609,14 +609,14 @@ def tartan_federer_attack(
609609
num_noise_per_time_step: Number of Gaussian noise samples to be used for each timestep in the loss computation.
610610
samples_per_train_model: Number of samples drawn from the training data (members) of train indices and
611611
non-members for training the binary classifier.
612-
sample_per_val_model: Number of samples drawn from the training data (members) of validation indices and
612+
samples_per_val_model: Number of samples drawn from the training data (members) of validation indices and
613613
non-members for validating the binary classifier.
614614
classifier_num_epochs: Number of epochs used to train the MLP as the binary classifier.
615615
classifier_hidden_dim: The width of the 3-layer MLP trained as the binary classifier.
616616
classifier_learning_rate: Learning rate used to train the binary classifier.
617617
population_data_dir: Directory containing the population datasets used to train and validate the attack.
618618
model_type: Type of diffusion model, e.g., "tabddpm" for ClavaDDPM-single-table.
619-
predictions_file_format: Format for naming the MIA prediction files.
619+
predictions_file_name: Format for naming the MIA prediction files.
620620
model_data_dir: Base directory containing all the trained diffusion models.
621621
meta_dir: Directory containing metadata about the datasets, including a file named `dataset_meta.json`.
622622
target_model_subdir: Sub-directory within each model directory containing the trained diffusion model
@@ -636,7 +636,7 @@ def tartan_federer_attack(
636636
val_indices=val_indices,
637637
columns_for_deduplication=columns_for_deduplication,
638638
samples_per_train_model=samples_per_train_model,
639-
sample_per_val_model=sample_per_val_model,
639+
samples_per_val_model=samples_per_val_model,
640640
population_data_dir=population_data_dir,
641641
model_type=model_type,
642642
model_data_dir=model_data_dir,
@@ -651,7 +651,7 @@ def tartan_federer_attack(
651651
classifier_learning_rate=classifier_learning_rate,
652652
)
653653

654-
predictions_file_name = f"{predictions_file_format}.csv"
654+
predictions_file_name = f"{predictions_file_name}.csv"
655655

656656
if val_indices is None:
657657
model_folders_indices = np.concatenate((train_indices, test_indices))

tests/integration/attacks/tartan_federer/test_tartan_federer_attack.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ def test_tf_attack_whitebox_tiny_config_midst_toolkit():
2828
"classifier_hidden_dim": 20,
2929
"classifier_num_epochs": 200,
3030
"samples_per_train_model": 3000,
31-
"sample_per_val_model": 10,
31+
"samples_per_val_model": 10,
3232
"num_noise_per_time_step": 30,
3333
"timesteps": [5, 10, 15],
3434
"additional_timesteps": [0],
35-
"predictions_file_format": "challenge_label_predictions",
35+
"predictions_file_name": "challenge_label_predictions",
3636
# TODO: Make results path a temp directory
3737
"results_path": Path(__file__).parent / "assets" / "tartan_federer_attack_results",
3838
"test_indices": [5, 6],
@@ -83,11 +83,11 @@ def test_tf_attack_whitebox_tiny_config_midst_toolkit_single_model():
8383
"classifier_hidden_dim": 100,
8484
"classifier_num_epochs": 200,
8585
"samples_per_train_model": 3000,
86-
"sample_per_val_model": 10,
86+
"samples_per_val_model": 10,
8787
"num_noise_per_time_step": 30,
8888
"timesteps": [5, 10, 15],
8989
"additional_timesteps": [0],
90-
"predictions_file_format": "challenge_label_predictions",
90+
"predictions_file_name": "challenge_label_predictions",
9191
# TODO: Make results path a temp directory
9292
"results_path": Path(__file__).parent / "assets" / "tartan_federer_attack_results",
9393
"test_indices": [3],
@@ -138,11 +138,11 @@ def test_tf_attack_whitebox_tiny_config_midst_toolkit_no_validation():
138138
"classifier_hidden_dim": 100,
139139
"classifier_num_epochs": 200,
140140
"samples_per_train_model": 3000,
141-
"sample_per_val_model": 10,
141+
"samples_per_val_model": 10,
142142
"num_noise_per_time_step": 30,
143143
"timesteps": [5, 10, 15],
144144
"additional_timesteps": [0],
145-
"predictions_file_format": "challenge_label_predictions",
145+
"predictions_file_name": "challenge_label_predictions",
146146
# TODO: Make results path a temp directory
147147
"results_path": Path(__file__).parent / "assets" / "tartan_federer_attack_results",
148148
"test_indices": [2],

0 commit comments

Comments
 (0)