Skip to content

Commit 230bc15

Browse files
committed
var name changes for consistency
1 parent 0670e55 commit 230bc15

File tree

5 files changed

+40
-90
lines changed

5 files changed

+40
-90
lines changed

examples/tartan_federer_attack/README.md

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,14 @@
22

33
This example runs a Tartan–Federer membership inference attack using trained TabDDPM models. The pipeline optionally performs a data processing step to prepare population datasets for training and validating the attack and then executes the attack using the trained classifier.
44

5-
##
65

76
## Data Processing
87

9-
#TODO: Train 30 target models with real data and synthetic data in the same way of the MIDST # competition and store them under attack_config.models_base_dir. Upload them to a google # drive and add the link here.
10-
The data processing step constructs population datasets resembling the real data available to the attacker. A selected subset of `train_with_id.csv` files is collected from `tabddpm_1` to `tabddpm_6` located under:
8+
#TODO: Train 30 target models with real data and synthetic data in the same way as the MIDST competition. Upload them to a Google Drive and add the link here. Currently, we only have 6.
119

12-
```
13-
examples/tartan_federer_attack/tabddpm_trained_with_20k/tabddpm_white_box
14-
```
10+
Download the folder from `https://drive.google.com/uc?export=download&id=12gzxNzFzKCF13IzJjZdk3Ba5XTaIrLjO` and store them under `data_paths.midst_data_path`. The data processing step constructs population datasets used for training the attacks, resembling the real data available to the attacker, using the training data corresponding to each available target model.
1511

16-
For each selected model, both `train_with_id.csv` and `challenge_with_id.csv` are loaded. All training datasets are merged into a single dataframe and all challenge datasets are merged into a single dataframe. Any training samples that also appear in the challenge dataset are removed, and duplicate samples are dropped based on configured identifier columns.
12+
For each selected folder, both `train_with_id.csv` and `challenge_with_id.csv` are loaded. All training datasets are merged into a single dataframe and all challenge datasets are merged into a single dataframe. Any training samples that also appear in the challenge dataset are removed, and duplicate samples are dropped based on configured identifier columns.
1713

1814
The model indices used to build the population datasets for training and validation are specified in the configuration file:
1915

examples/tartan_federer_attack/configs/experiment_config.yaml

Lines changed: 0 additions & 47 deletions
This file was deleted.

examples/tartan_federer_attack/run_attack.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,13 @@ def run_data_processing(config: dict[str, Any]) -> None:
8080
log(INFO, "Running data processing pipeline...")
8181

8282
population_data_path = Path(config["data_paths"]["population_data_path"])
83+
midst_data_path = Path(config["data_paths"]["midst_data_path"])
8384
population_data_path.mkdir(parents=True, exist_ok=True)
8485

8586
population_data_for_training_attack = prepare_population_dataset_for_attack(
8687
model_indices=config["data_processing_config"]["population_attack_indices_to_collect_for_training"],
8788
model_type=config["data_processing_config"]["model_type"],
88-
models_base_dir=Path(config["data_paths"]["midst_data_path"]),
89+
models_base_dir=midst_data_path,
8990
columns_for_deduplication=config["data_processing_config"]["columns_for_deduplication"],
9091
)
9192

@@ -97,7 +98,7 @@ def run_data_processing(config: dict[str, Any]) -> None:
9798
population_data_for_validating_attack = prepare_population_dataset_for_attack(
9899
model_indices=config["data_processing_config"]["population_attack_indices_to_collect_for_validation"],
99100
model_type=config["data_processing_config"]["model_type"],
100-
models_base_dir=Path(config["data_paths"]["midst_data_path"]),
101+
models_base_dir=midst_data_path,
101102
columns_for_deduplication=config["data_processing_config"]["columns_for_deduplication"],
102103
)
103104

@@ -134,26 +135,26 @@ def run_attack(config: DictConfig) -> None:
134135
attack_cfg = cfg["attack_config"]
135136
classifier_cfg = cfg["classifier_config"]
136137

137-
mia_performance_train, mia_performance_val, mia_performance_test = tartan_federer_attack(
138-
model_type=attack_cfg["model_type"],
139-
model_data_dir=Path(attack_cfg["models_base_dir"]),
140-
target_model_subdir=Path(attack_cfg["target_shadow_model_subdir"]),
141-
samples_per_train_model=attack_cfg["samples_per_train_model"],
142-
sample_per_val_model=attack_cfg["samples_per_val_model"],
143-
num_noise_per_time_step=attack_cfg["num_noise_per_time_step"],
144-
timesteps=attack_cfg["timesteps"],
145-
additional_timesteps=attack_cfg["additional_timesteps"],
146-
predictions_file_format=attack_cfg["predictions_file_name"],
147-
results_path=Path(attack_cfg["results_path"]),
148-
test_indices=attack_cfg["test_indices"],
138+
_mia_performance_train, _mia_performance_val, _mia_performance_test = tartan_federer_attack(
149139
train_indices=attack_cfg["train_indices"],
150140
val_indices=attack_cfg["val_indices"],
141+
test_indices=attack_cfg["test_indices"],
151142
columns_for_deduplication=attack_cfg["columns_for_deduplication"],
152-
classifier_hidden_dim=classifier_cfg["hidden_dim"],
143+
timesteps=attack_cfg["timesteps"],
144+
additional_timesteps=attack_cfg["additional_timesteps"],
145+
num_noise_per_time_step=attack_cfg["num_noise_per_time_step"],
146+
samples_per_train_model=attack_cfg["samples_per_train_model"],
147+
samples_per_val_model=attack_cfg["samples_per_val_model"],
153148
classifier_num_epochs=classifier_cfg["num_epochs"],
149+
classifier_hidden_dim=classifier_cfg["hidden_dim"],
154150
classifier_learning_rate=classifier_cfg["learning_rate"],
155-
meta_dir=Path(config["data_paths"]["metadata_dir"]),
151+
model_type=attack_cfg["model_type"],
152+
predictions_file_name=attack_cfg["predictions_file_name"],
156153
population_data_dir=Path(data_cfg["population_data_path"]),
154+
model_data_dir=Path(config["data_paths"]["midst_data_path"]),
155+
meta_dir=Path(config["data_paths"]["metadata_dir"]),
156+
target_model_subdir=Path(attack_cfg["target_shadow_model_subdir"]),
157+
results_path=Path(attack_cfg["results_path"]),
157158
)
158159

159160
unset_all_random_seeds()

src/midst_toolkit/attacks/tartan_federer/tartan_federer_attack.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def train_tartan_federer_attack_classifier(
402402
additional_timesteps: list[int],
403403
num_noise_per_time_step: int,
404404
samples_per_train_model: int,
405-
sample_per_val_model: int,
405+
samples_per_val_model: int,
406406
classifier_num_epochs: int,
407407
classifier_hidden_dim: int,
408408
classifier_learning_rate: float,
@@ -427,7 +427,7 @@ def train_tartan_federer_attack_classifier(
427427
num_noise_per_time_step: Number of Gaussian noise samples to be used for each timestep in the loss computation.
428428
samples_per_train_model: Number of samples drawn from the training data (members) of train indices and
429429
non-members for training the binary classifier.
430-
sample_per_val_model: Number of samples drawn from the training data (members) of validation indices and
430+
samples_per_val_model: Number of samples drawn from the training data (members) of validation indices and
431431
non-members for validating the binary classifier.
432432
classifier_num_epochs: Number of epochs used to train the MLP as the binary classifier.
433433
classifier_hidden_dim: The width of the 3-layer MLP trained as the binary classifier.
@@ -457,7 +457,7 @@ def train_tartan_federer_attack_classifier(
457457
y_train = np.zeros([total_data_num_for_train])
458458

459459
if val_indices is not None:
460-
total_data_num_for_validation = sample_per_val_model * 2 * len(val_indices)
460+
total_data_num_for_validation = samples_per_val_model * 2 * len(val_indices)
461461
x_val = np.zeros([total_data_num_for_validation, input_dimension])
462462
y_val = np.zeros([total_data_num_for_validation])
463463
else:
@@ -488,7 +488,7 @@ def train_tartan_federer_attack_classifier(
488488
model_dir,
489489
population_df_for_validation,
490490
columns_for_deduplication,
491-
sample_per_val_model,
491+
samples_per_val_model,
492492
"data_for_validating_MIA.csv",
493493
)
494494

@@ -528,7 +528,7 @@ def train_tartan_federer_attack_classifier(
528528
timestep_count += 1
529529

530530
elif val_indices is not None and model_number in val_indices:
531-
batch_size = sample_per_val_model * 2
531+
batch_size = samples_per_val_model * 2
532532
predictions = get_score(
533533
model_dir,
534534
model_path,
@@ -543,12 +543,12 @@ def train_tartan_federer_attack_classifier(
543543
)
544544
assert x_val is not None and y_val is not None
545545
x_val[
546-
sample_per_val_model * 2 * val_count : sample_per_val_model * 2 * (val_count + 1),
546+
samples_per_val_model * 2 * val_count : samples_per_val_model * 2 * (val_count + 1),
547547
timestep_count * num_noise_per_time_step : (timestep_count + 1) * num_noise_per_time_step,
548548
] = predictions.detach().squeeze().cpu().numpy()
549549

550-
y_val[sample_per_val_model * 2 * val_count : sample_per_val_model * 2 * (val_count + 1)] = (
551-
np.concatenate([np.zeros(sample_per_val_model), np.ones(sample_per_val_model)])
550+
y_val[samples_per_val_model * 2 * val_count : samples_per_val_model * 2 * (val_count + 1)] = (
551+
np.concatenate([np.zeros(samples_per_val_model), np.ones(samples_per_val_model)])
552552
)
553553

554554
timestep_count += 1
@@ -579,12 +579,12 @@ def tartan_federer_attack(
579579
additional_timesteps: list[int],
580580
num_noise_per_time_step: int,
581581
samples_per_train_model: int,
582-
sample_per_val_model: int,
582+
samples_per_val_model: int,
583583
classifier_num_epochs: int,
584584
classifier_hidden_dim: int,
585585
classifier_learning_rate: float,
586586
model_type: str,
587-
predictions_file_format: str,
587+
predictions_file_name: str,
588588
population_data_dir: Path,
589589
model_data_dir: Path,
590590
meta_dir: Path,
@@ -609,14 +609,14 @@ def tartan_federer_attack(
609609
num_noise_per_time_step: Number of Gaussian noise samples to be used for each timestep in the loss computation.
610610
samples_per_train_model: Number of samples drawn from the training data (members) of train indices and
611611
non-members for training the binary classifier.
612-
sample_per_val_model: Number of samples drawn from the training data (members) of validation indices and
612+
samples_per_val_model: Number of samples drawn from the training data (members) of validation indices and
613613
non-members for validating the binary classifier.
614614
classifier_num_epochs: Number of epochs used to train the MLP as the binary classifier.
615615
classifier_hidden_dim: The width of the 3-layer MLP trained as the binary classifier.
616616
classifier_learning_rate: Learning rate used to train the binary classifier.
617617
population_data_dir: Directory containing the population datasets used to train and validate the attack.
618618
model_type: Type of diffusion model, e.g., "tabddpm" for ClavaDDPM-single-table.
619-
predictions_file_format: Format for naming the MIA prediction files.
619+
predictions_file_name: Format for naming the MIA prediction files.
620620
model_data_dir: Base directory containing all the trained diffusion models.
621621
meta_dir: Directory containing metadata about the datasets, including a file named `dataset_meta.json`.
622622
target_model_subdir: Sub-directory within each model directory containing the trained diffusion model
@@ -636,7 +636,7 @@ def tartan_federer_attack(
636636
val_indices=val_indices,
637637
columns_for_deduplication=columns_for_deduplication,
638638
samples_per_train_model=samples_per_train_model,
639-
sample_per_val_model=sample_per_val_model,
639+
samples_per_val_model=samples_per_val_model,
640640
population_data_dir=population_data_dir,
641641
model_type=model_type,
642642
model_data_dir=model_data_dir,
@@ -651,7 +651,7 @@ def tartan_federer_attack(
651651
classifier_learning_rate=classifier_learning_rate,
652652
)
653653

654-
predictions_file_name = f"{predictions_file_format}.csv"
654+
predictions_file_name = f"{predictions_file_name}.csv"
655655

656656
if val_indices is None:
657657
model_folders_indices = np.concatenate((train_indices, test_indices))

tests/integration/attacks/tartan_federer/test_tartan_federer_attack.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ def test_tf_attack_whitebox_tiny_config_midst_toolkit():
2828
"classifier_hidden_dim": 20,
2929
"classifier_num_epochs": 200,
3030
"samples_per_train_model": 3000,
31-
"sample_per_val_model": 10,
31+
"samples_per_val_model": 10,
3232
"num_noise_per_time_step": 30,
3333
"timesteps": [5, 10, 15],
3434
"additional_timesteps": [0],
35-
"predictions_file_format": "challenge_label_predictions",
35+
"predictions_file_name": "challenge_label_predictions",
3636
# TODO: Make results path a temp directory
3737
"results_path": Path(__file__).parent / "assets" / "tartan_federer_attack_results",
3838
"test_indices": [5, 6],
@@ -83,11 +83,11 @@ def test_tf_attack_whitebox_tiny_config_midst_toolkit_single_model():
8383
"classifier_hidden_dim": 100,
8484
"classifier_num_epochs": 200,
8585
"samples_per_train_model": 3000,
86-
"sample_per_val_model": 10,
86+
"samples_per_val_model": 10,
8787
"num_noise_per_time_step": 30,
8888
"timesteps": [5, 10, 15],
8989
"additional_timesteps": [0],
90-
"predictions_file_format": "challenge_label_predictions",
90+
"predictions_file_name": "challenge_label_predictions",
9191
# TODO: Make results path a temp directory
9292
"results_path": Path(__file__).parent / "assets" / "tartan_federer_attack_results",
9393
"test_indices": [3],
@@ -138,11 +138,11 @@ def test_tf_attack_whitebox_tiny_config_midst_toolkit_no_validation():
138138
"classifier_hidden_dim": 100,
139139
"classifier_num_epochs": 200,
140140
"samples_per_train_model": 3000,
141-
"sample_per_val_model": 10,
141+
"samples_per_val_model": 10,
142142
"num_noise_per_time_step": 30,
143143
"timesteps": [5, 10, 15],
144144
"additional_timesteps": [0],
145-
"predictions_file_format": "challenge_label_predictions",
145+
"predictions_file_name": "challenge_label_predictions",
146146
# TODO: Make results path a temp directory
147147
"results_path": Path(__file__).parent / "assets" / "tartan_federer_attack_results",
148148
"test_indices": [2],

0 commit comments

Comments
 (0)