Skip to content

Commit 01572d3

Browse files
committed
Cleaning
1 parent 34d81ba commit 01572d3

File tree

3 files changed

+17
-14
lines changed

3 files changed

+17
-14
lines changed

examples/ensemble_attack/configs/experiment_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ data_processing_config:
5050
original_population_data_path: /projects/midst-experiments/ensemble_attack/competition/population_data/ #Attack's collected population for DOMIAS
5151
# The column name in the data to be used for stratified splitting.
5252
column_to_stratify: "trans_type" # Attention: This value is not documented in the original codebase.
53-
folder_ranges: #Specify folder ranges for any of the mentioned splits.
53+
folder_ranges: # Specify folder ranges for any of the mentioned splits.
5454
train: [[1, 21]] # Folders to be used for train data collection in the experiments
5555
test: [[21, 31] , [31, 41]]
5656
# File names in MIDST data directories.

examples/ensemble_attack/real_data_collection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,11 @@ def collect_population_data_ensemble(
165165
save_dir: The path where the collected population data should be saved.
166166
original_repo_population: The original population data collected from the MIDST challenge repository.
167167
population_splits: A list indicating the data splits to be collected for population data.
168-
This is basically a list of strings containing the folder names under attack folders that are
168+
This is a list of strings containing the folder names under attack folders that are
169169
considered for population collection. If None, the default list of ``["train"]`` is set in the
170170
function based on the original attack implementation.
171171
challenge_splits: A list indicating the data splits to be collected for challenge points.
172-
This is basically a list of strings containing the folder names under attack folders that are
172+
This is a list of strings containing the folder names under attack folders that are
173173
considered for challenge data collection. If None, the default list of ``["train", "dev", "final"]``
174174
is set in the function based on the original attack implementation.
175175

examples/ensemble_attack/test_attack_model.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def extract_and_drop_id_column(
6464
# Extract ID column from the dataframe
6565
with open(data_types_file_path, "r") as f:
6666
column_types = json.load(f)
67+
68+
assert "id_column_name" in column_types, f"{data_types_file_path} must contain 'id_column_name' key."
6769
id_column_name = column_types["id_column_name"]
6870

6971
assert id_column_name in data_frame.columns, f"Dataframe must have {id_column_name} column"
@@ -160,7 +162,7 @@ def collect_challenge_and_train_data(
160162
df_challenge_experiment = collect_midst_data(
161163
midst_data_input_dir=targets_data_path,
162164
attack_types=challenge_attack_types,
163-
data_splits=["test"], # change to test for 10k, and change to final for 20k
165+
data_splits=["test"], # For ensemble experiments, change to ``test`` for 10k, and change to ``final`` for 20k
164166
dataset="challenge",
165167
data_processing_config=data_processing_config,
166168
)
@@ -191,9 +193,9 @@ def select_challenge_data_for_training(
191193
Args:
192194
attack_rmia_shadow_training_data_choice: Strategy for creating challenge train data for RMIA shadow training.
193195
It can be one of the following:
194-
- "only_challenge": Use only challenge experiment data.
195-
- "only_train": Use only master train data. Note that this option contracts with the original
196-
design and purpose of training RMIA shadow models on the challenge points as
196+
- "only_challenge": Use only challenge experiment data (``df_challenge_experiment``).
197+
- "only_train": Use only master train data (``df_master_train``). Note that this option contrasts
198+
with the original design and purpose of training RMIA shadow models on the challenge points as
197199
RMIA signals (IN train signals) for challenge points could only be computed if
198200
shadow models are trained on these points.
199201
- "combined": Combine both challenge experiment data and master train data. This can
@@ -255,13 +257,14 @@ def run_metaclassifier_testing(
255257
config: DictConfig,
256258
) -> None:
257259
"""
258-
Function to run the attack on a target model using a trained metaclassifier.
259-
Note that RMIA shadow models need to be trained for every new target model's challenge dataset.
260-
However, we load the previously trained metaclassifier model and use it for new target models.
261-
Unlike the training phase, in the testing phase, we don't need to train a shadow target model
260+
Function to run the attack on a single target model using a trained metaclassifier.
261+
Note that RMIA shadow models need to be trained for every new set of target models on
262+
their collected challenge data, but once they are trained for the first target, we can reuse them
263+
for the other targets in the same experiment.
264+
Unlike the training phase, in the testing phase, we don't need to train a target shadow model
262265
since we already have access to the synthetic data of a real target model.
263266
All the collected population data that is used for training is still needed during testing to compute some
264-
of the signals.
267+
of the signals (DOMIAS).
265268
Test prediction probabilities are saved to the specified attack result path in the config.
266269
267270
Args:
@@ -289,7 +292,7 @@ def run_metaclassifier_testing(
289292

290293
# 2) Read target model's challenge data and synthetic data.
291294
# A black-box attacker only has access to the target model's synthetic data and challenge points.
292-
# We also load challenge labels to report the attack performance.
295+
# We also load challenge labels to report the attack performance at the end.
293296
challenge_data_path = Path(config.target_model.challenge_data_path)
294297
challenge_label_path = Path(config.target_model.challenge_label_path)
295298

@@ -319,7 +322,7 @@ def run_metaclassifier_testing(
319322
shadow_data_paths = [Path(path) for path in config.shadow_training.final_shadow_models_path]
320323
assert len(shadow_data_paths) == 3, "The attack_data_paths list must contain exactly three elements."
321324

322-
# If shadows are already trained for test (models_exists is True), don't need to train again.
325+
# If shadows are already trained for test (``models_exists`` is True), don't need to train again.
323326
# Load shadow training collection from previously trained shadow models.
324327
shadow_data_collection, models_exists = load_trained_rmia_shadows_for_test_phase(shadow_data_paths)
325328

0 commit comments

Comments
 (0)