@@ -64,6 +64,8 @@ def extract_and_drop_id_column(
6464 # Extract ID column from the dataframe
6565 with open (data_types_file_path , "r" ) as f :
6666 column_types = json .load (f )
67+
68+ assert "id_column_name" in column_types , f"{ data_types_file_path } must contain 'id_column_name' key."
6769 id_column_name = column_types ["id_column_name" ]
6870
6971 assert id_column_name in data_frame .columns , f"Dataframe must have { id_column_name } column"
@@ -160,7 +162,7 @@ def collect_challenge_and_train_data(
160162 df_challenge_experiment = collect_midst_data (
161163 midst_data_input_dir = targets_data_path ,
162164 attack_types = challenge_attack_types ,
163- data_splits = ["test" ], # change to test for 10k, and change to final for 20k
165+ data_splits = ["test" ], # For ensemble experiments, change to `` test`` for 10k, and change to `` final`` for 20k
164166 dataset = "challenge" ,
165167 data_processing_config = data_processing_config ,
166168 )
@@ -191,9 +193,9 @@ def select_challenge_data_for_training(
191193 Args:
192194 attack_rmia_shadow_training_data_choice: Strategy for creating challenge train data for RMIA shadow training.
193195 It can be one of the following:
194- - "only_challenge": Use only challenge experiment data.
195- - "only_train": Use only master train data. Note that this option contracts with the original
196- design and purpose of training RMIA shadow models on the challenge points as
196+ - "only_challenge": Use only challenge experiment data (``df_challenge_experiment``).
197+ - "only_train": Use only master train data (``df_master_train``). Note that this option contradicts
198+ the original design and purpose of training RMIA shadow models on the challenge points, as
197199 RMIA signals (IN train signals) for challenge points could only be computed if
198200 shadow models are trained on these points.
199201 - "combined": Combine both challenge experiment data and master train data. This can
@@ -255,13 +257,14 @@ def run_metaclassifier_testing(
255257 config : DictConfig ,
256258) -> None :
257259 """
258- Function to run the attack on a target model using a trained metaclassifier.
259- Note that RMIA shadow models need to be trained for every new target model's challenge dataset.
260- However, we load the previously trained metaclassifier model and use it for new target models.
261- Unlike the training phase, in the testing phase, we don't need to train a shadow target model
260+ Function to run the attack on a single target model using a trained metaclassifier.
261+ Note that RMIA shadow models need to be trained for every new set of target models on
262+ their collected challenge data, but once they are trained for the first target, we can reuse them
263+ for the other targets in the same experiment.
264+ Unlike the training phase, in the testing phase, we don't need to train a target shadow model
262265 since we already have access to the synthetic data of a real target model.
263266 All the collected population data that is used for training, is still needed during testing to compute some
264- of the signals.
267+ of the signals (DOMIAS) .
265268 Test prediction probabilities are saved to the specified attack result path in the config.
266269
267270 Args:
@@ -289,7 +292,7 @@ def run_metaclassifier_testing(
289292
290293 # 2) Read target model's challenge data and synthetic data.
291294 # Black-box attacker only has access to the target model's synthetic data and challenge points.
292- # We also load challenge labels to report the attack performance.
295+ # We also load challenge labels to report the attack performance at the end .
293296 challenge_data_path = Path (config .target_model .challenge_data_path )
294297 challenge_label_path = Path (config .target_model .challenge_label_path )
295298
@@ -319,7 +322,7 @@ def run_metaclassifier_testing(
319322 shadow_data_paths = [Path (path ) for path in config .shadow_training .final_shadow_models_path ]
320323 assert len (shadow_data_paths ) == 3 , "The attack_data_paths list must contain exactly three elements."
321324
322- # If shadows are already trained for test (models_exists is True), don't need to train again.
325+ # If shadows are already trained for test (``models_exists`` is True), we don't need to train again.
323326 # Load shadow training collection from previously trained shadow models.
324327 shadow_data_collection , models_exists = load_trained_rmia_shadows_for_test_phase (shadow_data_paths )
325328
0 commit comments