@@ -29,7 +29,7 @@ def train_fine_tuned_shadow_models(
2929 table_name : str ,
3030 id_column_name : str ,
3131 pre_training_data_size : int = 60000 ,
32- synthetic_data_size : int = 20000 ,
32+ number_of_points_to_synthesize : int = 20000 ,
3333 init_data_seed : int | None = None ,
3434 random_seed : int | None = None ,
3535) -> Path :
@@ -72,7 +72,8 @@ def train_fine_tuned_shadow_models(
7272 table_name: Name of the main table to be used for training the TabDDPM model.
7373 id_column_name: Name of the ID column in the data.
7474 pre_training_data_size: Size of the initial training set, defaults to 60,000.
75- synthetic_data_size: Size of the synthetic data to be generated by each shadow model, defaults to 20,000.
75+ number_of_points_to_synthesize: Size of the synthetic data to be generated by each shadow model,
76+ defaults to 20,000.
7677 init_data_seed: Random seed for the initial training set.
7778 random_seed: Random seed used for reproducibility, defaults to None.
7879
@@ -136,7 +137,10 @@ def train_fine_tuned_shadow_models(
136137 f"Initial model with ID { init_model_id } trained and saved at { initial_model_path } ." ,
137138 )
138139 else :
139- log (INFO , f"Initial model with ID { init_model_id } already exists, loading it from disk." )
140+ log (
141+ INFO ,
142+ f"Initial model with ID { init_model_id } already exists, loading it from disk." ,
143+ )
140144 with open (initial_model_path , "rb" ) as f :
141145 initial_model_training_results = pickle .load (f )
142146
@@ -173,10 +177,13 @@ def train_fine_tuned_shadow_models(
173177 fine_tuning_diffusion_iterations = fine_tuning_config .fine_tune_diffusion_iterations ,
174178 fine_tuning_classifier_iterations = fine_tuning_config .fine_tune_classifier_iterations ,
175179 synthesize = True ,
176- synthetic_data_size = synthetic_data_size ,
180+ number_of_points_to_synthesize = number_of_points_to_synthesize ,
177181 )
178182 assert train_result .synthetic_data is not None , "Fine-tuned models should generate synthetic data."
179- log (INFO , f"Fine-tuned model { model_id } generated { len (train_result .synthetic_data )} synthetic samples." )
183+ log (
184+ INFO ,
185+ f"Fine-tuned model { model_id } generated { len (train_result .synthetic_data )} synthetic samples." ,
186+ )
180187 attack_data ["fine_tuned_results" ].append (train_result )
181188
182189 # Pickle dump the results
@@ -195,7 +202,7 @@ def train_shadow_on_half_challenge_data(
195202 training_json_config_paths : DictConfig ,
196203 table_name : str ,
197204 id_column_name : str ,
198- synthetic_data_size : int = 20000 ,
205+ number_of_points_to_synthesize : int = 20000 ,
199206 random_seed : int | None = None ,
200207) -> Path :
201208 """
@@ -219,7 +226,8 @@ def train_shadow_on_half_challenge_data(
219226 - tabddpm_training_config_path (str): Path to table's training config json file.
220227 table_name: Name of the main table to be used for training the TabDDPM model.
221228 id_column_name: Name of the ID column in the data.
222- synthetic_data_size: Size of the synthetic data to be generated by each shadow model, defaults to 20,000.
229+ number_of_points_to_synthesize: Size of the synthetic data to be generated by each shadow model,
230+ defaults to 20,000.
223231 random_seed: Random seed used for reproducibility, defaults to None.
224232
225233 Returns:
@@ -235,7 +243,8 @@ def train_shadow_on_half_challenge_data(
235243 selected_id_lists : list [list [int ]] = [[] for _ in range (n_models )]
236244 # Assign each unique_id to half of the random lists
237245 for uid in unique_ids :
238- selected_lists = random .sample (range (n_models ), half_models ) # Select 2 random list indices
246+ # Select `half_models` distinct list indices at random
247+ selected_lists = random .sample (range (n_models ), half_models )
239248 for idx in selected_lists :
240249 selected_id_lists [idx ].append (uid )
241250
@@ -279,10 +288,13 @@ def train_shadow_on_half_challenge_data(
279288 configs ,
280289 save_dir ,
281290 synthesize = True ,
282- synthetic_data_size = synthetic_data_size ,
291+ number_of_points_to_synthesize = number_of_points_to_synthesize ,
283292 )
284293 assert train_result .synthetic_data is not None , "Trained shadow model did not generate synthetic data."
285- log (INFO , f"Trained shadow model { model_id } generated { len (train_result .synthetic_data )} synthetic samples." )
294+ log (
295+ INFO ,
296+ f"Trained shadow model { model_id } generated { len (train_result .synthetic_data )} synthetic samples." ,
297+ )
286298
287299 attack_data ["trained_results" ].append (train_result )
288300
@@ -304,7 +316,7 @@ def train_three_sets_of_shadow_models(
304316 id_column_name : str ,
305317 n_models_per_set : int = 4 ,
306318 n_reps : int = 12 ,
307- synthetic_data_size : int = 20000 ,
319+ number_of_points_to_synthesize : int = 20000 ,
308320 random_seed : int | None = None ,
309321) -> tuple [Path , Path , Path ]:
310322 """
@@ -352,7 +364,8 @@ def train_three_sets_of_shadow_models(
352364 id_column_name: Name of the ID column in the data.
353365 n_models_per_set: Number of shadow models to train by each approach. Must be an even number. Defaults to 4.
354366 n_reps: Number of repetitions for each challenge point in the fine-tuning or training sets, defaults to 12.
355- synthetic_data_size: Size of the synthetic data to be generated by each shadow model, defaults to 20,000.
367+ number_of_points_to_synthesize: Size of the synthetic data to be generated by each shadow model,
368+ defaults to 20,000.
356369 random_seed: Random seed used for reproducibility, defaults to None.
357370
358371 Returns:
@@ -376,7 +389,7 @@ def train_three_sets_of_shadow_models(
376389 table_name = table_name ,
377390 id_column_name = id_column_name ,
378391 pre_training_data_size = fine_tuning_config .pre_train_data_size ,
379- synthetic_data_size = synthetic_data_size ,
392+ number_of_points_to_synthesize = number_of_points_to_synthesize ,
380393 init_data_seed = random_seed ,
381394 random_seed = random_seed ,
382395 )
@@ -399,7 +412,7 @@ def train_three_sets_of_shadow_models(
399412 table_name = table_name ,
400413 id_column_name = id_column_name ,
401414 pre_training_data_size = fine_tuning_config .pre_train_data_size ,
402- synthetic_data_size = synthetic_data_size ,
415+ number_of_points_to_synthesize = number_of_points_to_synthesize ,
403416 # Setting a different seed for the second train set
404417 init_data_seed = random_seed + 1 if random_seed is not None else None ,
405418 random_seed = random_seed ,
@@ -418,7 +431,7 @@ def train_three_sets_of_shadow_models(
418431 training_json_config_paths = training_json_config_paths ,
419432 table_name = table_name ,
420433 id_column_name = id_column_name ,
421- synthetic_data_size = synthetic_data_size ,
434+ number_of_points_to_synthesize = number_of_points_to_synthesize ,
422435 random_seed = random_seed ,
423436 )
424437 log (
0 commit comments