@@ -411,14 +411,22 @@ def _get_data(q, o, how_many, offset, model_settings, loss, overlapped_prefix,
411411 shiftby , mode , use_audio , use_video , video_findfile ,
412412 data_index , labels_list , np_rng , data_dir ,
413413 audio_read_plugin , audio_read_plugin_kwargs ,
414- video_read_plugin , video_read_plugin_kwargs ):
414+ video_read_plugin , video_read_plugin_kwargs ,
415+ augmentation_plugin , augmentation_parameters ):
415416 q .cancel_join_thread ()
416417
417418 from lib import compute_background , load_audio_read_plugin , load_video_read_plugin
418419 load_audio_read_plugin (audio_read_plugin , audio_read_plugin_kwargs )
419420 load_video_read_plugin (video_read_plugin , video_read_plugin_kwargs )
420421 from lib import audio_read , video_read
421422
423+ if use_audio and mode == 'training' :
424+ import importlib
425+ sys .path .insert (0 ,os .path .dirname (augmentation_plugin ))
426+ tmp = importlib .import_module (os .path .basename (augmentation_plugin ))
427+ def augment (audio_slice , augmentation_parameters ):
428+ return tmp .augment (audio_slice , augmentation_parameters )
429+
422430 while True :
423431 # Pick one of the partitions to choose sounds from.
424432 pick_deterministically = mode != 'training'
@@ -498,29 +506,8 @@ def _get_data(q, o, how_many, offset, model_settings, loss, overlapped_prefix,
498506 labels [i - offset , labels_list .index (root )] = target
499507 sounds [- 1 ].append ({k : v for k ,v in overlapped_sound .items () if k != 'overlaps' })
500508
501- # augmentation
502509 if use_audio and mode == 'training' :
503- volume_range = [float (x ) for x in model_settings ['augment_volume' ].split (',' )]
504- noise_range = [float (x ) for x in model_settings ['augment_noise' ].split (',' )]
505- dc_range = [float (x ) for x in model_settings ['augment_dc' ].split (',' )]
506- reverse_bool = model_settings ['augment_reverse' ] == 'yes'
507- invert_bool = model_settings ['augment_invert' ] == 'yes'
508- if volume_range != [1 ,1 ]:
509- volume_ranges = np .random .uniform (* volume_range , (nsounds ,1 ,audio_nchannels ))
510- audio_slice *= volume_ranges
511- if noise_range != [0 ,0 ]:
512- noise_ranges = np .random .uniform (* noise_range , (nsounds ,1 ,audio_nchannels ))
513- noises = np .random .normal (0 , noise_ranges , audio_slice .shape )
514- audio_slice += noises
515- if dc_range != [0 ,0 ]:
516- dc_ranges = np .random .uniform (* dc_range , (nsounds ,1 ,audio_nchannels ))
517- audio_slice += dc_ranges
518- if reverse_bool :
519- ireverse = np .random .choice ([False ,True ], nsounds )
520- audio_slice [ireverse ] = np .flip (audio_slice [ireverse ], axis = 1 )
521- if invert_bool :
522- iinvert = np .random .choice ([- 1 ,1 ], (nsounds ,1 ,1 ))
523- audio_slice *= iinvert
510+ audio_slice = augment (audio_slice , augmentation_parameters )
524511
525512 if loss == 'autoencoder' :
526513 labels = audio_slice
@@ -533,7 +520,8 @@ def _get_data(q, o, how_many, offset, model_settings, loss, overlapped_prefix,
533520 q .put ([video_slice , labels , sounds ])
534521
535522def get_data (how_many , offset , model_settings , loss , overlapped_prefix ,
536- shiftby , mode , use_audio , use_video , video_findfile ):
523+ shiftby , mode , use_audio , use_video , video_findfile ,
524+ augmentation_plugin , augmentation_parameters ):
537525 """Gather sounds from the data set, applying transformations as needed.
538526
539527 When the mode is 'training', a random selection of sounds will be returned,
@@ -575,7 +563,8 @@ def get_data(how_many, offset, model_settings, loss, overlapped_prefix,
575563 mode , use_audio , use_video , video_findfile ,
576564 data_index , labels_list , np_rng , data_dir ,
577565 audio_read_plugin , audio_read_plugin_kwargs ,
578- video_read_plugin , video_read_plugin_kwargs ),
566+ video_read_plugin , video_read_plugin_kwargs ,
567+ augmentation_plugin , augmentation_parameters ),
579568 daemon = True )
580569 p .start ()
581570 processes [mode ].append (p )
0 commit comments