@@ -352,17 +352,17 @@ def predict(
352352 """
353353 import torch # lgtm [py/repeated-import]
354354
355- x_in = np .empty (len (x ), dtype = object )
356- x_in [:] = list (x )
355+ # Apply preprocessing
356+ x_preprocessed , _ = self ._apply_preprocessing (x , y = None , fit = False )
357+
358+ x_in = np .empty (len (x_preprocessed ), dtype = object )
359+ x_in [:] = list (x_preprocessed )
357360
358361 # Put the model in the eval mode
359362 self ._model .eval ()
360363
361- # Apply preprocessing
362- x_preprocessed , _ = self ._apply_preprocessing (x_in , y = None , fit = False )
363-
364364 # Transform x into the model input space
365- inputs , _ , input_rates , _ , batch_idx = self ._transform_model_input (x = x_preprocessed )
365+ inputs , _ , input_rates , _ , batch_idx = self ._transform_model_input (x = x_in )
366366
367367 # Compute real input sizes
368368 input_sizes = input_rates .mul_ (inputs .size ()[- 1 ]).int ()
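
The object-array wrapping above is needed because the audio samples can have different lengths and will not stack into a regular 2-D array. A minimal standalone sketch of that step, with made-up sample values:

import numpy as np

# Two variable-length audio samples (illustrative values only)
samples = [np.array([0.1, 0.2, 0.3]), np.array([0.5, 0.6])]

# A 1-D object array keeps each ragged sample as its own element
x_in = np.empty(len(samples), dtype=object)
x_in[:] = list(samples)

print(x_in.shape)     # (2,)
print(x_in[0].shape)  # (3,)
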
@@ -437,21 +437,19 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
                   lengths. A possible example of `y` could be: `y = np.array(['SIXTY ONE', 'HELLO'])`.
         :return: Loss gradients of the same shape as `x`.
         """
-        x_in = np.empty(len(x), dtype=object)
-        x_in[:] = list(x)
+        # Apply preprocessing
+        x_preprocessed, _ = self._apply_preprocessing(x, None, fit=False)
+
+        x_in = np.empty(len(x_preprocessed), dtype=object)
+        x_in[:] = list(x_preprocessed)

         # Put the model in the training mode, otherwise CUDA can't backpropagate through the model.
         # However, model uses batch norm layers which need to be frozen
         self._model.train()
         self.set_batchnorm(train=False)

-        # Apply preprocessing
-        x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=False)
-
         # Transform data into the model input space
-        inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(
-            x=x_preprocessed, y=y_preprocessed, compute_gradient=True
-        )
+        inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(x=x_in, y=y, compute_gradient=True)

         # Compute real input sizes
         input_sizes = input_rates.mul_(inputs.size()[-1]).int()
@@ -484,8 +482,8 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:

         # Get results
         results_list = []
-        for i, _ in enumerate(x_preprocessed):
-            results_list.append(x_preprocessed[i].grad.cpu().numpy().copy())
+        for i, _ in enumerate(x_in):
+            results_list.append(x_in[i].grad.cpu().numpy().copy())

         results = np.array(results_list)

@@ -494,7 +492,7 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
         results_[:] = list(results)
         results = results_

-        results = self._apply_preprocessing_gradient(x_in, results)
+        results = self._apply_preprocessing_gradient(x, results)

         if x.dtype != object:
             results = np.array([i for i in results], dtype=x.dtype)  # pylint: disable=R1721
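
The loop collecting `x_in[i].grad` above relies on the per-sample tensors produced with `compute_gradient=True` retaining their gradients after the backward pass. A minimal sketch of that collection pattern with a toy loss (not the estimator's actual CTC loss):

import numpy as np
import torch

# Two variable-length inputs that require gradients (toy data)
x_in = [torch.tensor([0.1, 0.2, 0.3], requires_grad=True),
        torch.tensor([0.5, 0.6], requires_grad=True)]

# Stand-in scalar loss; the real estimator backpropagates a CTC loss through the model
loss = sum(t.sum() for t in x_in)
loss.backward()

# One gradient array per sample, mirroring results_list in loss_gradient
results_list = [t.grad.cpu().numpy().copy() for t in x_in]
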
@@ -521,18 +519,19 @@ def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: in
         """
         import random

-        x_in = np.empty(len(x), dtype=object)
-        x_in[:] = list(x)
+        # Apply preprocessing
+        x_preprocessed, _ = self._apply_preprocessing(x, None, fit=True)
+        y_preprocessed = y
+
+        x_in = np.empty(len(x_preprocessed), dtype=object)
+        x_in[:] = list(x_preprocessed)

         # Put the model in the training mode
         self._model.train()

         if self.optimizer is None:  # pragma: no cover
             raise ValueError("An optimizer is required to train the model, but none was provided.")

-        # Apply preprocessing
-        x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=True)
-
         # Train with batch processing
         num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
         ind = np.arange(len(x_preprocessed))
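
The `num_batch` line above uses the usual ceil-division batching; a standalone sketch with illustrative sizes (the real fit loop also reshuffles the index array every epoch; `np.random.shuffle` stands in here):

import numpy as np

batch_size = 128
num_samples = 300  # illustrative dataset size

num_batch = int(np.ceil(num_samples / float(batch_size)))  # -> 3 batches
ind = np.arange(num_samples)
np.random.shuffle(ind)

for m in range(num_batch):
    batch_idx = ind[m * batch_size : (m + 1) * batch_size]
    # batch_idx selects up to batch_size samples from the object array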