docs/advanced/model_optimization.rst (2 additions, 2 deletions)
@@ -3,7 +3,7 @@ hls4ml Optimization API
========================

Pruning and weight sharing are effective techniques to reduce model footprint and computational requirements. The hls4ml Optimization API introduces hardware-aware pruning and weight sharing.
By defining custom objectives, the algorithm solves a Knapsack optimization problem aimed at maximizing model performance while keeping the target resource(s) to a minimum. Out-of-the-box objectives include network sparsity, GPU FLOPs, Vivado DSPs, memory utilization, etc.

The code block below showcases three use cases of the hls4ml Optimization API - network sparsity (unstructured pruning), GPU FLOPs (structured pruning) and Vivado DSP utilization (pattern pruning). First, we start with unstructured pruning:
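For context, here is a minimal, hedged sketch of what such an unstructured-pruning call might look like. The import paths, the optimize_model signature, and the argument ordering are assumptions inferred from the docstrings quoted later in this PR, not a verbatim copy of the documented example:

    import numpy as np
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.losses import CategoricalCrossentropy
    from tensorflow.keras.metrics import CategoricalAccuracy
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam

    from hls4ml.optimization.attributes import get_attributes_from_keras_model  # assumed import path
    from hls4ml.optimization.keras import optimize_model                        # assumed import path
    from hls4ml.optimization.objectives import ParameterEstimator               # network-sparsity objective
    from hls4ml.optimization.scheduler import PolynomialScheduler

    # Toy stand-ins for the user's baseline model and data
    baseline_model = Sequential([Dense(32, activation='relu', input_shape=(16,)), Dense(5, activation='softmax')])
    X_train, y_train = np.random.rand(256, 16), np.eye(5)[np.random.randint(0, 5, 256)]
    X_val, y_val = np.random.rand(64, 16), np.eye(5)[np.random.randint(0, 5, 64)]

    model_attributes = get_attributes_from_keras_model(baseline_model)
    scheduler = PolynomialScheduler(final_sparsity=0.5)  # constructor arguments assumed; target up to 50% sparsity

    # rtol = 0.975: stop pruning once the validation metric drops below 97.5% of the baseline
    optimized_model = optimize_model(
        baseline_model, model_attributes, ParameterEstimator, scheduler,
        X_train, y_train, X_val, y_val, 128, 10,  # data, batch size, epochs (assumed ordering)
        Adam(), CategoricalCrossentropy(from_logits=False), CategoricalAccuracy(), True, 0.975,
    )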
@@ -115,6 +115,6 @@ Finally, optimizing Vivado DSPs is possible, given a hls4ml config:
There are two more Vivado "optimizers" - VivadoFFEstimator, aimed at reducing register utilisation, and VivadoMultiObjectiveEstimator, aimed at optimising BRAM and DSP utilisation.
Note, to ensure DSPs are optimized, "unrolled" Dense multiplication must be used before synthesizing HLS, by modifying the config:
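For illustration, a hedged sketch of such a config change follows; the 'Unrolled' strategy string is an assumption and should be checked against the strategy names supported by the hls4ml release in use:

    import hls4ml

    # Sketch only: request "unrolled" Dense multiplication before HLS synthesis.
    # The 'Unrolled' strategy value is an assumption; verify it for your hls4ml version.
    # optimized_model refers to the pruned model from the earlier sketch.
    hls_config = hls4ml.utils.config_from_keras_model(optimized_model, granularity='model')
    hls_config['Model']['Strategy'] = 'Unrolled'

    hls_model = hls4ml.converters.convert_from_keras_model(optimized_model, hls_config=hls_config)
    hls_model.compile()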
pruning stops when pruned_validation_metric < (or >) rtol * baseline_validation_metric

Kwargs:
    - callbacks (list of keras.callbacks.Callback): Currently not supported, to be developed in future versions
    - ranking_metric (string): Metric used for ranking weights and structures; currently supported: l1, l2, saliency and Oracle
    - local (boolean): Layer-wise or global pruning
    - verbose (boolean): Display debug logs during model optimization
    - rewinding_epochs (int): Number of epochs to retrain the model without weight freezing, allowing regrowth of previously pruned weights
    - cutoff_bad_trials (int): Number of bad trials (performance below threshold) after which model pruning / weight sharing stops
    - directory (string): Directory to store temporary results
    - tuner (str): Tuning algorithm; choose between Bayesian, Hyperband and None
    - knapsack_solver (str): Algorithm used to solve the Knapsack problem when optimizing; the default usually works well; for very large networks, the greedy algorithm might be more suitable
    - regularization_range (list): List of suitable hyperparameters for weight decay
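To make these keyword arguments concrete, a hypothetical call is sketched below. Only the keyword names are taken from the docstring above; the positional arguments and the exact accepted string values are assumptions carried over from the earlier sketch:

    # Hypothetical usage; positional arguments follow the earlier unstructured-pruning sketch.
    optimized_model = optimize_model(
        baseline_model, model_attributes, ParameterEstimator, scheduler,
        X_train, y_train, X_val, y_val, 128, 10,
        Adam(), CategoricalCrossentropy(from_logits=False), CategoricalAccuracy(), True, 0.975,
        ranking_metric='l1',       # rank weights/structures by their L1 norm
        local=False,               # global pruning rather than layer-wise
        verbose=True,              # print debug logs during optimization
        rewinding_epochs=5,        # retrain without freezing so pruned weights can regrow
        cutoff_bad_trials=3,       # stop after 3 trials below the rtol threshold
        directory='hls4ml_prune',  # where temporary results are stored
        tuner='Bayesian',          # hyperparameter tuning algorithm
    )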
hls4ml/optimization/config.py (5 additions, 3 deletions)
@@ -17,15 +17,17 @@
    - Dense: Neurons, determined by their outgoing connections (columns in Keras weight tensors)
    - Conv2D: Filters (structures of size filt_width x filt_height x n_chan)
    - Notes:
        - For Dense, it was also possible to optimize by incoming connections (rows);
          however, removing zero neurons becomes harder because of Keras Surgeon
        - For Conv2D, significant literature explored pruning channels; currently not supported
    - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras)

3. Pattern:
    - Pruning: Y
    - Weight sharing: Y
    - Description: Zeroes out or quantizes all the weights in a group.
      Groups are determined by a variable, n, and every n-th weight in the flattened,
      transposed (Resource) weight tensor is collected and stored in the same group.
      Equivalent to pruning/quantizing weights processed by the same DSP in hls4ml.
    - Supports: All layers in SUPPORTED_LAYERS (hls4ml.optimization.keras)
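To illustrate the pattern grouping described above, here is a small, self-contained NumPy sketch (illustration only, not hls4ml code) showing how every n-th weight of a flattened, transposed weight tensor lands in the same group:

    import numpy as np

    # Toy illustration of pattern grouping: group i collects weights i, i+n, i+2n, ...
    # of the flattened, transposed (Resource-ordered) weight tensor.
    np.random.seed(0)
    kernel = np.random.randn(4, 6)   # Keras Dense kernel: (n_in, n_out)
    n = 3                            # grouping stride, related to how weights map onto DSPs

    flat = kernel.T.flatten()
    groups = [flat[i::n] for i in range(n)]

    # Pattern pruning would zero out an entire group; weight sharing would quantize
    # all members of a group to one shared value.
    for i, g in enumerate(groups):
        print(f'group {i}: {g.size} weights')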
pruning stops when pruned_validation_metric < (or >) rtol * baseline_validation_metric

Kwargs:
    - callbacks (list of keras.callbacks.Callback): Currently not supported, to be developed in future versions
    - ranking_metric (string): Metric used for ranking weights and structures; currently supported: l1, l2, saliency and Oracle
    - local (boolean): Layer-wise or global pruning
    - verbose (boolean): Display debug logs during model optimization
    - rewinding_epochs (int): Number of epochs to retrain the model without weight freezing, allowing regrowth of previously pruned weights
    - cutoff_bad_trials (int): Number of bad trials (performance below threshold) after which model pruning / weight sharing stops
    - directory (string): Directory to store temporary results
    - tuner (str): Tuning algorithm; choose between Bayesian, Hyperband and None
    - knapsack_solver (str): Algorithm used to solve the Knapsack problem when optimizing; the default usually works well; for very large networks, the greedy algorithm might be more suitable
    - regularization_range (list): List of suitable hyperparameters for weight decay
- learning_rate_range (list): List of suitable hyperparameters for learning rate

Notes:
    - In general, the regularization and learning rate ranges do not need to be provided, as the implementation sets a generic enough range. However, if the user has an idea of the likely hyperparameter ranges (e.g. VGG-16 weight decay ~10^-5), the tuning will complete faster.
    - The default tuner is Bayesian and, when coupled with the correct hyperparameter ranges, it performs well and fast. However, older versions of Keras Tuner had a crashing bug with the Bayesian tuner.
    - In general, the directory does not need to be specified. However, when pruning several models simultaneously, it is useful to specify a directory to avoid conflicting intermediate results.
'''
# User-provided manual hyper-parameters for regularisation loss
# TODO - Maybe we could extend this to be hyper-parameters per layer? or layer-type?
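As an illustration of the note above, a hypothetical way to supply explicit ranges follows; the kwarg names come from the docstring, while the helper used and the concrete values are assumptions:

    import numpy as np

    # Hypothetical example: narrow the search ranges around values the user already
    # expects to work (e.g. a weight decay near 1e-5), so the tuner converges faster.
    regularization_range = np.geomspace(1e-6, 1e-4, num=10).tolist()  # weight-decay candidates
    learning_rate_range = np.geomspace(1e-4, 1e-2, num=10).tolist()   # learning-rate candidates

    # These lists would then be passed as the regularization_range and learning_rate_range
    # kwargs documented above, together with a per-model directory (e.g. directory='prune_model_a')
    # when several models are pruned simultaneously.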