
Commit 8031a06

[FEATURE] bottlenecks in WaveNet layers (#185)
* [REFINE] Update WaveNet Layer and LayerArray constructors to include bottleneck parameter
  - Modified the constructors of _Layer and _LayerArray to accept a new bottleneck parameter, enhancing the flexibility of the layer configurations.
  - Updated relevant method calls and test cases to reflect the new parameter, ensuring consistency across the codebase.
  - Adjusted JSON configuration handling to support the bottleneck parameter, maintaining backward compatibility with existing configurations.
* [FEATURE] Introduce bottleneck parameter in Layer and LayerArray tests
  - Added a bottleneck parameter to the constructors of _Layer and _LayerArray in various test cases, enhancing the flexibility of layer configurations.
  - Implemented new test cases for layers with bottleneck configurations, including both gated and non-gated scenarios.
  - Updated existing tests to utilize the bottleneck parameter, ensuring comprehensive coverage and consistency across the codebase.
* [REFINE] Temporarily disable bottleneck layer tests and update weight initialization comments
  - Commented out tests for bottleneck and gated bottleneck layers in run_tests.cpp while investigating a resize error.
  - Updated weight initialization logic in test_layer.cpp to clarify the layout for Conv1D and 1x1 convolutions, ensuring consistency with the new bottleneck parameter.
  - Adjusted comments for better clarity on weight patterns and dimensions in the test cases.
* [REFINE] Update WaveNet Layer to utilize bottleneck parameter (a toy dimension-flow sketch follows this message)
  - Adjusted the WaveNet Layer's SetMaxBufferSize and Process methods to correctly use the bottleneck parameter for resizing internal buffers.
  - Updated the handling of activation functions to ensure they operate on the correct number of channels based on the bottleneck.
  - Modified test cases to reflect changes in the Layer constructor and ensure proper functionality with the bottleneck configuration.
  - Enhanced comments for clarity regarding the internal channel structure and weight initialization in tests.
* [REFINE] Update headInput resizing in WaveNet layer test
  - Modified the headInput matrix resizing in test_layer.cpp to use the bottleneck parameter instead of channels, aligning it with the updated WaveNet layer configuration.
  - This keeps the test cases consistent with the architecture that incorporates the bottleneck parameter.
* Remove unused variable
* Add test for Layer::Process() with bottleneck configuration
  - Introduced a new test case, test_layer_bottleneck_process_realtime_safe(), to validate that Layer::Process() operates correctly when the bottleneck parameter differs from the number of channels.
  - Ensured that the test checks for memory allocation during processing, maintaining real-time safety.
  - Updated run_tests.cpp to include this new test, enhancing coverage for bottleneck scenarios in the WaveNet layer.
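For orientation, the internal channel flow this commit describes can be pictured with a toy example. This is a minimal, self-contained sketch (made-up sizes, tanh standing in for the configurable activation), not the library's Process() implementation:

```cpp
// Toy illustration only: shows how `bottleneck` narrows the gated path inside a layer.
#include <Eigen/Dense>

int main()
{
  const int channels = 8;    // residual width entering and leaving the layer
  const int bottleneck = 4;  // internal width; defaults to `channels` when not specified
  const bool gated = true;
  const int num_frames = 16;

  // The dilated conv (plus input mixin) emits 2*bottleneck rows when gated, bottleneck otherwise.
  const int z_rows = gated ? 2 * bottleneck : bottleneck;
  Eigen::MatrixXf z = Eigen::MatrixXf::Random(z_rows, num_frames);

  if (gated)
  {
    // Activation on the top half (tanh here for illustration), sigmoid gate on the
    // bottom half, then an element-wise product.
    Eigen::ArrayXXf top = z.topRows(bottleneck).array().tanh();
    Eigen::ArrayXXf gate = (1.0f + (-z.bottomRows(bottleneck).array()).exp()).inverse();
    z.topRows(bottleneck) = (top * gate).matrix();
  }

  // The 1x1 projects bottleneck rows back to `channels` for the residual path;
  // the head/skip connection receives only the bottleneck rows.
  Eigen::MatrixXf w_1x1 = Eigen::MatrixXf::Random(channels, bottleneck);
  Eigen::MatrixXf to_next_layer = w_1x1 * z.topRows(bottleneck); // channels x num_frames
  Eigen::MatrixXf to_head = z.topRows(bottleneck);               // bottleneck x num_frames
  return (to_next_layer.rows() == channels && to_head.rows() == bottleneck) ? 0 : 1;
}
```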
1 parent e3e5154 commit 8031a06


8 files changed: +340 -68 lines changed


NAM/wavenet.cpp

Lines changed: 26 additions & 19 deletions
@@ -13,12 +13,14 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize)
 {
   _conv.SetMaxBufferSize(maxBufferSize);
   _input_mixin.SetMaxBufferSize(maxBufferSize);
-  _z.resize(this->_conv.get_out_channels(), maxBufferSize);
+  const long z_channels = this->_conv.get_out_channels(); // This is 2*bottleneck when gated, bottleneck when not
+  _z.resize(z_channels, maxBufferSize);
   _1x1.SetMaxBufferSize(maxBufferSize);
   // Pre-allocate output buffers
   const long channels = this->get_channels();
   this->_output_next_layer.resize(channels, maxBufferSize);
-  this->_output_head.resize(channels, maxBufferSize);
+  // _output_head stores the activated portion: bottleneck rows (the actual bottleneck value, not doubled)
+  this->_output_head.resize(this->_bottleneck, maxBufferSize);
 }

 void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)
@@ -30,7 +32,7 @@ void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)

 void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames)
 {
-  const long channels = this->get_channels();
+  const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels

   // Step 1: input convolutions
   this->_conv.Process(input, num_frames);
@@ -50,19 +52,20 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
     // do this column-wise:
     for (int i = 0; i < num_frames; i++)
     {
-      this->_activation->apply(this->_z.block(0, i, channels, 1));
+      this->_activation->apply(this->_z.block(0, i, bottleneck, 1));
       // TODO Need to support other activation functions here instead of hardcoded sigmoid
-      activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(channels, i, channels, 1));
+      activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(bottleneck, i, bottleneck, 1));
     }
-    this->_z.block(0, 0, channels, num_frames).array() *= this->_z.block(channels, 0, channels, num_frames).array();
-    _1x1.process_(_z.topRows(channels), num_frames); // Might not be RT safe
+    this->_z.block(0, 0, bottleneck, num_frames).array() *=
+      this->_z.block(bottleneck, 0, bottleneck, num_frames).array();
+    _1x1.process_(_z.topRows(bottleneck), num_frames); // Might not be RT safe
   }

   // Store output to head (skip connection: activated conv output)
   if (!this->_gated)
     this->_output_head.leftCols(num_frames).noalias() = this->_z.leftCols(num_frames);
   else
-    this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(channels).leftCols(num_frames);
+    this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(bottleneck).leftCols(num_frames);
   // Store output to next layer (residual connection: input + _1x1 output)
   this->_output_next_layer.leftCols(num_frames).noalias() =
     input.leftCols(num_frames) + _1x1.GetOutput().leftCols(num_frames);
@@ -72,15 +75,17 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
 // LayerArray =================================================================

 nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
-                                       const int channels, const int kernel_size, const std::vector<int>& dilations,
-                                       const std::string activation, const bool gated, const bool head_bias,
-                                       const int groups_input, const int groups_1x1)
+                                       const int channels, const int bottleneck, const int kernel_size,
+                                       const std::vector<int>& dilations, const std::string activation,
+                                       const bool gated, const bool head_bias, const int groups_input,
+                                       const int groups_1x1)
 : _rechannel(input_size, channels, false)
-, _head_rechannel(channels, head_size, head_bias)
+, _head_rechannel(bottleneck, head_size, head_bias)
+, _bottleneck(bottleneck)
 {
   for (size_t i = 0; i < dilations.size(); i++)
-    this->_layers.push_back(
-      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
+    this->_layers.push_back(_Layer(
+      condition_size, channels, bottleneck, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
 }

 void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
@@ -94,7 +99,7 @@ void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
   // Pre-allocate output buffers
   const long channels = this->_get_channels();
   this->_layer_outputs.resize(channels, maxBufferSize);
-  this->_head_inputs.resize(channels, maxBufferSize);
+  this->_head_inputs.resize(this->_bottleneck, maxBufferSize);
 }


@@ -199,9 +204,9 @@ nam::wavenet::WaveNet::WaveNet(const std::vector<nam::wavenet::LayerArrayParams>
 {
   this->_layer_arrays.push_back(nam::wavenet::_LayerArray(
     layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
-    layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
-    layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
-    layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
+    layer_array_params[i].channels, layer_array_params[i].bottleneck, layer_array_params[i].kernel_size,
+    layer_array_params[i].dilations, layer_array_params[i].activation, layer_array_params[i].gated,
+    layer_array_params[i].head_bias, layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
   if (i > 0)
     if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
     {
@@ -300,8 +305,10 @@ std::unique_ptr<nam::DSP> nam::wavenet::Factory(const nlohmann::json& config, st
     nlohmann::json layer_config = config["layers"][i];
     const int groups = layer_config.value("groups", 1); // defaults to 1
     const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
+    const int channels = layer_config["channels"];
+    const int bottleneck = layer_config.value("bottleneck", channels); // defaults to channels if not present
     layer_array_params.push_back(nam::wavenet::LayerArrayParams(
-      layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
+      layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], channels, bottleneck,
       layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
       layer_config["head_bias"], groups, groups_1x1));
   }
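
The Factory change above keeps existing model files loadable: a layer config without a "bottleneck" key behaves exactly as before. A minimal sketch of that defaulting behavior, using a made-up layer config with nlohmann::json (not the library's actual Factory code):

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

int main()
{
  // Hypothetical layer config; "bottleneck" is optional and, when absent,
  // falls back to "channels" (mirroring layer_config.value("bottleneck", channels)).
  const auto layer_config = nlohmann::json::parse(R"({
    "input_size": 1, "condition_size": 1, "head_size": 1,
    "channels": 16, "kernel_size": 3, "dilations": [1, 2, 4],
    "activation": "ReLU", "gated": false, "head_bias": false
  })");

  const int channels = layer_config["channels"];
  const int bottleneck = layer_config.value("bottleneck", channels); // defaults to channels
  std::cout << "channels=" << channels << " bottleneck=" << bottleneck << "\n";
  return 0;
}
```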

NAM/wavenet.h

Lines changed: 20 additions & 11 deletions
@@ -16,13 +16,14 @@ namespace wavenet
 class _Layer
 {
 public:
-  _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
+  _Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation,
          const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
-  : _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
-  , _input_mixin(condition_size, gated ? 2 * channels : channels, false)
-  , _1x1(channels, channels, true, groups_1x1)
+  : _conv(channels, gated ? 2 * bottleneck : bottleneck, kernel_size, true, dilation, groups_input)
+  , _input_mixin(condition_size, gated ? 2 * bottleneck : bottleneck, false)
+  , _1x1(bottleneck, channels, true, groups_1x1)
   , _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
-  , _gated(gated) {};
+  , _gated(gated)
+  , _bottleneck(bottleneck) {};
   // Resize all arrays to be able to process `maxBufferSize` frames.
   void SetMaxBufferSize(const int maxBufferSize);
   // Set the parameters of this module
@@ -71,18 +72,21 @@ class _Layer

   activations::Activation* _activation;
   const bool _gated;
+  const int _bottleneck; // Internal channel count (not doubled when gated)
 };

 class LayerArrayParams
 {
 public:
   LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
-                   const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
-                   const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
+                   const int bottleneck_, const int kernel_size_, const std::vector<int>&& dilations_,
+                   const std::string activation_, const bool gated_, const bool head_bias_, const int groups_input,
+                   const int groups_1x1_)
   : input_size(input_size_)
   , condition_size(condition_size_)
   , head_size(head_size_)
   , channels(channels_)
+  , bottleneck(bottleneck_)
   , kernel_size(kernel_size_)
   , dilations(std::move(dilations_))
   , activation(activation_)
@@ -97,6 +101,7 @@ class LayerArrayParams
   const int condition_size;
   const int head_size;
   const int channels;
+  const int bottleneck;
   const int kernel_size;
   std::vector<int> dilations;
   const std::string activation;
@@ -111,8 +116,9 @@ class _LayerArray
 {
 public:
   _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
-              const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
-              const bool head_bias, const int groups_input, const int groups_1x1);
+              const int bottleneck, const int kernel_size, const std::vector<int>& dilations,
+              const std::string activation, const bool gated, const bool head_bias, const int groups_input,
+              const int groups_1x1);

   void SetMaxBufferSize(const int maxBufferSize);

@@ -150,12 +156,15 @@ class _LayerArray
   std::vector<_Layer> _layers;
   // Output from last layer (for next layer array)
   Eigen::MatrixXf _layer_outputs;
-  // Accumulated head inputs from all layers
+  // Accumulated head inputs from all layers (bottleneck channels)
   Eigen::MatrixXf _head_inputs;

-  // Rechannel for the head
+  // Rechannel for the head (bottleneck -> head_size)
   Conv1x1 _head_rechannel;

+  // Bottleneck size (internal channel count)
+  const int _bottleneck;
+
   long _get_channels() const;
   // Common processing logic after head inputs are set
   void ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames);
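
Given the constructor above, a gated layer with a bottleneck narrower than its residual width could be set up as follows. This is a hypothetical usage sketch (illustrative sizes, include path assuming this repository's layout), not a test from the commit:

```cpp
#include "NAM/wavenet.h"

int main()
{
  // Hypothetical sizes: 16 residual channels, an 8-channel internal bottleneck.
  nam::wavenet::_Layer layer(/*condition_size=*/1, /*channels=*/16, /*bottleneck=*/8,
                             /*kernel_size=*/3, /*dilation=*/2, /*activation=*/"ReLU",
                             /*gated=*/true, /*groups_input=*/1, /*groups_1x1=*/1);
  // Per the initializer list above:
  //   _conv:        16 -> 2*8 rows (gated), dilated by 2
  //   _input_mixin: 1  -> 2*8 rows
  //   _1x1:         8  -> 16 rows (back to the residual width)
  layer.SetMaxBufferSize(64); // pre-allocates _z (2*8 x 64) and _output_head (8 x 64)
  return 0;
}
```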

tools/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ set_target_properties(run_tests PROPERTIES COMPILE_OPTIONS "-O0")
 # Release/RelWithDebInfo/MinSizeRel build types automatically define NDEBUG
 # We use a compile option to undefine it, which works on GCC, Clang, and MSVC
 target_compile_options(run_tests PRIVATE
-  $<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-U_NDEBUG>
+  $<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-UNDEBUG>
 )

 source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES})

tools/run_tests.cpp

Lines changed: 3 additions & 0 deletions
@@ -104,6 +104,8 @@ int main()
   test_wavenet::test_layer::test_non_gated_layer();
   test_wavenet::test_layer::test_layer_activations();
   test_wavenet::test_layer::test_layer_multichannel();
+  test_wavenet::test_layer::test_layer_bottleneck();
+  test_wavenet::test_layer::test_layer_bottleneck_gated();
   test_wavenet::test_layer_array::test_layer_array_basic();
   test_wavenet::test_layer_array::test_layer_array_receptive_field();
   test_wavenet::test_layer_array::test_layer_array_with_head_input();
@@ -118,6 +120,7 @@ int main()
   test_wavenet::test_conv1d_grouped_process_realtime_safe();
   test_wavenet::test_conv1d_grouped_dilated_process_realtime_safe();
   test_wavenet::test_layer_process_realtime_safe();
+  test_wavenet::test_layer_bottleneck_process_realtime_safe();
   test_wavenet::test_layer_grouped_process_realtime_safe();
   test_wavenet::test_layer_array_process_realtime_safe();
   test_wavenet::test_process_realtime_safe();

tools/test/test_wavenet/test_full.cpp

Lines changed: 13 additions & 8 deletions
@@ -19,6 +19,7 @@ void test_wavenet_model()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -29,7 +30,7 @@
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
@@ -85,15 +86,16 @@ void test_wavenet_multiple_arrays()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First array
   std::vector<int> dilations1{1};
+  const int bottleneck = channels;
   const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
-                                                              kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations1), activation,
+                                                              gated, head_bias, groups, groups_1x1));
   // Second array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
-                                                              kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups, groups_1x1));
+                                                              bottleneck, kernel_size, std::move(dilations2), activation,
+                                                              gated, head_bias, groups, groups_1x1));

   std::vector<float> weights;
   // Array 0: rechannel, layer, head_rechannel
@@ -127,6 +129,7 @@ void test_wavenet_zero_input()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -137,7 +140,7 @@ void test_wavenet_zero_input()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
@@ -168,6 +171,7 @@ void test_wavenet_different_buffer_sizes()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 1;
   std::vector<int> dilations{1};
   const std::string activation = "ReLU";
@@ -178,7 +182,7 @@ void test_wavenet_different_buffer_sizes()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
@@ -210,6 +214,7 @@ void test_wavenet_prewarm()
   const int condition_size = 1;
   const int head_size = 1;
   const int channels = 1;
+  const int bottleneck = channels;
   const int kernel_size = 3;
   std::vector<int> dilations{1, 2, 4};
   const std::string activation = "ReLU";
@@ -220,7 +225,7 @@ void test_wavenet_prewarm()
   const int groups = 1;
   const int groups_1x1 = 1;

-  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
+  nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
                                         std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
