diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 8940314..dc46891 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -2,6 +2,7 @@
 #include <cmath> // pow, tanh, expf
 #include <filesystem>
 #include <fstream>
+#include <stdexcept>
 #include <string>
 #include <unordered_set>
 #include <vector>
@@ -206,8 +207,21 @@ std::unique_ptr<DSP> nam::linear::Factory(const nlohmann::json& config, std
 
 // Conv1x1 ====================================================================
 
-nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias)
+nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups)
 {
+  // Validate that channels divide evenly by groups
+  if (in_channels % groups != 0)
+  {
+    throw std::runtime_error("in_channels (" + std::to_string(in_channels) + ") must be divisible by numGroups ("
+                             + std::to_string(groups) + ")");
+  }
+  if (out_channels % groups != 0)
+  {
+    throw std::runtime_error("out_channels (" + std::to_string(out_channels) + ") must be divisible by numGroups ("
+                             + std::to_string(groups) + ")");
+  }
+
+  this->_num_groups = groups;
   this->_weight.resize(out_channels, in_channels);
   this->_do_bias = _bias;
   if (_bias)
@@ -222,9 +236,28 @@ void nam::Conv1x1::SetMaxBufferSize(const int maxBufferSize)
 
 void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights)
 {
-  for (int i = 0; i < this->_weight.rows(); i++)
-    for (int j = 0; j < this->_weight.cols(); j++)
-      this->_weight(i, j) = *(weights++);
+  if (this->_weight.size() > 0)
+  {
+    const long out_channels = this->_weight.rows();
+    const long in_channels = this->_weight.cols();
+    const int numGroups = this->_num_groups;
+    const long out_per_group = out_channels / numGroups;
+    const long in_per_group = in_channels / numGroups;
+
+    // For grouped convolutions, weights are organized per group.
+    // Weight layout: [group0, group1, ..., groupN-1];
+    // each group's weight matrix is (out_channels/numGroups, in_channels/numGroups).
+    for (int g = 0; g < numGroups; g++)
+    {
+      for (auto i = 0; i < out_per_group; i++)
+      {
+        for (auto j = 0; j < in_per_group; j++)
+        {
+          this->_weight(g * out_per_group + i, g * in_per_group + j) = *(weights++);
+        }
+      }
+    }
+  }
   if (this->_do_bias)
     for (int i = 0; i < this->_bias.size(); i++)
       this->_bias(i) = *(weights++);
@@ -232,16 +265,85 @@ void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights)
 
 Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int num_frames) const
 {
-  if (this->_do_bias)
-    return (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
+  const int numGroups = this->_num_groups;
+  const long in_channels = get_in_channels();
+  const long out_channels = get_out_channels();
+  const long in_per_group = in_channels / numGroups;
+  const long out_per_group = out_channels / numGroups;
+
+  Eigen::MatrixXf result(out_channels, num_frames);
+
+  if (numGroups == 1)
+  {
+    // Standard convolution (no grouping)
+    if (this->_do_bias)
+      result = (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
+    else
+      result = this->_weight * input.leftCols(num_frames);
+  }
   else
-    return this->_weight * input.leftCols(num_frames);
+  {
+    // Grouped convolution: process each group separately
+    result.setZero();
+    for (int g = 0; g < numGroups; g++)
+    {
+      // Extract input slice for this group
+      auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);
+
+      // Extract weight slice for this group
+      auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);
+
+      // Extract output slice for this group
+      auto output_group = result.middleRows(g * out_per_group, out_per_group);
+
+      // Perform grouped convolution: output_group = weight_group * input_group
+      output_group.noalias() = weight_group * input_group;
+    }
+
+    // Add bias if present
+    if (this->_do_bias)
+      result.colwise() += this->_bias;
+  }
+
+  return result;
 }
 
 void nam::Conv1x1::process_(const Eigen::MatrixXf& input, const int num_frames)
 {
   assert(num_frames <= _output.cols());
-  _output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);
+
+  const int numGroups = this->_num_groups;
+  const long in_channels = get_in_channels();
+  const long out_channels = get_out_channels();
+  const long in_per_group = in_channels / numGroups;
+  const long out_per_group = out_channels / numGroups;
+
+  if (numGroups == 1)
+  {
+    // Standard convolution (no grouping)
+    _output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);
+  }
+  else
+  {
+    // Grouped convolution: process each group separately
+    _output.leftCols(num_frames).setZero();
+    for (int g = 0; g < numGroups; g++)
+    {
+      // Extract input slice for this group
+      auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);
+
+      // Extract weight slice for this group
+      auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);
+
+      // Extract output slice for this group
+      auto output_group = _output.leftCols(num_frames).middleRows(g * out_per_group, out_per_group);
+
+      // Perform grouped convolution: output_group = weight_group * input_group
+      output_group.noalias() = weight_group * input_group;
+    }
+  }
+
+  // Add bias if present
   if (this->_do_bias)
   {
     _output.leftCols(num_frames).colwise() += this->_bias;
diff --git a/NAM/dsp.h b/NAM/dsp.h
index 3f9df92..f359a68 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -177,7 +177,7 @@ std::unique_ptr<DSP> Factory(const nlohmann::json& config, std::vector<float>& w
 class Conv1x1
 {
 public:
-  Conv1x1(const int in_channels, const int out_channels, const bool _bias);
+  Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups = 1);
   // Get the entire internal output buffer. This is intended for internal wiring
   // between layers/arrays; callers should treat the buffer as pre-allocated
   // storage and only consider the first `num_frames` columns valid for a given
@@ -199,6 +199,7 @@ class Conv1x1
 protected:
   Eigen::MatrixXf _weight;
   Eigen::VectorXf _bias;
+  int _num_groups;
 
 private:
   Eigen::MatrixXf _output;
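Aside (not part of the patch): the grouped branches in process()/process_() above compute exactly what a single multiply by a block-diagonal weight matrix would, just without touching the zero blocks. A minimal standalone check of that equivalence, assuming only that Eigen is on the include path; every name below is local to the sketch:

#include <cassert>
#include <iostream>
#include <Eigen/Dense>

int main()
{
  // Shapes chosen arbitrarily for the check.
  const int groups = 2, in_per_group = 3, out_per_group = 2, num_frames = 5;
  const int in_channels = groups * in_per_group;
  const int out_channels = groups * out_per_group;

  // Build a block-diagonal weight matrix: one random block per group.
  Eigen::MatrixXf W = Eigen::MatrixXf::Zero(out_channels, in_channels);
  for (int g = 0; g < groups; g++)
    W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group) =
      Eigen::MatrixXf::Random(out_per_group, in_per_group);

  const Eigen::MatrixXf input = Eigen::MatrixXf::Random(in_channels, num_frames);

  // Grouped path: one small product per group, as in Conv1x1::process().
  Eigen::MatrixXf grouped(out_channels, num_frames);
  for (int g = 0; g < groups; g++)
    grouped.middleRows(g * out_per_group, out_per_group).noalias() =
      W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group)
      * input.middleRows(g * in_per_group, in_per_group);

  // Dense path: one big product against the block-diagonal matrix.
  const Eigen::MatrixXf dense = W * input;

  assert((grouped - dense).cwiseAbs().maxCoeff() < 1e-5f);
  std::cout << "grouped product matches block-diagonal product" << std::endl;
  return 0;
}

Skipping the zero blocks is where the savings come from; the per-group products only touch 1/groups of the dense weight matrix.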
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index a1075ae..2a1aba7 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -74,13 +74,13 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
 
 nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
                                        const int channels, const int kernel_size, const std::vector<int>& dilations,
                                        const std::string activation, const bool gated, const bool head_bias,
-                                       const int groups_input)
+                                       const int groups_input, const int groups_1x1)
 : _rechannel(input_size, channels, false)
 , _head_rechannel(channels, head_size, head_bias)
 {
   for (size_t i = 0; i < dilations.size(); i++)
     this->_layers.push_back(
-      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input));
+      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
 }
 
@@ -201,7 +201,7 @@ nam::wavenet::WaveNet::WaveNet(const std::vector
       layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
       layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
       layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
-      layer_array_params[i].groups_input));
+      layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
     if (i > 0)
       if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
       {
@@ -299,10 +299,11 @@ std::unique_ptr<DSP> nam::wavenet::Factory(const nlohmann::json& config, st
   {
     nlohmann::json layer_config = config["layers"][i];
     const int groups = layer_config.value("groups", 1); // defaults to 1
+    const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
     layer_array_params.push_back(nam::wavenet::LayerArrayParams(
       layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
       layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
-      layer_config["head_bias"], groups));
+      layer_config["head_bias"], groups, groups_1x1));
   }
   const bool with_head = !config["head"].is_null();
   const float head_scale = config["head_scale"];
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index b460ef5..71d2eff 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -17,10 +17,10 @@ class _Layer
 {
 public:
   _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
-         const std::string activation, const bool gated, const int groups_input)
+         const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
   : _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
   , _input_mixin(condition_size, gated ? 2 * channels : channels, false)
-  , _1x1(channels, channels, true)
+  , _1x1(channels, channels, true, groups_1x1)
   , _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
   , _gated(gated) {};
   // Resize all arrays to be able to process `maxBufferSize` frames.
@@ -78,7 +78,7 @@ class LayerArrayParams
 public:
   LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
                    const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
-                   const bool gated_, const bool head_bias_, const int groups_input)
+                   const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
   : input_size(input_size_)
   , condition_size(condition_size_)
   , head_size(head_size_)
@@ -89,6 +89,7 @@ class LayerArrayParams
   , gated(gated_)
   , head_bias(head_bias_)
   , groups_input(groups_input)
+  , groups_1x1(groups_1x1_)
   {
   }
 
@@ -102,6 +103,7 @@ class LayerArrayParams
   const bool gated;
   const bool head_bias;
   const int groups_input;
+  const int groups_1x1;
 };
 
 // An array of layers with the same channels, kernel sizes, activations.
@@ -110,7 +112,7 @@ class _LayerArray
 {
 public:
   _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
               const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
-              const bool head_bias, const int groups_input);
+              const bool head_bias, const int groups_input, const int groups_1x1);
 
   void SetMaxBufferSize(const int maxBufferSize);
diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp
index e879050..aa28629 100644
--- a/tools/run_tests.cpp
+++ b/tools/run_tests.cpp
@@ -4,6 +4,7 @@
 #include <iostream>
 #include "test/test_activations.cpp"
 #include "test/test_conv1d.cpp"
+#include "test/test_conv_1x1.cpp"
 #include "test/test_convnet.cpp"
 #include "test/test_dsp.cpp"
 #include "test/test_fast_lut.cpp"
@@ -83,6 +84,21 @@ int main()
   test_conv1d::test_process_grouped_channel_isolation();
   test_conv1d::test_get_num_weights_grouped();
 
+  test_conv_1x1::test_construct();
+  test_conv_1x1::test_construct_with_groups();
+  test_conv_1x1::test_construct_validation_in_channels();
+  test_conv_1x1::test_construct_validation_out_channels();
+  test_conv_1x1::test_process_basic();
+  test_conv_1x1::test_process_with_bias();
+  test_conv_1x1::test_process_underscore();
+  test_conv_1x1::test_process_grouped_basic();
+  test_conv_1x1::test_process_grouped_with_bias();
+  test_conv_1x1::test_process_grouped_multiple_groups();
+  test_conv_1x1::test_process_grouped_channel_isolation();
+  test_conv_1x1::test_process_underscore_grouped();
+  test_conv_1x1::test_set_max_buffer_size();
+  test_conv_1x1::test_process_multiple_calls();
+
   test_wavenet::test_layer::test_gated();
   test_wavenet::test_layer::test_layer_getters();
   test_wavenet::test_layer::test_non_gated_layer();
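One note before the new tests: backward compatibility of model files rests on the two Factory lines above that read "groups" and "groups_1x1" with nlohmann's value(key, default). A standalone sketch of that behavior, assuming nlohmann/json is installed; the config literals are invented for illustration:

#include <iostream>
#include <nlohmann/json.hpp>

int main()
{
  // A pre-grouping layer config (no "groups_1x1" key) and a new-style one.
  const nlohmann::json old_layer = {{"channels", 8}};
  const nlohmann::json new_layer = {{"channels", 8}, {"groups_1x1", 2}};

  // json::value(key, fallback) returns the fallback when the key is absent,
  // so old configs behave as groups_1x1 == 1.
  std::cout << old_layer.value("groups_1x1", 1) << std::endl; // prints 1
  std::cout << new_layer.value("groups_1x1", 1) << std::endl; // prints 2
  return 0;
}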
diff --git a/tools/test/test_conv_1x1.cpp b/tools/test/test_conv_1x1.cpp
new file mode 100644
index 0000000..cb3e234
--- /dev/null
+++ b/tools/test/test_conv_1x1.cpp
@@ -0,0 +1,495 @@
+// Tests for Conv1x1
+
+#include <cassert>
+#include <cmath>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <Eigen/Dense>
+
+#include "NAM/dsp.h"
+
+namespace test_conv_1x1
+{
+// Test basic construction
+void test_construct()
+{
+  const int in_channels = 2;
+  const int out_channels = 3;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  assert(conv.get_in_channels() == in_channels);
+  assert(conv.get_out_channels() == out_channels);
+}
+
+// Test construction with groups
+void test_construct_with_groups()
+{
+  const int in_channels = 4;
+  const int out_channels = 6;
+  const bool do_bias = false;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  assert(conv.get_in_channels() == in_channels);
+  assert(conv.get_out_channels() == out_channels);
+}
+
+// Test construction validation - in_channels not divisible by groups
+void test_construct_validation_in_channels()
+{
+  const int in_channels = 5;
+  const int out_channels = 6;
+  const bool do_bias = false;
+  const int groups = 2; // 5 not divisible by 2
+  bool threw = false;
+  try
+  {
+    nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  }
+  catch (const std::runtime_error&)
+  {
+    threw = true;
+  }
+  assert(threw);
+}
+
+// Test construction validation - out_channels not divisible by groups
+void test_construct_validation_out_channels()
+{
+  const int in_channels = 4;
+  const int out_channels = 5;
+  const bool do_bias = false;
+  const int groups = 2; // 5 not divisible by 2
+  bool threw = false;
+  try
+  {
+    nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  }
+  catch (const std::runtime_error&)
+  {
+    threw = true;
+  }
+  assert(threw);
+}
+
+// Test basic process without groups
+void test_process_basic()
+{
+  const int in_channels = 2;
+  const int out_channels = 3;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int num_frames = 2;
+
+  // Set weights: 3x2 matrix
+  // [1.0, 2.0]
+  // [3.0, 4.0]
+  // [5.0, 6.0]
+  std::vector<float> weights{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 1.0f;
+  input(1, 0) = 2.0f;
+  input(0, 1) = 3.0f;
+  input(1, 1) = 4.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Frame 0: W * [1.0; 2.0] = [5.0; 11.0; 17.0]
+  assert(std::abs(output(0, 0) - 5.0f) < 0.01f); // 1.0*1.0 + 2.0*2.0
+  assert(std::abs(output(1, 0) - 11.0f) < 0.01f); // 3.0*1.0 + 4.0*2.0
+  assert(std::abs(output(2, 0) - 17.0f) < 0.01f); // 5.0*1.0 + 6.0*2.0
+  // Frame 1: W * [3.0; 4.0] = [11.0; 25.0; 39.0]
+  assert(std::abs(output(0, 1) - 11.0f) < 0.01f); // 1.0*3.0 + 2.0*4.0
+  assert(std::abs(output(1, 1) - 25.0f) < 0.01f); // 3.0*3.0 + 4.0*4.0
+  assert(std::abs(output(2, 1) - 39.0f) < 0.01f); // 5.0*3.0 + 6.0*4.0
+}
+
+// Test process with bias
+void test_process_with_bias()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const bool do_bias = true;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int num_frames = 1;
+
+  // Set weights: 2x2 identity matrix
+  // [1.0, 0.0]
+  // [0.0, 1.0]
+  // Bias: [10.0, 20.0]
+  std::vector<float> weights{1.0f, 0.0f, 0.0f, 1.0f, 10.0f, 20.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 5.0f;
+  input(1, 0) = 7.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Output should be input + bias (identity weights)
+  assert(std::abs(output(0, 0) - 15.0f) < 0.01f); // 5.0 + 10.0
+  assert(std::abs(output(1, 0) - 27.0f) < 0.01f); // 7.0 + 20.0
+}
+
+// Test process_ method (stores to internal buffer)
+void test_process_underscore()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int num_frames = 1;
+
+  // Set weights: 2x2 identity matrix
+  std::vector<float> weights{1.0f, 0.0f, 0.0f, 1.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 3.0f;
+  input(1, 0) = 4.0f;
+
+  conv.process_(input, num_frames);
+  auto output = conv.GetOutput().leftCols(num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  assert(std::abs(output(0, 0) - 3.0f) < 0.01f);
+  assert(std::abs(output(1, 0) - 4.0f) < 0.01f);
+}
+
+// Test basic grouped convolution with 2 groups
+void test_process_grouped_basic()
+{
+  const int in_channels = 4;
+  const int out_channels = 4;
+  const bool do_bias = false;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 2;
+
+  // For grouped convolution with 2 groups:
+  // Group 0: processes input channels 0-1 -> output channels 0-1
+  // Group 1: processes input channels 2-3 -> output channels 2-3
+  // Each group has out_per_group=2, in_per_group=2
+  // Weight layout: [group0, group1]
+  // Group 0: identity matrix (2x2)
+  // Group 1: scale by 2.0 (2x2)
+  std::vector<float> weights;
+  // Group 0: identity
+  weights.push_back(1.0f); // out[0], in[0]
+  weights.push_back(0.0f); // out[0], in[1]
+  weights.push_back(0.0f); // out[1], in[0]
+  weights.push_back(1.0f); // out[1], in[1]
+  // Group 1: scale by 2.0
+  weights.push_back(2.0f); // out[2], in[2]
+  weights.push_back(0.0f); // out[2], in[3]
+  weights.push_back(0.0f); // out[3], in[2]
+  weights.push_back(2.0f); // out[3], in[3]
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 1.0f; // Group 0, channel 0
+  input(1, 0) = 2.0f; // Group 0, channel 1
+  input(2, 0) = 3.0f; // Group 1, channel 0
+  input(3, 0) = 4.0f; // Group 1, channel 1
+  input(0, 1) = 5.0f;
+  input(1, 1) = 6.0f;
+  input(2, 1) = 7.0f;
+  input(3, 1) = 8.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Group 0: identity transformation
+  assert(std::abs(output(0, 0) - 1.0f) < 0.01f); // out[0] = in[0]
+  assert(std::abs(output(1, 0) - 2.0f) < 0.01f); // out[1] = in[1]
+  // Group 1: double transformation
+  assert(std::abs(output(2, 0) - 6.0f) < 0.01f); // out[2] = 2.0 * in[2]
+  assert(std::abs(output(3, 0) - 8.0f) < 0.01f); // out[3] = 2.0 * in[3]
+  // Frame 1
+  assert(std::abs(output(0, 1) - 5.0f) < 0.01f);
+  assert(std::abs(output(1, 1) - 6.0f) < 0.01f);
+  assert(std::abs(output(2, 1) - 14.0f) < 0.01f); // 2.0 * 7.0
+  assert(std::abs(output(3, 1) - 16.0f) < 0.01f); // 2.0 * 8.0
+}
+
+// Test grouped convolution with bias
+void test_process_grouped_with_bias()
+{
+  const int in_channels = 4;
+  const int out_channels = 4;
+  const bool do_bias = true;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  std::vector<float> weights;
+  // Group 0 weights (2x2 identity)
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 1 weights (2x2 identity)
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Bias: [1.0, 2.0, 3.0, 4.0]
+  weights.push_back(1.0f);
+  weights.push_back(2.0f);
+  weights.push_back(3.0f);
+  weights.push_back(4.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 10.0f;
+  input(1, 0) = 20.0f;
+  input(2, 0) = 30.0f;
+  input(3, 0) = 40.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Output should be input + bias (identity weights)
+  assert(std::abs(output(0, 0) - 11.0f) < 0.01f); // 10.0 + 1.0
+  assert(std::abs(output(1, 0) - 22.0f) < 0.01f); // 20.0 + 2.0
+  assert(std::abs(output(2, 0) - 33.0f) < 0.01f); // 30.0 + 3.0
+  assert(std::abs(output(3, 0) - 44.0f) < 0.01f); // 40.0 + 4.0
+}
+
+// Test grouped convolution with 4 groups
+void test_process_grouped_multiple_groups()
+{
+  const int in_channels = 8;
+  const int out_channels = 8;
+  const bool do_bias = false;
+  const int groups = 4;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  // Each group processes 2 input channels -> 2 output channels
+  std::vector<float> weights;
+  // Group 0: scale by 1.0
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 1: scale by 2.0
+  weights.push_back(2.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(2.0f);
+  // Group 2: scale by 3.0
+  weights.push_back(3.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(3.0f);
+  // Group 3: scale by 4.0
+  weights.push_back(4.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(4.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  for (int i = 0; i < in_channels; i++)
+  {
+    input(i, 0) = static_cast<float>(i + 1);
+  }
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Group 0: channels 0-1 scaled by 1.0
+  assert(std::abs(output(0, 0) - 1.0f) < 0.01f);
+  assert(std::abs(output(1, 0) - 2.0f) < 0.01f);
+  // Group 1: channels 2-3 scaled by 2.0
+  assert(std::abs(output(2, 0) - 6.0f) < 0.01f); // 3.0 * 2.0
+  assert(std::abs(output(3, 0) - 8.0f) < 0.01f); // 4.0 * 2.0
+  // Group 2: channels 4-5 scaled by 3.0
+  assert(std::abs(output(4, 0) - 15.0f) < 0.01f); // 5.0 * 3.0
+  assert(std::abs(output(5, 0) - 18.0f) < 0.01f); // 6.0 * 3.0
+  // Group 3: channels 6-7 scaled by 4.0
+  assert(std::abs(output(6, 0) - 28.0f) < 0.01f); // 7.0 * 4.0
+  assert(std::abs(output(7, 0) - 32.0f) < 0.01f); // 8.0 * 4.0
+}
+
+// Test that groups don't mix channels (channel isolation)
+void test_process_grouped_channel_isolation()
+{
+  const int in_channels = 6;
+  const int out_channels = 6;
+  const bool do_bias = false;
+  const int groups = 3;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  // 3 groups, each processes 2 channels
+  // Group 0: channels 0-1, set to zero (zero matrix)
+  // Group 1: channels 2-3, identity
+  // Group 2: channels 4-5, identity
+  std::vector<float> weights;
+  // Group 0: zero matrix
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  // Group 1: identity
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 2: identity
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 10.0f; // Should be zeroed by group 0
+  input(1, 0) = 20.0f; // Should be zeroed by group 0
+  input(2, 0) = 30.0f; // Should pass through group 1
+  input(3, 0) = 40.0f; // Should pass through group 1
+  input(4, 0) = 50.0f; // Should pass through group 2
+  input(5, 0) = 60.0f; // Should pass through group 2
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Group 0: should be zero
+  assert(std::abs(output(0, 0)) < 0.01f);
+  assert(std::abs(output(1, 0)) < 0.01f);
+  // Group 1: should pass through
+  assert(std::abs(output(2, 0) - 30.0f) < 0.01f);
+  assert(std::abs(output(3, 0) - 40.0f) < 0.01f);
+  // Group 2: should pass through
+  assert(std::abs(output(4, 0) - 50.0f) < 0.01f);
+  assert(std::abs(output(5, 0) - 60.0f) < 0.01f);
+}
+
+// Test process_ with groups
+void test_process_underscore_grouped()
+{
+  const int in_channels = 4;
+  const int out_channels = 4;
+  const bool do_bias = false;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  // Group 0: identity, Group 1: scale by 2.0
+  std::vector<float> weights;
+  // Group 0: identity
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 1: scale by 2.0
+  weights.push_back(2.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(2.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 1.0f;
+  input(1, 0) = 2.0f;
+  input(2, 0) = 3.0f;
+  input(3, 0) = 4.0f;
+
+  conv.process_(input, num_frames);
+  auto output = conv.GetOutput().leftCols(num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  assert(std::abs(output(0, 0) - 1.0f) < 0.01f);
+  assert(std::abs(output(1, 0) - 2.0f) < 0.01f);
+  assert(std::abs(output(2, 0) - 6.0f) < 0.01f); // 2.0 * 3.0
+  assert(std::abs(output(3, 0) - 8.0f) < 0.01f); // 2.0 * 4.0
+}
+
+// Test SetMaxBufferSize
+void test_set_max_buffer_size()
+{
+  const int in_channels = 2;
+  const int out_channels = 3;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int maxBufferSize = 128;
+
+  conv.SetMaxBufferSize(maxBufferSize);
+  auto output = conv.GetOutput();
+  assert(output.rows() == out_channels);
+  assert(output.cols() == maxBufferSize);
+}
+
+// Test multiple calls to process
+void test_process_multiple_calls()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  // Identity matrix
+  std::vector<float> weights{1.0f, 0.0f, 0.0f, 1.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input1(in_channels, 1);
+  input1(0, 0) = 1.0f;
+  input1(1, 0) = 2.0f;
+
+  Eigen::MatrixXf output1 = conv.process(input1, 1);
+  assert(std::abs(output1(0, 0) - 1.0f) < 0.01f);
+  assert(std::abs(output1(1, 0) - 2.0f) < 0.01f);
+
+  Eigen::MatrixXf input2(in_channels, 1);
+  input2(0, 0) = 3.0f;
+  input2(1, 0) = 4.0f;
+
+  Eigen::MatrixXf output2 = conv.process(input2, 1);
+  assert(std::abs(output2(0, 0) - 3.0f) < 0.01f);
+  assert(std::abs(output2(1, 0) - 4.0f) < 0.01f);
+}
+} // namespace test_conv_1x1
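A side calculation that helps when hand-building the flat weight vectors in the tests above: for a grouped Conv1x1, set_weights_() consumes (out_channels/groups) * (in_channels/groups) floats per group, then out_channels bias terms. A small standalone sketch; the helper is hypothetical, not part of the NAM API:

#include <iostream>

// Number of floats a grouped 1x1 consumes: grouping divides the weight count
// by `groups`; the bias count is unaffected. (Hypothetical helper.)
long conv1x1_num_weights(long in_channels, long out_channels, long groups, bool bias)
{
  const long per_group = (out_channels / groups) * (in_channels / groups);
  return groups * per_group + (bias ? out_channels : 0);
}

int main()
{
  std::cout << conv1x1_num_weights(4, 4, 1, false) << std::endl; // 16: dense 4x4
  std::cout << conv1x1_num_weights(4, 4, 2, false) << std::endl; // 8: two 2x2 blocks (test_process_grouped_basic)
  std::cout << conv1x1_num_weights(4, 4, 2, true) << std::endl;  // 12: 8 weights + 4 biases (test_process_grouped_with_bias)
  return 0;
}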
diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp
index 2fc20c9..3d20679 100644
--- a/tools/test/test_wavenet/test_full.cpp
+++ b/tools/test/test_wavenet/test_full.cpp
@@ -27,9 +27,10 @@ void test_wavenet_model()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
@@ -84,14 +85,15 @@ void test_wavenet_multiple_arrays()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First array
   std::vector<int> dilations1{1};
+  const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
   // Second array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
 
   std::vector<float> weights;
   // Array 0: rechannel, layer, head_rechannel
@@ -133,9 +135,10 @@ void test_wavenet_zero_input()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
@@ -173,9 +176,10 @@ void test_wavenet_different_buffer_sizes()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
@@ -214,9 +218,10 @@ void test_wavenet_prewarm()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp
index 1a27bea..10eccf4 100644
--- a/tools/test/test_wavenet/test_layer.cpp
+++ b/tools/test/test_wavenet/test_layer.cpp
@@ -23,7 +23,9 @@ void test_gated()
   const std::string activation = "ReLU";
   const bool gated = true;
   const int groups_input = 1;
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  const int groups_1x1 = 1;
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   // Conv, input mixin, 1x1
   std::vector<float> weights{
@@ -95,8 +97,10 @@ void test_layer_getters()
   const std::string activation = "Tanh";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   assert(layer.get_channels() == channels);
   assert(layer.get_kernel_size() == kernelSize);
@@ -113,8 +117,10 @@ void test_non_gated_layer()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   // For non-gated: conv outputs 1 channel, input_mixin outputs 1 channel, 1x1 outputs 1 channel
   // Conv: (1,1,1) weight + (1,) bias
@@ -178,7 +184,9 @@ void test_layer_activations()
   // Test Tanh activation
   {
     const int groups_input = 1;
-    auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, "Tanh", gated, groups_input);
+    const int groups_1x1 = 1;
+    auto layer =
+      nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, "Tanh", gated, groups_input, groups_1x1);
     std::vector<float> weights{1.0f, 0.0f, 1.0f, 1.0f, 0.0f};
     auto it = weights.begin();
     layer.set_weights_(it);
@@ -210,8 +218,10 @@ void test_layer_multichannel()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   assert(layer.get_channels() == channels);
 
diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp
index 7614562..41c435a 100644
--- a/tools/test/test_wavenet/test_layer_array.cpp
+++ b/tools/test/test_wavenet/test_layer_array.cpp
@@ -25,9 +25,10 @@ void test_layer_array_basic()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   const int numFrames = 4;
   layer_array.SetMaxBufferSize(numFrames);
@@ -80,9 +81,10 @@ void test_layer_array_receptive_field()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   long rf = layer_array.get_receptive_field();
   // Expected: sum of dilation * (kernel_size - 1) for each layer
@@ -107,9 +109,10 @@ void test_layer_array_with_head_input()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   const int numFrames = 2;
   layer_array.SetMaxBufferSize(numFrames);
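The wavenet layer tests above all serialize weights in the same fixed order: conv (weight, then bias), input_mixin (weight only), then the 1x1 (weight, then bias). A sketch of the expected count for the simple layers used in these tests; the helper is hypothetical, and the conv term assumes the grouped Conv1D stores one (out/groups) x (in/groups) block per group per kernel tap, mirroring Conv1x1:

#include <iostream>

long layer_num_weights(long channels, long kernel_size, long condition_size, bool gated, long groups_input,
                       long groups_1x1)
{
  const long conv_out = gated ? 2 * channels : channels;
  const long conv = groups_input * (conv_out / groups_input) * (channels / groups_input) * kernel_size
                    + conv_out;                         // weight + bias
  const long input_mixin = conv_out * condition_size;   // no bias
  const long one_by_one = groups_1x1 * (channels / groups_1x1) * (channels / groups_1x1)
                          + channels;                   // weight + bias
  return conv + input_mixin + one_by_one;
}

int main()
{
  // The non-gated, 1-channel, kernel-size-1 layer used above: 5 floats,
  // matching vectors like {1.0f, 0.0f, 1.0f, 1.0f, 0.0f}.
  std::cout << layer_num_weights(1, 1, 1, false, 1, 1) << std::endl; // 5
  return 0;
}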
diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index 3991164..a7a5e8f 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -434,8 +434,10 @@ void test_layer_process_realtime_safe()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1);
 
   // Set weights
   std::vector<float> weights{1.0f, 0.0f, // Conv (weight, bias)
@@ -486,8 +488,10 @@ void test_layer_grouped_process_realtime_safe()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 2; // groups_input > 1
+  const int groups_1x1 = 2; // 1x1 is also grouped
 
-  auto layer = nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1);
 
   // Set weights for grouped convolution
   // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels
@@ -525,16 +529,20 @@
   weights.push_back(1.0f);
   weights.push_back(1.0f);
   weights.push_back(1.0f);
-  // 1x1: (channels, channels) = (4, 4) weights + (4,) bias
-  // Identity matrix
-  for (int i = 0; i < 4; i++)
-  {
-    for (int j = 0; j < 4; j++)
-    {
-      weights.push_back((i == j) ? 1.0f : 0.0f);
-    }
-  }
-  // 1x1 bias: zeros
+  // 1x1: grouped with groups_1x1=2, channels=4
+  // Each group processes 2 channels: Group 0 (channels 0-1), Group 1 (channels 2-3)
+  // Weight layout: for each group g, for each (out_ch, in_ch) in that group
+  // Group 0: identity matrix for channels 0-1 (2x2)
+  weights.push_back(1.0f); // out_ch=0, in_ch=0
+  weights.push_back(0.0f); // out_ch=0, in_ch=1
+  weights.push_back(0.0f); // out_ch=1, in_ch=0
+  weights.push_back(1.0f); // out_ch=1, in_ch=1
+  // Group 1: identity matrix for channels 2-3 (2x2)
+  weights.push_back(1.0f); // out_ch=2, in_ch=2
+  weights.push_back(0.0f); // out_ch=2, in_ch=3
+  weights.push_back(0.0f); // out_ch=3, in_ch=2
+  weights.push_back(1.0f); // out_ch=3, in_ch=3
+  // 1x1 bias: 4 values (one per output channel)
   weights.push_back(0.0f);
   weights.push_back(0.0f);
   weights.push_back(0.0f);
@@ -590,9 +598,10 @@ void test_layer_array_process_realtime_safe()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   // Set weights: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   std::vector<float> weights{1.0f, // Rechannel
@@ -657,14 +666,15 @@ void test_process_realtime_safe()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First layer array
   std::vector<int> dilations1{1};
+  const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
   // Second layer array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
 
   // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   // Array 1: same structure
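Finally, the point of the feature in numbers: grouping the 1x1 divides its per-frame multiply count by the number of groups. A standalone back-of-envelope; the channel count is arbitrary:

#include <iostream>

int main()
{
  const long channels = 16;
  for (long groups : {1L, 2L, 4L, 8L})
  {
    const long per_group = channels / groups;
    // One (channels/groups) x (channels/groups) block per group.
    const long mults = groups * per_group * per_group;
    std::cout << "groups=" << groups << " -> " << mults << " multiplies/frame" << std::endl;
  }
  return 0;
}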