45 changes: 26 additions & 19 deletions NAM/wavenet.cpp
@@ -13,12 +13,14 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize)
{
_conv.SetMaxBufferSize(maxBufferSize);
_input_mixin.SetMaxBufferSize(maxBufferSize);
_z.resize(this->_conv.get_out_channels(), maxBufferSize);
const long z_channels = this->_conv.get_out_channels(); // This is 2*bottleneck when gated, bottleneck when not
_z.resize(z_channels, maxBufferSize);
_1x1.SetMaxBufferSize(maxBufferSize);
// Pre-allocate output buffers
const long channels = this->get_channels();
this->_output_next_layer.resize(channels, maxBufferSize);
this->_output_head.resize(channels, maxBufferSize);
// _output_head stores the activated portion: bottleneck rows (the actual bottleneck value, not doubled)
this->_output_head.resize(this->_bottleneck, maxBufferSize);
}

void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)
@@ -30,7 +32,7 @@ void nam::wavenet::_Layer::set_weights_(std::vector<float>::iterator& weights)

void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames)
{
const long channels = this->get_channels();
const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels

// Step 1: input convolutions
this->_conv.Process(input, num_frames);
@@ -50,19 +52,20 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
// do this column-wise:
for (int i = 0; i < num_frames; i++)
{
this->_activation->apply(this->_z.block(0, i, channels, 1));
this->_activation->apply(this->_z.block(0, i, bottleneck, 1));
// TODO Need to support other activation functions here instead of hardcoded sigmoid
activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(channels, i, channels, 1));
activations::Activation::get_activation("Sigmoid")->apply(this->_z.block(bottleneck, i, bottleneck, 1));
}
this->_z.block(0, 0, channels, num_frames).array() *= this->_z.block(channels, 0, channels, num_frames).array();
_1x1.process_(_z.topRows(channels), num_frames); // Might not be RT safe
this->_z.block(0, 0, bottleneck, num_frames).array() *=
this->_z.block(bottleneck, 0, bottleneck, num_frames).array();
_1x1.process_(_z.topRows(bottleneck), num_frames); // Might not be RT safe
}

// Store output to head (skip connection: activated conv output)
if (!this->_gated)
this->_output_head.leftCols(num_frames).noalias() = this->_z.leftCols(num_frames);
else
this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(channels).leftCols(num_frames);
this->_output_head.leftCols(num_frames).noalias() = this->_z.topRows(bottleneck).leftCols(num_frames);
// Store output to next layer (residual connection: input + _1x1 output)
this->_output_next_layer.leftCols(num_frames).noalias() =
input.leftCols(num_frames) + _1x1.GetOutput().leftCols(num_frames);
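
Aside (not part of the diff): the gated path above is easy to sanity-check in isolation. A minimal standalone Eigen sketch of the same arithmetic, assuming tanh as the layer activation and made-up sizes; it only illustrates why the activated block has bottleneck rows rather than channels:

#include <Eigen/Dense>

int main()
{
  const int bottleneck = 4, num_frames = 8; // illustrative sizes only
  // Stand-in for _z after the conv + input mixin: 2*bottleneck rows when gated.
  Eigen::MatrixXf z = Eigen::MatrixXf::Random(2 * bottleneck, num_frames);

  Eigen::ArrayXXf top = z.topRows(bottleneck).array();     // layer-activation half
  Eigen::ArrayXXf gate = z.bottomRows(bottleneck).array(); // sigmoid-gate half
  // tanh(top) * sigmoid(gate), done elementwise as in the loop above.
  Eigen::ArrayXXf activated = top.tanh() / (1.0f + (-gate).exp());

  // The result has bottleneck rows: this is what feeds the head (skip) path,
  // and what the 1x1 maps back up from bottleneck to channels for the residual.
  return activated.rows() == bottleneck ? 0 : 1;
}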
Expand All @@ -72,15 +75,17 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
// LayerArray =================================================================

nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
const int channels, const int kernel_size, const std::vector<int>& dilations,
const std::string activation, const bool gated, const bool head_bias,
const int groups_input, const int groups_1x1)
const int channels, const int bottleneck, const int kernel_size,
const std::vector<int>& dilations, const std::string activation,
const bool gated, const bool head_bias, const int groups_input,
const int groups_1x1)
: _rechannel(input_size, channels, false)
, _head_rechannel(channels, head_size, head_bias)
, _head_rechannel(bottleneck, head_size, head_bias)
, _bottleneck(bottleneck)
{
for (size_t i = 0; i < dilations.size(); i++)
this->_layers.push_back(
_Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
this->_layers.push_back(_Layer(
condition_size, channels, bottleneck, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
}

void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
@@ -94,7 +99,7 @@ void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize)
// Pre-allocate output buffers
const long channels = this->_get_channels();
this->_layer_outputs.resize(channels, maxBufferSize);
this->_head_inputs.resize(channels, maxBufferSize);
this->_head_inputs.resize(this->_bottleneck, maxBufferSize);
}


@@ -199,9 +204,9 @@ nam::wavenet::WaveNet::WaveNet(const std::vector<nam::wavenet::LayerArrayParams>
{
this->_layer_arrays.push_back(nam::wavenet::_LayerArray(
layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
layer_array_params[i].channels, layer_array_params[i].bottleneck, layer_array_params[i].kernel_size,
layer_array_params[i].dilations, layer_array_params[i].activation, layer_array_params[i].gated,
layer_array_params[i].head_bias, layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
if (i > 0)
if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
{
@@ -300,8 +305,10 @@ std::unique_ptr<nam::DSP> nam::wavenet::Factory(const nlohmann::json& config, st
nlohmann::json layer_config = config["layers"][i];
const int groups = layer_config.value("groups", 1); // defaults to 1
const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
const int channels = layer_config["channels"];
const int bottleneck = layer_config.value("bottleneck", channels); // defaults to channels if not present
layer_array_params.push_back(nam::wavenet::LayerArrayParams(
layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], channels, bottleneck,
layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
layer_config["head_bias"], groups, groups_1x1));
}
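
Aside (not part of the diff): on the model-file side, "bottleneck" is an optional per-layer-array field read by the Factory above. A minimal sketch of one entry of config["layers"], with assumed illustrative values (the field names follow the parsing code; the numbers are not from this PR):

#include <nlohmann/json.hpp>

// One hypothetical entry of config["layers"]; values are illustrative only.
const nlohmann::json example_layer_config = {
  {"input_size", 1},   {"condition_size", 1}, {"head_size", 8},
  {"channels", 16},
  {"bottleneck", 8},   // optional: the Factory falls back to "channels" when absent
  {"kernel_size", 3},  {"dilations", {1, 2, 4, 8}},
  {"activation", "Tanh"}, {"gated", true}, {"head_bias", false}
  // "groups" and "groups_1x1" are also optional and default to 1
};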
31 changes: 20 additions & 11 deletions NAM/wavenet.h
@@ -16,13 +16,14 @@ namespace wavenet
class _Layer
{
public:
_Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
_Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation,
const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
: _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
, _input_mixin(condition_size, gated ? 2 * channels : channels, false)
, _1x1(channels, channels, true, groups_1x1)
: _conv(channels, gated ? 2 * bottleneck : bottleneck, kernel_size, true, dilation, groups_input)
, _input_mixin(condition_size, gated ? 2 * bottleneck : bottleneck, false)
, _1x1(bottleneck, channels, true, groups_1x1)
, _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
, _gated(gated) {};
, _gated(gated)
, _bottleneck(bottleneck) {};
// Resize all arrays to be able to process `maxBufferSize` frames.
void SetMaxBufferSize(const int maxBufferSize);
// Set the parameters of this module
@@ -71,18 +72,21 @@ class _Layer

activations::Activation* _activation;
const bool _gated;
const int _bottleneck; // Internal channel count (not doubled when gated)
};

class LayerArrayParams
{
public:
LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
const int bottleneck_, const int kernel_size_, const std::vector<int>&& dilations_,
const std::string activation_, const bool gated_, const bool head_bias_, const int groups_input,
const int groups_1x1_)
: input_size(input_size_)
, condition_size(condition_size_)
, head_size(head_size_)
, channels(channels_)
, bottleneck(bottleneck_)
, kernel_size(kernel_size_)
, dilations(std::move(dilations_))
, activation(activation_)
@@ -97,6 +101,7 @@ class LayerArrayParams
const int condition_size;
const int head_size;
const int channels;
const int bottleneck;
const int kernel_size;
std::vector<int> dilations;
const std::string activation;
@@ -111,8 +116,9 @@ class _LayerArray
{
public:
_LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
const bool head_bias, const int groups_input, const int groups_1x1);
const int bottleneck, const int kernel_size, const std::vector<int>& dilations,
const std::string activation, const bool gated, const bool head_bias, const int groups_input,
const int groups_1x1);

void SetMaxBufferSize(const int maxBufferSize);

@@ -150,12 +156,15 @@ class _LayerArray
std::vector<_Layer> _layers;
// Output from last layer (for next layer array)
Eigen::MatrixXf _layer_outputs;
// Accumulated head inputs from all layers
// Accumulated head inputs from all layers (bottleneck channels)
Eigen::MatrixXf _head_inputs;

// Rechannel for the head
// Rechannel for the head (bottleneck -> head_size)
Conv1x1 _head_rechannel;

// Bottleneck size (internal channel count)
const int _bottleneck;

long _get_channels() const;
// Common processing logic after head inputs are set
void ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames);
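
Aside (not part of the diff): a quick worked check of the shapes these members now imply, using assumed sizes (channels = 16, bottleneck = 8, gated) that are not taken from this PR:

// Assumed example sizes, not from the PR.
constexpr int channels = 16, bottleneck = 8, head_size = 4;
constexpr bool gated = true;

// _conv / _input_mixin output width: doubled bottleneck when gated.
constexpr int conv_out = gated ? 2 * bottleneck : bottleneck; // 16
static_assert(conv_out == 2 * bottleneck, "gating doubles the bottleneck, not the channels");

// _1x1 maps bottleneck -> channels (8 -> 16) for the residual path, while
// _head_rechannel in _LayerArray maps bottleneck -> head_size (8 -> 4).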
2 changes: 1 addition & 1 deletion tools/CMakeLists.txt
@@ -20,7 +20,7 @@ set_target_properties(run_tests PROPERTIES COMPILE_OPTIONS "-O0")
# Release/RelWithDebInfo/MinSizeRel build types automatically define NDEBUG
# We use a compile option to undefine it, which works on GCC, Clang, and MSVC
target_compile_options(run_tests PRIVATE
$<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-U_NDEBUG>
$<$<OR:$<CONFIG:Release>,$<CONFIG:RelWithDebInfo>,$<CONFIG:MinSizeRel>>:-UNDEBUG>
)

source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES})
3 changes: 3 additions & 0 deletions tools/run_tests.cpp
@@ -104,6 +104,8 @@ int main()
test_wavenet::test_layer::test_non_gated_layer();
test_wavenet::test_layer::test_layer_activations();
test_wavenet::test_layer::test_layer_multichannel();
test_wavenet::test_layer::test_layer_bottleneck();
test_wavenet::test_layer::test_layer_bottleneck_gated();
test_wavenet::test_layer_array::test_layer_array_basic();
test_wavenet::test_layer_array::test_layer_array_receptive_field();
test_wavenet::test_layer_array::test_layer_array_with_head_input();
@@ -118,6 +120,7 @@ int main()
test_wavenet::test_conv1d_grouped_process_realtime_safe();
test_wavenet::test_conv1d_grouped_dilated_process_realtime_safe();
test_wavenet::test_layer_process_realtime_safe();
test_wavenet::test_layer_bottleneck_process_realtime_safe();
test_wavenet::test_layer_grouped_process_realtime_safe();
test_wavenet::test_layer_array_process_realtime_safe();
test_wavenet::test_process_realtime_safe();
21 changes: 13 additions & 8 deletions tools/test/test_wavenet/test_full.cpp
Expand Up @@ -19,6 +19,7 @@ void test_wavenet_model()
const int condition_size = 1;
const int head_size = 1;
const int channels = 1;
const int bottleneck = channels;
const int kernel_size = 1;
std::vector<int> dilations{1};
const std::string activation = "ReLU";
@@ -29,7 +30,7 @@ void test_wavenet_model()
const int groups = 1;
const int groups_1x1 = 1;

nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
layer_array_params.push_back(std::move(params));
@@ -85,15 +86,16 @@ void test_wavenet_multiple_arrays()
std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
// First array
std::vector<int> dilations1{1};
const int bottleneck = channels;
const int groups_1x1 = 1;
layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
kernel_size, std::move(dilations1), activation, gated,
head_bias, groups, groups_1x1));
bottleneck, kernel_size, std::move(dilations1), activation,
gated, head_bias, groups, groups_1x1));
// Second array (head_size of first must match channels of second)
std::vector<int> dilations2{1};
layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
kernel_size, std::move(dilations2), activation, gated,
head_bias, groups, groups_1x1));
bottleneck, kernel_size, std::move(dilations2), activation,
gated, head_bias, groups, groups_1x1));

std::vector<float> weights;
// Array 0: rechannel, layer, head_rechannel
@@ -127,6 +129,7 @@ void test_wavenet_zero_input()
const int condition_size = 1;
const int head_size = 1;
const int channels = 1;
const int bottleneck = channels;
const int kernel_size = 1;
std::vector<int> dilations{1};
const std::string activation = "ReLU";
@@ -137,7 +140,7 @@ void test_wavenet_zero_input()
const int groups = 1;
const int groups_1x1 = 1;

nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
layer_array_params.push_back(std::move(params));
@@ -168,6 +171,7 @@ void test_wavenet_different_buffer_sizes()
const int condition_size = 1;
const int head_size = 1;
const int channels = 1;
const int bottleneck = channels;
const int kernel_size = 1;
std::vector<int> dilations{1};
const std::string activation = "ReLU";
@@ -178,7 +182,7 @@ void test_wavenet_different_buffer_sizes()
const int groups = 1;
const int groups_1x1 = 1;

nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
layer_array_params.push_back(std::move(params));
@@ -210,6 +214,7 @@ void test_wavenet_prewarm()
const int condition_size = 1;
const int head_size = 1;
const int channels = 1;
const int bottleneck = channels;
const int kernel_size = 3;
std::vector<int> dilations{1, 2, 4};
const std::string activation = "ReLU";
@@ -220,7 +225,7 @@ void test_wavenet_prewarm()
const int groups = 1;
const int groups_1x1 = 1;

nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size,
std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
layer_array_params.push_back(std::move(params));