diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp
index 8940314..dc46891 100644
--- a/NAM/dsp.cpp
+++ b/NAM/dsp.cpp
@@ -2,6 +2,7 @@
 #include <cmath> // pow, tanh, expf
 #include <filesystem>
 #include <fstream>
+#include <stdexcept>
 #include <string>
 #include <unordered_set>
 #include <vector>
@@ -206,8 +207,21 @@ std::unique_ptr<DSP> nam::linear::Factory(const nlohmann::json& config, std
 
 // Conv1x1 ====================================================================
 
-nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias)
+nam::Conv1x1::Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups)
 {
+  // Validate that channels divide evenly by groups
+  if (in_channels % groups != 0)
+  {
+    throw std::runtime_error("in_channels (" + std::to_string(in_channels) + ") must be divisible by numGroups ("
+                             + std::to_string(groups) + ")");
+  }
+  if (out_channels % groups != 0)
+  {
+    throw std::runtime_error("out_channels (" + std::to_string(out_channels) + ") must be divisible by numGroups ("
+                             + std::to_string(groups) + ")");
+  }
+
+  this->_num_groups = groups;
   this->_weight.resize(out_channels, in_channels);
   this->_do_bias = _bias;
   if (_bias)
@@ -222,9 +236,28 @@ void nam::Conv1x1::SetMaxBufferSize(const int maxBufferSize)
 
 void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights)
 {
-  for (int i = 0; i < this->_weight.rows(); i++)
-    for (int j = 0; j < this->_weight.cols(); j++)
-      this->_weight(i, j) = *(weights++);
+  if (this->_weight.size() > 0)
+  {
+    const long out_channels = this->_weight.rows();
+    const long in_channels = this->_weight.cols();
+    const int numGroups = this->_num_groups;
+    const long out_per_group = out_channels / numGroups;
+    const long in_per_group = in_channels / numGroups;
+
+    // For grouped convolutions, weights are organized per group.
+    // Weight layout: [group0, group1, ..., groupN-1];
+    // each group's weight matrix is (out_channels/numGroups, in_channels/numGroups).
+    for (int g = 0; g < numGroups; g++)
+    {
+      for (auto i = 0; i < out_per_group; i++)
+      {
+        for (auto j = 0; j < in_per_group; j++)
+        {
+          this->_weight(g * out_per_group + i, g * in_per_group + j) = *(weights++);
+        }
+      }
+    }
+  }
   if (this->_do_bias)
     for (int i = 0; i < this->_bias.size(); i++)
       this->_bias(i) = *(weights++);
@@ -232,16 +265,85 @@ void nam::Conv1x1::set_weights_(std::vector<float>::iterator& weights)
 
 Eigen::MatrixXf nam::Conv1x1::process(const Eigen::MatrixXf& input, const int num_frames) const
 {
-  if (this->_do_bias)
-    return (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
+  const int numGroups = this->_num_groups;
+  const long in_channels = get_in_channels();
+  const long out_channels = get_out_channels();
+  const long in_per_group = in_channels / numGroups;
+  const long out_per_group = out_channels / numGroups;
+
+  Eigen::MatrixXf result(out_channels, num_frames);
+
+  if (numGroups == 1)
+  {
+    // Standard convolution (no grouping)
+    if (this->_do_bias)
+      result = (this->_weight * input.leftCols(num_frames)).colwise() + this->_bias;
+    else
+      result = this->_weight * input.leftCols(num_frames);
+  }
   else
-    return this->_weight * input.leftCols(num_frames);
+  {
+    // Grouped convolution: process each group separately
+    result.setZero();
+    for (int g = 0; g < numGroups; g++)
+    {
+      // Extract input slice for this group
+      auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);
+
+      // Extract weight slice for this group
+      auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);
+
+      // Extract output slice for this group
+      auto output_group = result.middleRows(g * out_per_group, out_per_group);
+
+      // Perform grouped convolution: output_group = weight_group * input_group
+      output_group.noalias() = weight_group * input_group;
+    }
+
+    // Add bias if present
+    if (this->_do_bias)
+      result.colwise() += this->_bias;
+  }
+
+  return result;
 }
 
 void nam::Conv1x1::process_(const Eigen::MatrixXf& input, const int num_frames)
 {
   assert(num_frames <= _output.cols());
-  _output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);
+
+  const int numGroups = this->_num_groups;
+  const long in_channels = get_in_channels();
+  const long out_channels = get_out_channels();
+  const long in_per_group = in_channels / numGroups;
+  const long out_per_group = out_channels / numGroups;
+
+  if (numGroups == 1)
+  {
+    // Standard convolution (no grouping)
+    _output.leftCols(num_frames).noalias() = this->_weight * input.leftCols(num_frames);
+  }
+  else
+  {
+    // Grouped convolution: process each group separately
+    _output.leftCols(num_frames).setZero();
+    for (int g = 0; g < numGroups; g++)
+    {
+      // Extract input slice for this group
+      auto input_group = input.leftCols(num_frames).middleRows(g * in_per_group, in_per_group);
+
+      // Extract weight slice for this group
+      auto weight_group = this->_weight.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group);
+
+      // Extract output slice for this group
+      auto output_group = _output.leftCols(num_frames).middleRows(g * out_per_group, out_per_group);
+
+      // Perform grouped convolution: output_group = weight_group * input_group
+      output_group.noalias() = weight_group * input_group;
+    }
+  }
+
+  // Add bias if present
   if (this->_do_bias)
   {
     _output.leftCols(num_frames).colwise() += this->_bias;
diff --git a/NAM/dsp.h b/NAM/dsp.h
index 3f9df92..f359a68 100644
--- a/NAM/dsp.h
+++ b/NAM/dsp.h
@@ -177,7 +177,7 @@ std::unique_ptr<DSP> Factory(const nlohmann::json& config, std::vector<float>& w
 class Conv1x1
 {
 public:
-  Conv1x1(const int in_channels, const int out_channels, const bool _bias);
+  Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups = 1);
   // Get the entire internal output buffer. This is intended for internal wiring
   // between layers/arrays; callers should treat the buffer as pre-allocated
   // storage and only consider the first `num_frames` columns valid for a given
@@ -199,6 +199,7 @@ class Conv1x1
 protected:
   Eigen::MatrixXf _weight;
   Eigen::VectorXf _bias;
+  int _num_groups;
 
 private:
   Eigen::MatrixXf _output;
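Aside (not part of the patch): the grouped branches in process()/process_() above compute exactly what a single multiply by a block-diagonal weight matrix would, just without touching the zero blocks. A minimal standalone check of that equivalence, assuming only that Eigen is on the include path; every name below is local to the sketch:

#include <cassert>
#include <iostream>
#include <Eigen/Dense>

int main()
{
  // Shapes chosen arbitrarily for the check.
  const int groups = 2, in_per_group = 3, out_per_group = 2, num_frames = 5;
  const int in_channels = groups * in_per_group;
  const int out_channels = groups * out_per_group;

  // Build a block-diagonal weight matrix: one random block per group.
  Eigen::MatrixXf W = Eigen::MatrixXf::Zero(out_channels, in_channels);
  for (int g = 0; g < groups; g++)
    W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group) =
      Eigen::MatrixXf::Random(out_per_group, in_per_group);

  const Eigen::MatrixXf input = Eigen::MatrixXf::Random(in_channels, num_frames);

  // Grouped path: one small product per group, as in Conv1x1::process().
  Eigen::MatrixXf grouped(out_channels, num_frames);
  for (int g = 0; g < groups; g++)
    grouped.middleRows(g * out_per_group, out_per_group).noalias() =
      W.block(g * out_per_group, g * in_per_group, out_per_group, in_per_group)
      * input.middleRows(g * in_per_group, in_per_group);

  // Dense path: one big product against the block-diagonal matrix.
  const Eigen::MatrixXf dense = W * input;

  assert((grouped - dense).cwiseAbs().maxCoeff() < 1e-5f);
  std::cout << "grouped product matches block-diagonal product" << std::endl;
  return 0;
}

Skipping the zero blocks is where the savings come from; the per-group products only touch 1/groups of the dense weight matrix.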
diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp
index a1075ae..2a1aba7 100644
--- a/NAM/wavenet.cpp
+++ b/NAM/wavenet.cpp
@@ -74,13 +74,13 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma
 
 nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int condition_size, const int head_size,
                                        const int channels, const int kernel_size, const std::vector<int>& dilations,
                                        const std::string activation, const bool gated, const bool head_bias,
-                                       const int groups_input)
+                                       const int groups_input, const int groups_1x1)
 : _rechannel(input_size, channels, false)
 , _head_rechannel(channels, head_size, head_bias)
 {
   for (size_t i = 0; i < dilations.size(); i++)
     this->_layers.push_back(
-      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input));
+      _Layer(condition_size, channels, kernel_size, dilations[i], activation, gated, groups_input, groups_1x1));
 }
 
@@ -201,7 +201,7 @@ nam::wavenet::WaveNet::WaveNet(const std::vector
       layer_array_params[i].input_size, layer_array_params[i].condition_size, layer_array_params[i].head_size,
       layer_array_params[i].channels, layer_array_params[i].kernel_size, layer_array_params[i].dilations,
       layer_array_params[i].activation, layer_array_params[i].gated, layer_array_params[i].head_bias,
-      layer_array_params[i].groups_input));
+      layer_array_params[i].groups_input, layer_array_params[i].groups_1x1));
     if (i > 0)
       if (layer_array_params[i].channels != layer_array_params[i - 1].head_size)
       {
@@ -299,10 +299,11 @@ std::unique_ptr<DSP> nam::wavenet::Factory(const nlohmann::json& config, st
   {
     nlohmann::json layer_config = config["layers"][i];
     const int groups = layer_config.value("groups", 1); // defaults to 1
+    const int groups_1x1 = layer_config.value("groups_1x1", 1); // defaults to 1
     layer_array_params.push_back(nam::wavenet::LayerArrayParams(
       layer_config["input_size"], layer_config["condition_size"], layer_config["head_size"], layer_config["channels"],
       layer_config["kernel_size"], layer_config["dilations"], layer_config["activation"], layer_config["gated"],
-      layer_config["head_bias"], groups));
+      layer_config["head_bias"], groups, groups_1x1));
   }
   const bool with_head = !config["head"].is_null();
   const float head_scale = config["head_scale"];
diff --git a/NAM/wavenet.h b/NAM/wavenet.h
index b460ef5..71d2eff 100644
--- a/NAM/wavenet.h
+++ b/NAM/wavenet.h
@@ -17,10 +17,10 @@ class _Layer
 {
 public:
   _Layer(const int condition_size, const int channels, const int kernel_size, const int dilation,
-         const std::string activation, const bool gated, const int groups_input)
+         const std::string activation, const bool gated, const int groups_input, const int groups_1x1)
   : _conv(channels, gated ? 2 * channels : channels, kernel_size, true, dilation, groups_input)
   , _input_mixin(condition_size, gated ? 2 * channels : channels, false)
-  , _1x1(channels, channels, true)
+  , _1x1(channels, channels, true, groups_1x1)
   , _activation(activations::Activation::get_activation(activation)) // needs to support activations with parameters
   , _gated(gated) {};
   // Resize all arrays to be able to process `maxBufferSize` frames.
@@ -78,7 +78,7 @@ class LayerArrayParams
 public:
   LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_,
                    const int kernel_size_, const std::vector<int>&& dilations_, const std::string activation_,
-                   const bool gated_, const bool head_bias_, const int groups_input)
+                   const bool gated_, const bool head_bias_, const int groups_input, const int groups_1x1_)
   : input_size(input_size_)
   , condition_size(condition_size_)
   , head_size(head_size_)
@@ -89,6 +89,7 @@ class LayerArrayParams
   , gated(gated_)
   , head_bias(head_bias_)
   , groups_input(groups_input)
+  , groups_1x1(groups_1x1_)
   {
   }
 
@@ -102,6 +103,7 @@ class LayerArrayParams
   const bool gated;
   const bool head_bias;
   const int groups_input;
+  const int groups_1x1;
 };
 
 // An array of layers with the same channels, kernel sizes, activations.
@@ -110,7 +112,7 @@ class _LayerArray
 {
 public:
   _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels,
               const int kernel_size, const std::vector<int>& dilations, const std::string activation, const bool gated,
-              const bool head_bias, const int groups_input);
+              const bool head_bias, const int groups_input, const int groups_1x1);
 
   void SetMaxBufferSize(const int maxBufferSize);
diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp
index e879050..aa28629 100644
--- a/tools/run_tests.cpp
+++ b/tools/run_tests.cpp
@@ -4,6 +4,7 @@
 #include <iostream>
 #include "test/test_activations.cpp"
 #include "test/test_conv1d.cpp"
+#include "test/test_conv_1x1.cpp"
 #include "test/test_convnet.cpp"
 #include "test/test_dsp.cpp"
 #include "test/test_fast_lut.cpp"
@@ -83,6 +84,21 @@ int main()
   test_conv1d::test_process_grouped_channel_isolation();
   test_conv1d::test_get_num_weights_grouped();
 
+  test_conv_1x1::test_construct();
+  test_conv_1x1::test_construct_with_groups();
+  test_conv_1x1::test_construct_validation_in_channels();
+  test_conv_1x1::test_construct_validation_out_channels();
+  test_conv_1x1::test_process_basic();
+  test_conv_1x1::test_process_with_bias();
+  test_conv_1x1::test_process_underscore();
+  test_conv_1x1::test_process_grouped_basic();
+  test_conv_1x1::test_process_grouped_with_bias();
+  test_conv_1x1::test_process_grouped_multiple_groups();
+  test_conv_1x1::test_process_grouped_channel_isolation();
+  test_conv_1x1::test_process_underscore_grouped();
+  test_conv_1x1::test_set_max_buffer_size();
+  test_conv_1x1::test_process_multiple_calls();
+
   test_wavenet::test_layer::test_gated();
   test_wavenet::test_layer::test_layer_getters();
   test_wavenet::test_layer::test_non_gated_layer();
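One note before the new tests: backward compatibility of model files rests on the two Factory lines above that read "groups" and "groups_1x1" with nlohmann's value(key, default). A standalone sketch of that behavior, assuming nlohmann/json is installed; the config literals are invented for illustration:

#include <iostream>
#include <nlohmann/json.hpp>

int main()
{
  // A pre-grouping layer config (no "groups_1x1" key) and a new-style one.
  const nlohmann::json old_layer = {{"channels", 8}};
  const nlohmann::json new_layer = {{"channels", 8}, {"groups_1x1", 2}};

  // json::value(key, fallback) returns the fallback when the key is absent,
  // so old configs behave as groups_1x1 == 1.
  std::cout << old_layer.value("groups_1x1", 1) << std::endl; // prints 1
  std::cout << new_layer.value("groups_1x1", 1) << std::endl; // prints 2
  return 0;
}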
diff --git a/tools/test/test_conv_1x1.cpp b/tools/test/test_conv_1x1.cpp
new file mode 100644
index 0000000..cb3e234
--- /dev/null
+++ b/tools/test/test_conv_1x1.cpp
@@ -0,0 +1,495 @@
+// Tests for Conv1x1
+
+#include <cassert>
+#include <cmath>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <Eigen/Dense>
+
+#include "NAM/dsp.h"
+
+namespace test_conv_1x1
+{
+// Test basic construction
+void test_construct()
+{
+  const int in_channels = 2;
+  const int out_channels = 3;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  assert(conv.get_in_channels() == in_channels);
+  assert(conv.get_out_channels() == out_channels);
+}
+
+// Test construction with groups
+void test_construct_with_groups()
+{
+  const int in_channels = 4;
+  const int out_channels = 6;
+  const bool do_bias = false;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  assert(conv.get_in_channels() == in_channels);
+  assert(conv.get_out_channels() == out_channels);
+}
+
+// Test construction validation - in_channels not divisible by groups
+void test_construct_validation_in_channels()
+{
+  const int in_channels = 5;
+  const int out_channels = 6;
+  const bool do_bias = false;
+  const int groups = 2; // 5 not divisible by 2
+  bool threw = false;
+  try
+  {
+    nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  }
+  catch (const std::runtime_error&)
+  {
+    threw = true;
+  }
+  assert(threw);
+}
+
+// Test construction validation - out_channels not divisible by groups
+void test_construct_validation_out_channels()
+{
+  const int in_channels = 4;
+  const int out_channels = 5;
+  const bool do_bias = false;
+  const int groups = 2; // 5 not divisible by 2
+  bool threw = false;
+  try
+  {
+    nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  }
+  catch (const std::runtime_error&)
+  {
+    threw = true;
+  }
+  assert(threw);
+}
+
+// Test basic process without groups
+void test_process_basic()
+{
+  const int in_channels = 2;
+  const int out_channels = 3;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int num_frames = 2;
+
+  // Set weights: 3x2 matrix
+  // [1.0, 2.0]
+  // [3.0, 4.0]
+  // [5.0, 6.0]
+  std::vector<float> weights{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 1.0f;
+  input(1, 0) = 2.0f;
+  input(0, 1) = 3.0f;
+  input(1, 1) = 4.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Frame 0: W * [1.0; 2.0] = [5.0; 11.0; 17.0]
+  assert(std::abs(output(0, 0) - 5.0f) < 0.01f); // 1.0*1.0 + 2.0*2.0
+  assert(std::abs(output(1, 0) - 11.0f) < 0.01f); // 3.0*1.0 + 4.0*2.0
+  assert(std::abs(output(2, 0) - 17.0f) < 0.01f); // 5.0*1.0 + 6.0*2.0
+  // Frame 1: W * [3.0; 4.0] = [11.0; 25.0; 39.0]
+  assert(std::abs(output(0, 1) - 11.0f) < 0.01f); // 1.0*3.0 + 2.0*4.0
+  assert(std::abs(output(1, 1) - 25.0f) < 0.01f); // 3.0*3.0 + 4.0*4.0
+  assert(std::abs(output(2, 1) - 39.0f) < 0.01f); // 5.0*3.0 + 6.0*4.0
+}
+
+// Test process with bias
+void test_process_with_bias()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const bool do_bias = true;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int num_frames = 1;
+
+  // Set weights: 2x2 identity matrix
+  // [1.0, 0.0]
+  // [0.0, 1.0]
+  // Bias: [10.0, 20.0]
+  std::vector<float> weights{1.0f, 0.0f, 0.0f, 1.0f, 10.0f, 20.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 5.0f;
+  input(1, 0) = 7.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Output should be input + bias (identity weights)
+  assert(std::abs(output(0, 0) - 15.0f) < 0.01f); // 5.0 + 10.0
+  assert(std::abs(output(1, 0) - 27.0f) < 0.01f); // 7.0 + 20.0
+}
+
+// Test process_ method (stores to internal buffer)
+void test_process_underscore()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int num_frames = 1;
+
+  // Set weights: 2x2 identity matrix
+  std::vector<float> weights{1.0f, 0.0f, 0.0f, 1.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 3.0f;
+  input(1, 0) = 4.0f;
+
+  conv.process_(input, num_frames);
+  auto output = conv.GetOutput().leftCols(num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  assert(std::abs(output(0, 0) - 3.0f) < 0.01f);
+  assert(std::abs(output(1, 0) - 4.0f) < 0.01f);
+}
+
+// Test basic grouped convolution with 2 groups
+void test_process_grouped_basic()
+{
+  const int in_channels = 4;
+  const int out_channels = 4;
+  const bool do_bias = false;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 2;
+
+  // For grouped convolution with 2 groups:
+  // Group 0: processes input channels 0-1 -> output channels 0-1
+  // Group 1: processes input channels 2-3 -> output channels 2-3
+  // Each group has out_per_group=2, in_per_group=2
+  // Weight layout: [group0, group1]
+  // Group 0: identity matrix (2x2)
+  // Group 1: scale by 2.0 (2x2)
+  std::vector<float> weights;
+  // Group 0: identity
+  weights.push_back(1.0f); // out[0], in[0]
+  weights.push_back(0.0f); // out[0], in[1]
+  weights.push_back(0.0f); // out[1], in[0]
+  weights.push_back(1.0f); // out[1], in[1]
+  // Group 1: scale by 2.0
+  weights.push_back(2.0f); // out[2], in[2]
+  weights.push_back(0.0f); // out[2], in[3]
+  weights.push_back(0.0f); // out[3], in[2]
+  weights.push_back(2.0f); // out[3], in[3]
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 1.0f; // Group 0, channel 0
+  input(1, 0) = 2.0f; // Group 0, channel 1
+  input(2, 0) = 3.0f; // Group 1, channel 0
+  input(3, 0) = 4.0f; // Group 1, channel 1
+  input(0, 1) = 5.0f;
+  input(1, 1) = 6.0f;
+  input(2, 1) = 7.0f;
+  input(3, 1) = 8.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Group 0: identity transformation
+  assert(std::abs(output(0, 0) - 1.0f) < 0.01f); // out[0] = in[0]
+  assert(std::abs(output(1, 0) - 2.0f) < 0.01f); // out[1] = in[1]
+  // Group 1: double transformation
+  assert(std::abs(output(2, 0) - 6.0f) < 0.01f); // out[2] = 2.0 * in[2]
+  assert(std::abs(output(3, 0) - 8.0f) < 0.01f); // out[3] = 2.0 * in[3]
+  // Frame 1
+  assert(std::abs(output(0, 1) - 5.0f) < 0.01f);
+  assert(std::abs(output(1, 1) - 6.0f) < 0.01f);
+  assert(std::abs(output(2, 1) - 14.0f) < 0.01f); // 2.0 * 7.0
+  assert(std::abs(output(3, 1) - 16.0f) < 0.01f); // 2.0 * 8.0
+}
+
+// Test grouped convolution with bias
+void test_process_grouped_with_bias()
+{
+  const int in_channels = 4;
+  const int out_channels = 4;
+  const bool do_bias = true;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  std::vector<float> weights;
+  // Group 0 weights (2x2 identity)
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 1 weights (2x2 identity)
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Bias: [1.0, 2.0, 3.0, 4.0]
+  weights.push_back(1.0f);
+  weights.push_back(2.0f);
+  weights.push_back(3.0f);
+  weights.push_back(4.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 10.0f;
+  input(1, 0) = 20.0f;
+  input(2, 0) = 30.0f;
+  input(3, 0) = 40.0f;
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Output should be input + bias (identity weights)
+  assert(std::abs(output(0, 0) - 11.0f) < 0.01f); // 10.0 + 1.0
+  assert(std::abs(output(1, 0) - 22.0f) < 0.01f); // 20.0 + 2.0
+  assert(std::abs(output(2, 0) - 33.0f) < 0.01f); // 30.0 + 3.0
+  assert(std::abs(output(3, 0) - 44.0f) < 0.01f); // 40.0 + 4.0
+}
+
+// Test grouped convolution with 4 groups
+void test_process_grouped_multiple_groups()
+{
+  const int in_channels = 8;
+  const int out_channels = 8;
+  const bool do_bias = false;
+  const int groups = 4;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  // Each group processes 2 input channels -> 2 output channels
+  std::vector<float> weights;
+  // Group 0: scale by 1.0
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 1: scale by 2.0
+  weights.push_back(2.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(2.0f);
+  // Group 2: scale by 3.0
+  weights.push_back(3.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(3.0f);
+  // Group 3: scale by 4.0
+  weights.push_back(4.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(4.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  for (int i = 0; i < in_channels; i++)
+  {
+    input(i, 0) = static_cast<float>(i + 1);
+  }
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Group 0: channels 0-1 scaled by 1.0
+  assert(std::abs(output(0, 0) - 1.0f) < 0.01f);
+  assert(std::abs(output(1, 0) - 2.0f) < 0.01f);
+  // Group 1: channels 2-3 scaled by 2.0
+  assert(std::abs(output(2, 0) - 6.0f) < 0.01f); // 3.0 * 2.0
+  assert(std::abs(output(3, 0) - 8.0f) < 0.01f); // 4.0 * 2.0
+  // Group 2: channels 4-5 scaled by 3.0
+  assert(std::abs(output(4, 0) - 15.0f) < 0.01f); // 5.0 * 3.0
+  assert(std::abs(output(5, 0) - 18.0f) < 0.01f); // 6.0 * 3.0
+  // Group 3: channels 6-7 scaled by 4.0
+  assert(std::abs(output(6, 0) - 28.0f) < 0.01f); // 7.0 * 4.0
+  assert(std::abs(output(7, 0) - 32.0f) < 0.01f); // 8.0 * 4.0
+}
+
+// Test that groups don't mix channels (channel isolation)
+void test_process_grouped_channel_isolation()
+{
+  const int in_channels = 6;
+  const int out_channels = 6;
+  const bool do_bias = false;
+  const int groups = 3;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  // 3 groups, each processes 2 channels
+  // Group 0: channels 0-1, set to zero (zero matrix)
+  // Group 1: channels 2-3, identity
+  // Group 2: channels 4-5, identity
+  std::vector<float> weights;
+  // Group 0: zero matrix
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  // Group 1: identity
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 2: identity
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 10.0f; // Should be zeroed by group 0
+  input(1, 0) = 20.0f; // Should be zeroed by group 0
+  input(2, 0) = 30.0f; // Should pass through group 1
+  input(3, 0) = 40.0f; // Should pass through group 1
+  input(4, 0) = 50.0f; // Should pass through group 2
+  input(5, 0) = 60.0f; // Should pass through group 2
+
+  Eigen::MatrixXf output = conv.process(input, num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  // Group 0: should be zero
+  assert(std::abs(output(0, 0)) < 0.01f);
+  assert(std::abs(output(1, 0)) < 0.01f);
+  // Group 1: should pass through
+  assert(std::abs(output(2, 0) - 30.0f) < 0.01f);
+  assert(std::abs(output(3, 0) - 40.0f) < 0.01f);
+  // Group 2: should pass through
+  assert(std::abs(output(4, 0) - 50.0f) < 0.01f);
+  assert(std::abs(output(5, 0) - 60.0f) < 0.01f);
+}
+
+// Test process_ with groups
+void test_process_underscore_grouped()
+{
+  const int in_channels = 4;
+  const int out_channels = 4;
+  const bool do_bias = false;
+  const int groups = 2;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias, groups);
+  const int num_frames = 1;
+
+  // Group 0: identity, Group 1: scale by 2.0
+  std::vector<float> weights;
+  // Group 0: identity
+  weights.push_back(1.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(1.0f);
+  // Group 1: scale by 2.0
+  weights.push_back(2.0f);
+  weights.push_back(0.0f);
+  weights.push_back(0.0f);
+  weights.push_back(2.0f);
+
+  auto it = weights.begin();
+  conv.set_weights_(it);
+
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input(in_channels, num_frames);
+  input(0, 0) = 1.0f;
+  input(1, 0) = 2.0f;
+  input(2, 0) = 3.0f;
+  input(3, 0) = 4.0f;
+
+  conv.process_(input, num_frames);
+  auto output = conv.GetOutput().leftCols(num_frames);
+
+  assert(output.rows() == out_channels);
+  assert(output.cols() == num_frames);
+  assert(std::abs(output(0, 0) - 1.0f) < 0.01f);
+  assert(std::abs(output(1, 0) - 2.0f) < 0.01f);
+  assert(std::abs(output(2, 0) - 6.0f) < 0.01f); // 2.0 * 3.0
+  assert(std::abs(output(3, 0) - 8.0f) < 0.01f); // 2.0 * 4.0
+}
+
+// Test SetMaxBufferSize
+void test_set_max_buffer_size()
+{
+  const int in_channels = 2;
+  const int out_channels = 3;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  const int maxBufferSize = 128;
+
+  conv.SetMaxBufferSize(maxBufferSize);
+  auto output = conv.GetOutput();
+  assert(output.rows() == out_channels);
+  assert(output.cols() == maxBufferSize);
+}
+
+// Test multiple calls to process
+void test_process_multiple_calls()
+{
+  const int in_channels = 2;
+  const int out_channels = 2;
+  const bool do_bias = false;
+  nam::Conv1x1 conv(in_channels, out_channels, do_bias);
+  // Identity matrix
+  std::vector<float> weights{1.0f, 0.0f, 0.0f, 1.0f};
+  auto it = weights.begin();
+  conv.set_weights_(it);
+  conv.SetMaxBufferSize(64);
+
+  Eigen::MatrixXf input1(in_channels, 1);
+  input1(0, 0) = 1.0f;
+  input1(1, 0) = 2.0f;
+
+  Eigen::MatrixXf output1 = conv.process(input1, 1);
+  assert(std::abs(output1(0, 0) - 1.0f) < 0.01f);
+  assert(std::abs(output1(1, 0) - 2.0f) < 0.01f);
+
+  Eigen::MatrixXf input2(in_channels, 1);
+  input2(0, 0) = 3.0f;
+  input2(1, 0) = 4.0f;
+
+  Eigen::MatrixXf output2 = conv.process(input2, 1);
+  assert(std::abs(output2(0, 0) - 3.0f) < 0.01f);
+  assert(std::abs(output2(1, 0) - 4.0f) < 0.01f);
+}
+} // namespace test_conv_1x1
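A side calculation that helps when hand-building the flat weight vectors in the tests above: for a grouped Conv1x1, set_weights_() consumes (out_channels/groups) * (in_channels/groups) floats per group, then out_channels bias terms. A small standalone sketch; the helper is hypothetical, not part of the NAM API:

#include <iostream>

// Number of floats a grouped 1x1 consumes: grouping divides the weight count
// by `groups`; the bias count is unaffected. (Hypothetical helper.)
long conv1x1_num_weights(long in_channels, long out_channels, long groups, bool bias)
{
  const long per_group = (out_channels / groups) * (in_channels / groups);
  return groups * per_group + (bias ? out_channels : 0);
}

int main()
{
  std::cout << conv1x1_num_weights(4, 4, 1, false) << std::endl; // 16: dense 4x4
  std::cout << conv1x1_num_weights(4, 4, 2, false) << std::endl; // 8: two 2x2 blocks (test_process_grouped_basic)
  std::cout << conv1x1_num_weights(4, 4, 2, true) << std::endl;  // 12: 8 weights + 4 biases (test_process_grouped_with_bias)
  return 0;
}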
diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp
index 2fc20c9..3d20679 100644
--- a/tools/test/test_wavenet/test_full.cpp
+++ b/tools/test/test_wavenet/test_full.cpp
@@ -27,9 +27,10 @@ void test_wavenet_model()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
@@ -84,14 +85,15 @@ void test_wavenet_multiple_arrays()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First array
   std::vector<int> dilations1{1};
+  const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
   // Second array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
 
   std::vector<float> weights;
   // Array 0: rechannel, layer, head_rechannel
@@ -133,9 +135,10 @@ void test_wavenet_zero_input()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
@@ -173,9 +176,10 @@ void test_wavenet_different_buffer_sizes()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
@@ -214,9 +218,10 @@ void test_wavenet_prewarm()
   const float head_scale = 1.0f;
   const bool with_head = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
   nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, kernel_size,
-                                        std::move(dilations), activation, gated, head_bias, groups);
+                                        std::move(dilations), activation, gated, head_bias, groups, groups_1x1);
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   layer_array_params.push_back(std::move(params));
 
diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp
index 1a27bea..10eccf4 100644
--- a/tools/test/test_wavenet/test_layer.cpp
+++ b/tools/test/test_wavenet/test_layer.cpp
@@ -23,7 +23,9 @@ void test_gated()
   const std::string activation = "ReLU";
   const bool gated = true;
   const int groups_input = 1;
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  const int groups_1x1 = 1;
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   // Conv, input mixin, 1x1
   std::vector<float> weights{
@@ -95,8 +97,10 @@ void test_layer_getters()
   const std::string activation = "Tanh";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   assert(layer.get_channels() == channels);
   assert(layer.get_kernel_size() == kernelSize);
@@ -113,8 +117,10 @@ void test_non_gated_layer()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   // For non-gated: conv outputs 1 channel, input_mixin outputs 1 channel, 1x1 outputs 1 channel
   // Conv: (1,1,1) weight + (1,) bias
@@ -178,7 +184,9 @@ void test_layer_activations()
   // Test Tanh activation
   {
     const int groups_input = 1;
-    auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, "Tanh", gated, groups_input);
+    const int groups_1x1 = 1;
+    auto layer =
+      nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, "Tanh", gated, groups_input, groups_1x1);
     std::vector<float> weights{1.0f, 0.0f, 1.0f, 1.0f, 0.0f};
     auto it = weights.begin();
     layer.set_weights_(it);
@@ -210,8 +218,10 @@ void test_layer_multichannel()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(conditionSize, channels, kernelSize, dilation, activation, gated, groups_input, groups_1x1);
 
   assert(layer.get_channels() == channels);
 
diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp
index 7614562..41c435a 100644
--- a/tools/test/test_wavenet/test_layer_array.cpp
+++ b/tools/test/test_wavenet/test_layer_array.cpp
@@ -25,9 +25,10 @@ void test_layer_array_basic()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   const int numFrames = 4;
   layer_array.SetMaxBufferSize(numFrames);
@@ -80,9 +81,10 @@ void test_layer_array_receptive_field()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   long rf = layer_array.get_receptive_field();
   // Expected: sum of dilation * (kernel_size - 1) for each layer
@@ -107,9 +109,10 @@ void test_layer_array_with_head_input()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   const int numFrames = 2;
   layer_array.SetMaxBufferSize(numFrames);
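The wavenet layer tests above all serialize weights in the same fixed order: conv (weight, then bias), input_mixin (weight only), then the 1x1 (weight, then bias). A sketch of the expected count for the simple layers used in these tests; the helper is hypothetical, and the conv term assumes the grouped Conv1D stores one (out/groups) x (in/groups) block per group per kernel tap, mirroring Conv1x1:

#include <iostream>

long layer_num_weights(long channels, long kernel_size, long condition_size, bool gated, long groups_input,
                       long groups_1x1)
{
  const long conv_out = gated ? 2 * channels : channels;
  const long conv = groups_input * (conv_out / groups_input) * (channels / groups_input) * kernel_size
                    + conv_out;                         // weight + bias
  const long input_mixin = conv_out * condition_size;   // no bias
  const long one_by_one = groups_1x1 * (channels / groups_1x1) * (channels / groups_1x1)
                          + channels;                   // weight + bias
  return conv + input_mixin + one_by_one;
}

int main()
{
  // The non-gated, 1-channel, kernel-size-1 layer used above: 5 floats,
  // matching vectors like {1.0f, 0.0f, 1.0f, 1.0f, 0.0f}.
  std::cout << layer_num_weights(1, 1, 1, false, 1, 1) << std::endl; // 5
  return 0;
}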
diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp
index 3991164..a7a5e8f 100644
--- a/tools/test/test_wavenet/test_real_time_safe.cpp
+++ b/tools/test/test_wavenet/test_real_time_safe.cpp
@@ -434,8 +434,10 @@ void test_layer_process_realtime_safe()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 1;
+  const int groups_1x1 = 1;
 
-  auto layer = nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1);
 
   // Set weights
   std::vector<float> weights{1.0f, 0.0f, // Conv (weight, bias)
@@ -486,8 +488,10 @@ void test_layer_grouped_process_realtime_safe()
   const std::string activation = "ReLU";
   const bool gated = false;
   const int groups_input = 2; // groups_input > 1
+  const int groups_1x1 = 2; // 1x1 is also grouped
 
-  auto layer = nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input);
+  auto layer =
+    nam::wavenet::_Layer(condition_size, channels, kernel_size, dilation, activation, gated, groups_input, groups_1x1);
 
   // Set weights for grouped convolution
   // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels
@@ -525,16 +529,20 @@
   weights.push_back(1.0f);
   weights.push_back(1.0f);
   weights.push_back(1.0f);
-  // 1x1: (channels, channels) = (4, 4) weights + (4,) bias
-  // Identity matrix
-  for (int i = 0; i < 4; i++)
-  {
-    for (int j = 0; j < 4; j++)
-    {
-      weights.push_back((i == j) ? 1.0f : 0.0f);
-    }
-  }
-  // 1x1 bias: zeros
+  // 1x1: grouped with groups_1x1=2, channels=4
+  // Each group processes 2 channels: Group 0 (channels 0-1), Group 1 (channels 2-3)
+  // Weight layout: for each group g, for each (out_ch, in_ch) in that group
+  // Group 0: identity matrix for channels 0-1 (2x2)
+  weights.push_back(1.0f); // out_ch=0, in_ch=0
+  weights.push_back(0.0f); // out_ch=0, in_ch=1
+  weights.push_back(0.0f); // out_ch=1, in_ch=0
+  weights.push_back(1.0f); // out_ch=1, in_ch=1
+  // Group 1: identity matrix for channels 2-3 (2x2)
+  weights.push_back(1.0f); // out_ch=2, in_ch=2
+  weights.push_back(0.0f); // out_ch=2, in_ch=3
+  weights.push_back(0.0f); // out_ch=3, in_ch=2
+  weights.push_back(1.0f); // out_ch=3, in_ch=3
+  // 1x1 bias: 4 values (one per output channel)
   weights.push_back(0.0f);
   weights.push_back(0.0f);
   weights.push_back(0.0f);
@@ -590,9 +598,10 @@ void test_layer_array_process_realtime_safe()
   const bool gated = false;
   const bool head_bias = false;
   const int groups = 1;
+  const int groups_1x1 = 1;
 
-  auto layer_array = nam::wavenet::_LayerArray(
-    input_size, condition_size, head_size, channels, kernel_size, dilations, activation, gated, head_bias, groups);
+  auto layer_array = nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, kernel_size, dilations,
+                                               activation, gated, head_bias, groups, groups_1x1);
 
   // Set weights: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   std::vector<float> weights{1.0f, // Rechannel
@@ -657,14 +666,15 @@ void test_process_realtime_safe()
   std::vector<nam::wavenet::LayerArrayParams> layer_array_params;
   // First layer array
   std::vector<int> dilations1{1};
+  const int groups_1x1 = 1;
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations1), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
   // Second layer array (head_size of first must match channels of second)
   std::vector<int> dilations2{1};
   layer_array_params.push_back(nam::wavenet::LayerArrayParams(head_size, condition_size, head_size, channels,
                                                               kernel_size, std::move(dilations2), activation, gated,
-                                                              head_bias, groups));
+                                                              head_bias, groups, groups_1x1));
 
   // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1)
   // Array 1: same structure
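Finally, the point of the feature in numbers: grouping the 1x1 divides its per-frame multiply count by the number of groups. A standalone back-of-envelope; the channel count is arbitrary:

#include <iostream>

int main()
{
  const long channels = 16;
  for (long groups : {1L, 2L, 4L, 8L})
  {
    const long per_group = channels / groups;
    // One (channels/groups) x (channels/groups) block per group.
    const long mults = groups * per_group * per_group;
    std::cout << "groups=" << groups << " -> " << mults << " multiplies/frame" << std::endl;
  }
  return 0;
}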