 #include <cmath> // expf
 #include <unordered_map>
 #include <Eigen/Dense>
+#include <algorithm> // std::clamp (used by FastLUTActivation below)
+#include <functional>
+#include <vector> // std::vector table in FastLUTActivation
 
 namespace nam
 {
@@ -25,6 +26,17 @@ inline float hard_tanh(float x)
   return t > 1 ? 1 : t;
 }
 
+// Hard-tanh with non-zero ("leaky") slopes outside [min_val, max_val].
+inline float leaky_hardtanh(float x, float min_val, float max_val, float min_slope, float max_slope)
+{
+  if (x < min_val) {
+    return (x - min_val) * min_slope + min_val;
+  } else if (x > max_val) {
+    return (x - max_val) * max_slope + max_val;
+  } else {
+    return x;
+  }
+}
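+// For illustration: with min_val = -1, max_val = 1 and both slopes 0.01,
+// leaky_hardtanh(2.0f, -1.0f, 1.0f, 0.01f, 0.01f) == 1.0f + 0.01f * (2.0f - 1.0f) == 1.01f.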
+
 inline float fast_tanh(const float x)
 {
   const float ax = fabsf(x);
@@ -38,14 +50,32 @@ inline float fast_sigmoid(const float x)
 {
   return 0.5f * (fast_tanh(x * 0.5f) + 1.0f);
 }
-
-// Assumes PyTorch default of 0.01 for negative slope. This may change to be
-// configurable in the future.
-inline float leaky_relu(float x)
+
+inline float leaky_relu(float x, float negative_slope)
 {
-  const float negative_slope = 0.01;
   return x > 0.0f ? x : negative_slope * x;
 }
+inline float leaky_relu(float x)
+{
+  return leaky_relu(x, 0.01f);
+}
+
+// Swish (a.k.a. SiLU): x * sigmoid(x).
+inline float swish(float x)
+{
+  return x * sigmoid(x);
+}
+
+// Hard-swish: cheap piecewise approximation of swish (zero below -3, identity above 3).
+inline float hardswish(float x)
+{
+  if (x <= -3.0f) {
+    return 0.0f;
+  } else if (x >= 3.0f) {
+    return x;
+  } else {
+    return x * (x + 3.0f) / 6.0f;
+  }
+}
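+// For illustration: hardswish(1.0f) == 1.0f * (1.0f + 3.0f) / 6.0f, i.e. roughly 0.667f.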
 
 class Activation
 {
@@ -64,6 +94,8 @@ class Activation
   static void enable_fast_tanh();
   static void disable_fast_tanh();
   static bool using_fast_tanh;
+  static void enable_lut(std::string function_name, float min, float max, std::size_t n_points);
+  static void disable_lut(std::string function_name);
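+  // Illustrative calls (the matching .cpp changes are not shown in this diff; "Tanh" is just an example name):
+  //   Activation::enable_lut("Tanh", -5.0f, 5.0f, 4096); // table-based lookup over [-5, 5] with 4096 points
+  //   Activation::disable_lut("Tanh");                    // revert to the exact activation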
 
 protected:
   static std::unordered_map<std::string, Activation*> _activations;
@@ -93,6 +125,30 @@ class ActivationHardTanh : public Activation
   }
 };
 
+class ActivationLeakyHardTanh : public Activation
+{
+public:
+  ActivationLeakyHardTanh() = default;
+  ActivationLeakyHardTanh(float min_val_, float max_val_, float min_slope_, float max_slope_)
+  : min_val(min_val_)
+  , max_val(max_val_)
+  , min_slope(min_slope_)
+  , max_slope(max_slope_)
+  {
+  }
+  void apply(float* data, long size) override
+  {
+    for (long pos = 0; pos < size; pos++)
+    {
+      data[pos] = leaky_hardtanh(data[pos], min_val, max_val, min_slope, max_slope);
+    }
+  }
+
+private:
+  float min_val = -1.0f;
+  float max_val = 1.0f;
+  float min_slope = 0.01f;
+  float max_slope = 0.01f;
+};
+
 class ActivationFastTanh : public Activation
 {
 public:
@@ -120,13 +176,19 @@ class ActivationReLU : public Activation
 class ActivationLeakyReLU : public Activation
 {
 public:
+  ActivationLeakyReLU() = default;
+  ActivationLeakyReLU(float ns)
+  : negative_slope(ns)
+  {
+  }
   void apply(float* data, long size) override
   {
     for (long pos = 0; pos < size; pos++)
     {
-      data[pos] = leaky_relu(data[pos]);
+      data[pos] = leaky_relu(data[pos], negative_slope);
     }
   }
+
+private:
+  float negative_slope = 0.01f;
 };
 
 class ActivationSigmoid : public Activation
@@ -140,5 +202,75 @@ class ActivationSigmoid : public Activation
     }
   }
 };
+
+class ActivationSwish : public Activation
+{
+public:
+  void apply(float* data, long size) override
+  {
+    for (long pos = 0; pos < size; pos++)
+    {
+      data[pos] = swish(data[pos]);
+    }
+  }
+};
+
+class ActivationHardSwish : public Activation
+{
+public:
+  void apply(float* data, long size) override
+  {
+    for (long pos = 0; pos < size; pos++)
+    {
+      data[pos] = hardswish(data[pos]);
+    }
+  }
+};
+
+class FastLUTActivation : public Activation
+{
+public:
+  FastLUTActivation(float min_x, float max_x, std::size_t size, std::function<float(float)> f)
+  : min_x_(min_x), max_x_(max_x), size_(size)
+  {
+    step_ = (max_x - min_x) / (size - 1);
+    inv_step_ = 1.0f / step_;
+    table_.reserve(size);
+
+    // Sample f at evenly spaced points over [min_x, max_x]
+    for (std::size_t i = 0; i < size; ++i) {
+      table_.push_back(f(min_x + i * step_));
+    }
+  }
+
+  // Fast lookup with linear interpolation
+  inline float lookup(float x) const {
+    // Clamp input to range
+    x = std::clamp(x, min_x_, max_x_);
+
+    // Calculate float index
+    float f_idx = (x - min_x_) * inv_step_;
+    std::size_t i = static_cast<std::size_t>(f_idx);
+
+    // Handle edge case at max_x_
+    if (i >= size_ - 1) return table_.back();
+
+    // Linear interpolation: y = y0 + (y1 - y0) * fractional_part
+    float frac = f_idx - static_cast<float>(i);
+    return table_[i] + (table_[i + 1] - table_[i]) * frac;
+  }
+
+  // Activation interface: apply the LUT in place to a raw buffer
+  void apply(float* data, long size) override
+  {
+    for (long pos = 0; pos < size; pos++)
+    {
+      data[pos] = lookup(data[pos]);
+    }
+  }
+
+  // Vector application (batch processing)
+  void apply(std::vector<float>& data) const {
+    for (float& val : data) {
+      val = lookup(val);
+    }
+  }
+
+private:
+  float min_x_, max_x_, step_, inv_step_;
+  std::size_t size_;
+  std::vector<float> table_;
+};
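+// Example usage (a sketch, not part of this header's API): a 1024-point table
+// approximating tanh on [-5, 5], evaluated by linear interpolation.
+//   FastLUTActivation lut_tanh(-5.0f, 5.0f, 1024, [](float v) { return std::tanh(v); });
+//   float y = lut_tanh.lookup(0.3f); // close to tanhf(0.3f)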
+
 }; // namespace activations
 }; // namespace nam