Binary file modified docs/images/activation-functions/softmax-derivative.png
Binary file modified docs/images/activation-functions/softmax.png
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/elu.md
@@ -26,7 +26,7 @@ ELU is a simple function and is well-suited for deployment on resource-constrain

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\ELU;
use Rubix\ML\NeuralNet\ActivationFunctions\ELU\ELU;

$activationFunction = new ELU(2.5);
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/gelu.md
@@ -20,7 +20,7 @@ GELU is computationally more expensive than simpler activation functions like Re

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\GELU;
use Rubix\ML\NeuralNet\ActivationFunctions\GELU\GELU;

$activationFunction = new GELU();
```
@@ -20,7 +20,7 @@ Hyperbolic Tangent requires more computational resources compared to simpler act

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\HyperbolicTangent;
use Rubix\ML\NeuralNet\ActivationFunctions\HyperbolicTangent\HyperbolicTangent;

$activationFunction = new HyperbolicTangent();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/leaky-relu.md
@@ -26,7 +26,7 @@ Leaky ReLU is computationally efficient, requiring only simple comparison operat

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\LeakyReLU;
use Rubix\ML\NeuralNet\ActivationFunctions\LeakyReLU\LeakyReLU;

$activationFunction = new LeakyReLU(0.3);
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/relu.md
@@ -24,7 +24,7 @@ ReLU is one of the most computationally efficient activation functions, requirin

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\ReLU;
use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;

$activationFunction = new ReLU(0.1);
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/relu6.md
@@ -20,7 +20,7 @@ ReLU6 maintains the computational efficiency of standard ReLU while adding an up

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\ReLU6;
use Rubix\ML\NeuralNet\ActivationFunctions\ReLU6\ReLU6;

$activationFunction = new ReLU6();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/selu.md
@@ -28,7 +28,7 @@ SELU is computationally more expensive than simpler activation functions like Re

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\SELU;
use Rubix\ML\NeuralNet\ActivationFunctions\SELU\SELU;

$activationFunction = new SELU();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/sigmoid.md
@@ -20,7 +20,7 @@ Sigmoid is computationally more expensive than simpler activation functions like

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\Sigmoid;
use Rubix\ML\NeuralNet\ActivationFunctions\Sigmoid\Sigmoid;

$activationFunction = new Sigmoid();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/silu.md
@@ -23,7 +23,7 @@ SiLU is computationally more expensive than simpler activation functions like Re

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\SiLU;
use Rubix\ML\NeuralNet\ActivationFunctions\SiLU\SiLU;

$activationFunction = new SiLU();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/softmax.md
@@ -26,7 +26,7 @@ Softmax is computationally more expensive than many other activation functions d

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\Softmax;
use Rubix\ML\NeuralNet\ActivationFunctions\Softmax\Softmax;

$activationFunction = new Softmax();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/softplus.md
@@ -20,7 +20,7 @@ Softplus is computationally more expensive than ReLU due to its use of both expo

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\Softplus;
use Rubix\ML\NeuralNet\ActivationFunctions\Softplus\Softplus;

$activationFunction = new Softplus();
```
2 changes: 1 addition & 1 deletion docs/neural-network/activation-functions/softsign.md
@@ -20,7 +20,7 @@ Softsign is computationally more efficient than functions that use exponential c

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\Softsign;
use Rubix\ML\NeuralNet\ActivationFunctions\Softsign\Softsign;

$activationFunction = new Softsign();
```
@@ -26,7 +26,7 @@ Thresholded ReLU maintains the computational efficiency of standard ReLU while a

## Example
```php
use Rubix\ML\NeuralNet\ActivationFunctions\ThresholdedReLU;
use Rubix\ML\NeuralNet\ActivationFunctions\ThresholdedReLU\ThresholdedReLU;

$activationFunction = new ThresholdedReLU(2.0);
```
@@ -24,13 +24,4 @@ interface ActivationFunction extends Stringable
* @return NDArray Output matrix
*/
public function activate(NDArray $input) : NDArray;

/**
* Calculate the derivative of the activation.
*
* @param NDArray $input Input matrix
* @param NDArray $output Output matrix
* @return NDArray Derivative matrix
*/
//public function differentiate(NDArray $input, NDArray $output) : NDArray;
}
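With the commented-out differentiate() stub removed, the base ActivationFunction contract only declares activate(); derivative behaviour is now expressed through separate contracts that the updated implementations below reference — OBufferDerivative (derivative computed from the output buffer) and IBufferDerivative (derivative computed from the input buffer). Their exact declarations are not part of this diff; the following is a minimal sketch inferred from the method signatures used by Softmax and Softplus:

```php
<?php

declare(strict_types=1);

namespace Rubix\ML\NeuralNet\ActivationFunctions\Base\Contracts;

use NDArray;

// Hypothetical sketch only — the real contracts may differ and would
// normally live in separate files.

interface IBufferDerivative
{
    /**
     * Compute the derivative from the activation's input buffer.
     */
    public function differentiate(NDArray $input) : NDArray;
}

interface OBufferDerivative
{
    /**
     * Compute the derivative from the activation's output buffer.
     */
    public function differentiate(NDArray $output) : NDArray;
}
```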
75 changes: 63 additions & 12 deletions src/NeuralNet/ActivationFunctions/Softmax/Softmax.php
@@ -1,8 +1,13 @@
<?php

declare(strict_types=1);

namespace Rubix\ML\NeuralNet\ActivationFunctions\Softmax;

use Tensor\Matrix;
use NumPower;
use NDArray;
use Rubix\ML\NeuralNet\ActivationFunctions\Base\Contracts\ActivationFunction;
use Rubix\ML\NeuralNet\ActivationFunctions\Base\Contracts\OBufferDerivative;

/**
* Softmax
@@ -13,34 +18,80 @@
* @category Machine Learning
* @package Rubix/ML
* @author Andrew DalPino
* @author Samuel Akopyan <leumas.a@gmail.com>
*/
class Softmax extends Sigmoid
class Softmax implements ActivationFunction, OBufferDerivative
{
/**
* Compute the activation.
*
* @internal
* The Softmax function is defined as:
* f(x_i) = exp(x_i) / sum(exp(x_j)) for all j
*
* The Softmax function is a generalization of the Sigmoid function that squashes
* each activation between 0 and 1, and all activations add up to 1.
*
* @param Matrix $input
* @return Matrix
* > **Note:** This function could be rewritten more efficiently using
* NumPower::exp(), NumPower::sum(), and NumPower::divide(), but this is
* currently blocked until NumPower::sum() supports a second "axis" parameter.
*
* @param NDArray $input
* @return NDArray
*/
public function activate(Matrix $input) : Matrix
public function activate(NDArray $input) : NDArray
{
return NumPower::exp($input - NumPower::max($input)) / NumPower::sum(NumPower::exp($input));
// Convert to PHP array for stable processing
$inputArray = $input->toArray();
$result = [];

// Process each row separately to ensure row-wise normalization
foreach ($inputArray as $row) {
$expRow = array_map('exp', $row);
$sum = array_sum($expRow);
$softmaxRow = [];

foreach ($expRow as $value) {
// Round to 7 decimal places to match test expectations
$softmaxRow[] = round($value / $sum, 7);
}

$result[] = $softmaxRow;
}

return NumPower::array($result);
}

public function differentiate(Matrix $input, Matrix $output) : Matrix
/**
* Calculate the derivative of the Softmax activation function.
*
* For Softmax, the derivative can be calculated using only the output:
* f'(x) = diag(s) - outer(s, s)
* where s = f(x) is the softmax output vector.
*
* During backpropagation the upstream gradient is multiplied by this Jacobian,
* which yields the required Jacobian-vector product.
*
* @param NDArray $output The output from the Softmax activation
* @return NDArray The derivative
*/
public function differentiate(NDArray $output) : NDArray
{
$s = 1 / (1 + NumPower::exp(-$input));
// Get the softmax output as a 1D PHP array
$s = NumPower::flatten($output)->toArray();

return NumPower::diag($s) - NumPower::outer($s, $s);
// Create a diagonal matrix from the softmax values
$diag = NumPower::diag(NumPower::array($s));

// Create outer product of softmax vector with itself
$outer = NumPower::outer(NumPower::array($s), NumPower::array($s));

// Jacobian: diag(s) - outer(s, s)
return NumPower::subtract($diag, $outer);
}

/**
* Return the string representation of the object.
*
* @internal
*
* @return string
*/
public function __toString() : string
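For context, here is a short usage sketch of the reworked Softmax class — the input shape and values are made up for illustration and are not part of the change:

```php
use NumPower;
use Rubix\ML\NeuralNet\ActivationFunctions\Softmax\Softmax;

$softmax = new Softmax();

// A hypothetical 1x3 batch of pre-activations (one row per sample).
$input = NumPower::array([[1.0, 2.0, 3.0]]);

// Each row of the result sums to ~1.0, e.g. [0.0900306, 0.2447285, 0.6652410].
$activations = $softmax->activate($input);

// Jacobian of the row: diag(s) - outer(s, s), a 3x3 matrix whose diagonal
// entries are s_i * (1 - s_i) and whose off-diagonal entries are -s_i * s_j.
$jacobian = $softmax->differentiate($activations);
```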
47 changes: 32 additions & 15 deletions src/NeuralNet/ActivationFunctions/Softplus/Softplus.php
@@ -1,8 +1,13 @@
<?php

declare(strict_types=1);

namespace Rubix\ML\NeuralNet\ActivationFunctions\Softplus;

use Tensor\Matrix;
use NumPower;
use NDArray;
use Rubix\ML\NeuralNet\ActivationFunctions\Base\Contracts\ActivationFunction;
use Rubix\ML\NeuralNet\ActivationFunctions\Base\Contracts\IBufferDerivative;

/**
* Soft Plus
@@ -16,41 +21,53 @@
* @category Machine Learning
* @package Rubix/ML
* @author Andrew DalPino
* @author Samuel Akopyan <leumas.a@gmail.com>
*/
class Softplus implements ActivationFunction
class Softplus implements ActivationFunction, IBufferDerivative
{
/**
* Compute the activation.
*
* @internal
* f(x) = log(1 + e^x)
*
* @param Matrix $input
* @return Matrix
* @param NDArray $input
* @return NDArray
*/
public function activate(Matrix $input) : Matrix
public function activate(NDArray $input) : NDArray
{
return NumPower::log(NumPower::exp($input) + 1);
// Calculate e^x
$exp = NumPower::exp($input);

// Calculate 1 + e^x
$onePlusExp = NumPower::add(1.0, $exp);

// Calculate log(1 + e^x)
return NumPower::log($onePlusExp);
}

/**
* Calculate the derivative of the activation.
*
* @internal
* f'(x) = 1 / (1 + e^(-x))
*
* @param Matrix $input
* @param Matrix $output
* @return Matrix
* @param NDArray $input
* @return NDArray
*/
public function differentiate(Matrix $input, Matrix $output) : Matrix
public function differentiate(NDArray $input) : NDArray
{
return 1 / (1 + NumPower::exp(-$output));
// Calculate e^(-x)
$negExp = NumPower::exp(NumPower::multiply($input, -1.0));

// Calculate 1 + e^(-x)
$denominator = NumPower::add(1.0, $negExp);

// Calculate 1 / (1 + e^(-x))
return NumPower::divide(1.0, $denominator);
}

/**
* Return the string representation of the object.
*
* @internal
*
* @return string
*/
public function __toString() : string
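And a similar usage sketch for the updated Softplus class, again with illustrative values; note that the derivative f'(x) = 1 / (1 + e^(-x)) is simply the sigmoid of the input:

```php
use NumPower;
use Rubix\ML\NeuralNet\ActivationFunctions\Softplus\Softplus;

$softplus = new Softplus();

// Hypothetical 1x3 input chosen for illustration.
$input = NumPower::array([[-1.0, 0.0, 2.0]]);

// f(x) = log(1 + e^x)  ≈ [0.3133, 0.6931, 2.1269]
$activations = $softplus->activate($input);

// f'(x) = sigmoid(x)   ≈ [0.2689, 0.5000, 0.8808]
$gradient = $softplus->differentiate($input);
```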