Skip to content

Commit 042f891

Browse files
committed
feat: implement ALIF and PLIF modules with Triton kernels, add surrogate gradient classes, and enhance benchmark functionality
1 parent daf68ab commit 042f891

File tree

12 files changed

+747
-4
lines changed

12 files changed

+747
-4
lines changed

benchmarks/benchmark_plif.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import torch
2+
import triton
3+
import time
4+
import sys
5+
import os
6+
7+
# Add src to path
8+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))
9+
10+
from tether.functional.lif import LIFSubFunction
11+
from tether.functional.plif import PLIFSubFunction
12+
13+
def benchmark_plif():
    """
    Benchmark LIF vs PLIF (Triton vs Triton).

    Runs both fused sub-functions forward-only on identical random input
    of shape (seq_len, batch_size * dim) and reports the mean wall-clock
    time per iteration, plus the relative overhead of PLIF (per-neuron
    decay/threshold vectors) over LIF (scalar decay/threshold).

    Requires a CUDA device; prints a message and returns early otherwise.
    """
    if not torch.cuda.is_available():
        print("Skipping benchmark on CPU.")
        return

    device = torch.device("cuda")
    print(f"Benchmarking LIF vs PLIF on {torch.cuda.get_device_name(0)}")

    batch_size = 32
    seq_len = 2048
    dim = 768
    n_neurons = batch_size * dim

    x_seq = torch.randn(seq_len, n_neurons, device=device)
    v_init = torch.zeros(n_neurons, device=device)

    # LIF Params (Scalar)
    decay_scalar = torch.tensor(0.9, device=device)
    threshold_scalar = torch.tensor(1.0, device=device)
    alpha = torch.tensor(2.0, device=device)

    # PLIF Params (Vector) — one decay/threshold value per neuron.
    decay_vector = torch.full((n_neurons,), 0.9, device=device)
    threshold_vector = torch.full((n_neurons,), 1.0, device=device)

    # Warmup: triggers Triton JIT compilation and CUDA context init so
    # the timed loops below measure steady-state kernel time only.
    print("Warming up...")
    for _ in range(10):
        with torch.no_grad():
            LIFSubFunction.apply(x_seq, v_init, decay_scalar, threshold_scalar, alpha, 0)
            PLIFSubFunction.apply(x_seq, v_init, decay_vector, threshold_vector, alpha, 0)

    # Hoisted out of the timed region (was previously assigned after the
    # LIF timer started, polluting the first measurement).
    iterations = 50

    # Benchmark LIF
    torch.cuda.synchronize()
    # perf_counter() is monotonic and higher-resolution than time.time(),
    # which can jump under NTP adjustments.
    start_time = time.perf_counter()
    with torch.no_grad():
        for _ in range(iterations):
            LIFSubFunction.apply(x_seq, v_init, decay_scalar, threshold_scalar, alpha, 0)
    torch.cuda.synchronize()  # flush async kernel launches before stopping the clock
    lif_time = (time.perf_counter() - start_time) / iterations
    print(f"LIF Time: {lif_time * 1000:.3f} ms")

    # Benchmark PLIF
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    with torch.no_grad():
        for _ in range(iterations):
            PLIFSubFunction.apply(x_seq, v_init, decay_vector, threshold_vector, alpha, 0)
    torch.cuda.synchronize()
    plif_time = (time.perf_counter() - start_time) / iterations
    print(f"PLIF Time: {plif_time * 1000:.3f} ms")

    print(f"Overhead: {plif_time / lif_time:.2f}x slower")
70+
71+
# Run the benchmark when executed directly as a script.
if __name__ == "__main__":
    benchmark_plif()

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def get_project_metadata():
4040
project = 'Tether'
4141
copyright = '2025, Khushiyant'
4242
author = meta.get("author", "Khushiyant")
43-
release = meta.get("release", "0.6.0")
43+
release = meta.get("release", "0.1.0")
4444

4545
extensions = [
4646
'sphinx.ext.autodoc', # Core library for html generation from docstrings

src/tether/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# Public package API: neuron models (LIF/ALIF/PLIF), spiking transformer
# building blocks, and surrogate-gradient classes re-exported at top level.
from .nn.lif import LIF
from .nn.alif import ALIF
from .nn.plif import PLIF
from .nn.attention import SpikingSelfAttention
from .nn.block import SpikingTransformerBlock
from .nn.surrogates import Surrogate, Arctan, Sigmoid, FastSigmoid

__all__ = [
    "LIF",
    "ALIF",
    "PLIF",
    "SpikingSelfAttention",
    "SpikingTransformerBlock",
    "Surrogate",
    "Arctan",
    "Sigmoid",
    "FastSigmoid"
]

src/tether/data/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Re-export the dataset wrapper and spike-encoding helpers as the
# public API of tether.data.
from .encoding import SpikingDatasetWrapper, rate_encoding, latency_encoding

__all__ = ["SpikingDatasetWrapper", "rate_encoding", "latency_encoding"]

src/tether/functional/alif.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import torch
2+
import triton
3+
from ..kernels.alif import alif_fwd_kernel, alif_bwd_kernel
4+
5+
class ALIFSubFunction(torch.autograd.Function):
    """Autograd bridge for the fused Triton ALIF (adaptive LIF) kernels.

    Both passes launch a single kernel that iterates over all timesteps
    internally; input layout is (n_steps, n_neurons).
    """

    @staticmethod
    def forward(ctx, x_seq, v_init, a_init, decay_v, decay_a, threshold, beta, alpha):
        """
        Forward pass of the ALIF function.

        Args:
            x_seq: input currents, shape (n_steps, n_neurons).
            v_init: initial membrane potential, shape (n_neurons,).
            a_init: initial adaptation state, shape (n_neurons,).
            decay_v, decay_a, threshold, beta: scalar tensors, passed to
                the kernel by value via .item().
            alpha: scalar tensor, saved for the surrogate gradient in
                backward (not used by the forward kernel).

        Returns:
            (out_spikes, v_final, a_final, v_seq, a_seq); v_seq and a_seq
            are marked non-differentiable.
        """
        x_seq, v_init, a_init = x_seq.contiguous(), v_init.contiguous(), a_init.contiguous()
        n_steps, n_neurons = x_seq.shape

        out_spikes = torch.empty_like(x_seq)

        # Spikes are additionally bit-packed, 32 timesteps per int32, so
        # backward can re-read the spike train from a compact buffer.
        n_int32 = (n_steps + 31) // 32
        out_spikes_packed = torch.empty((n_int32, n_neurons), device=x_seq.device, dtype=torch.int32)

        # Full state trajectories are kept for backward; the kernel is
        # expected to fill all of these output buffers in place.
        v_seq = torch.empty_like(x_seq)
        a_seq = torch.empty_like(x_seq)
        v_final = torch.empty_like(v_init)
        a_final = torch.empty_like(a_init)

        # One program per 1024-neuron block; a single launch covers the
        # whole sequence.
        grid = (triton.cdiv(n_neurons, 1024),)
        alif_fwd_kernel[grid](
            x_seq, v_init, a_init,
            out_spikes, out_spikes_packed, v_seq, v_final, a_seq, a_final,
            n_neurons, n_steps, decay_v.item(), decay_a.item(), threshold.item(), beta.item(),
            BLOCK_SIZE=1024
        )

        ctx.save_for_backward(out_spikes_packed, v_seq, a_seq, v_init, a_init, decay_v, decay_a, threshold, beta, alpha)
        # State trajectories are diagnostics only; no gradient flows
        # through them (backward receives None for their grads).
        ctx.mark_non_differentiable(v_seq)
        ctx.mark_non_differentiable(a_seq)
        return out_spikes, v_final, a_final, v_seq, a_seq

    @staticmethod
    def backward(ctx, grad_spikes, grad_v_final, grad_a_final, grad_v_seq, grad_a_seq):
        """
        Backward pass of the ALIF function.

        grad_v_seq / grad_a_seq correspond to the non-differentiable
        outputs and are ignored. Returns one gradient per forward input:
        (grad_x, grad_v_init, grad_a_init, grad_decay_v, grad_decay_a,
        grad_threshold, grad_beta, grad_alpha).
        """
        out_spikes_packed, v_seq, a_seq, v_init, a_init, decay_v, decay_a, threshold, beta, alpha = ctx.saved_tensors
        n_steps, n_neurons = v_seq.shape

        grad_x = torch.empty_like(v_seq)

        # Scalar-parameter gradients: 1-element buffers the kernel
        # accumulates into (presumably via atomics — behavior defined in
        # alif_bwd_kernel, not visible here).
        grad_decay_v = torch.zeros(1, device=grad_spikes.device, dtype=torch.float32)
        grad_decay_a = torch.zeros(1, device=grad_spikes.device, dtype=torch.float32)
        grad_threshold = torch.zeros(1, device=grad_spikes.device, dtype=torch.float32)
        grad_beta = torch.zeros(1, device=grad_spikes.device, dtype=torch.float32)
        grad_alpha = torch.zeros(1, device=grad_spikes.device, dtype=torch.float32)

        # Autograd passes None when v_final/a_final were unused downstream;
        # the kernel needs real (zero) tensors to read from.
        if grad_v_final is None:
            grad_v_final = torch.zeros_like(v_init)
        if grad_a_final is None:
            grad_a_final = torch.zeros_like(a_init)

        grid = (triton.cdiv(n_neurons, 1024),)

        # NOTE(review): forward passes decay_v/decay_a/threshold/beta via
        # .item(), but here the tensors themselves are passed — confirm
        # alif_bwd_kernel expects tensor/pointer arguments for these.
        alif_bwd_kernel[grid](
            grad_spikes.contiguous(), out_spikes_packed,
            v_seq.contiguous(), a_seq.contiguous(),
            grad_x,
            grad_v_final.contiguous(), grad_a_final.contiguous(),
            v_init.contiguous(), a_init.contiguous(),
            n_neurons, n_steps,
            decay_v, decay_a, threshold, beta, alpha,
            grad_decay_v, grad_decay_a, grad_threshold, grad_beta, grad_alpha,
            BLOCK_SIZE=1024
        )

        # NOTE(review): grad_v_final/grad_a_final are returned as the
        # gradients w.r.t. v_init/a_init — this assumes the kernel rewrites
        # those buffers in place with the t=0 state gradients; verify.
        return grad_x, grad_v_final, grad_a_final, grad_decay_v, grad_decay_a, grad_threshold, grad_beta, grad_alpha

src/tether/functional/plif.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import torch
2+
import triton
3+
from ..kernels.plif import plif_fwd_kernel, plif_bwd_kernel
4+
5+
class PLIFSubFunction(torch.autograd.Function):
    """Autograd bridge for the fused Triton PLIF (parametric LIF) kernels.

    Unlike the scalar LIF, decay and threshold are learnable per-neuron
    vectors. Input layout is (n_steps, n_neurons).
    """

    @staticmethod
    def forward(ctx, x_seq, v_init, decay, threshold, alpha, surrogate_type):
        """
        Forward pass of the PLIF function.
        Decay and Threshold are vectors (n_neurons,).

        Args:
            x_seq: input currents, shape (n_steps, n_neurons).
            v_init: initial membrane potential, shape (n_neurons,).
            decay, threshold: per-neuron parameter vectors (n_neurons,),
                passed to the kernel as pointers.
            alpha: scalar tensor for the surrogate gradient (backward only).
            surrogate_type: int selector for the surrogate function;
                non-tensor, so stashed on ctx rather than save_for_backward.

        Returns:
            (out_spikes, v_final, v_seq); v_seq is non-differentiable.
        """
        x_seq, v_init = x_seq.contiguous(), v_init.contiguous()
        decay, threshold = decay.contiguous(), threshold.contiguous()

        n_steps, n_neurons = x_seq.shape

        out_spikes = torch.empty_like(x_seq)
        # Spikes also bit-packed 32 timesteps per int32 for a compact
        # backward-pass replay buffer.
        n_int32 = (n_steps + 31) // 32
        out_spikes_packed = torch.empty((n_int32, n_neurons), device=x_seq.device, dtype=torch.int32)

        v_seq = torch.empty_like(x_seq)
        v_final = torch.empty_like(v_init)

        # One program per 1024-neuron block; single launch for all steps.
        grid = (triton.cdiv(n_neurons, 1024),)
        plif_fwd_kernel[grid](
            x_seq, v_init, out_spikes, out_spikes_packed, v_seq, v_final,
            n_neurons, n_steps, decay, threshold,
            BLOCK_SIZE=1024
        )

        ctx.save_for_backward(out_spikes_packed, v_seq, v_init, decay, threshold, alpha)
        ctx.surrogate_type = surrogate_type
        # v_seq is a diagnostic trajectory; no gradient flows through it.
        ctx.mark_non_differentiable(v_seq)
        return out_spikes, v_final, v_seq

    @staticmethod
    def backward(ctx, grad_spikes, grad_v_final, grad_v_seq):
        # grad_v_seq belongs to the non-differentiable output and is
        # ignored. Returns one gradient per forward input: (grad_x,
        # grad_v_init, grad_decay, grad_threshold, grad_alpha, None) —
        # the trailing None is for the int surrogate_type.
        out_spikes_packed, v_seq, v_init, decay, threshold, alpha = ctx.saved_tensors
        surrogate_type = ctx.surrogate_type
        n_steps, n_neurons = v_seq.shape

        grad_x = torch.empty_like(v_seq)

        # Gradients for parameters (Vectors)
        grad_decay = torch.zeros_like(decay)
        grad_threshold = torch.zeros_like(threshold)
        grad_alpha = torch.zeros(1, device=grad_spikes.device, dtype=torch.float32)

        # Autograd passes None when v_final was unused downstream; the
        # kernel needs a real (zero) tensor to read from.
        if grad_v_final is None:
            grad_v_final = torch.zeros_like(v_init)

        grid = (triton.cdiv(n_neurons, 1024),)

        plif_bwd_kernel[grid](
            grad_spikes.contiguous(), out_spikes_packed,
            v_seq.contiguous(), grad_x,
            grad_v_final.contiguous(), v_init.contiguous(),
            n_neurons, n_steps, decay, threshold, alpha,
            grad_decay, grad_threshold, grad_alpha,
            surrogate_type,
            BLOCK_SIZE=1024
        )

        # NOTE(review): grad_v_final is returned as the gradient w.r.t.
        # v_init — this assumes the kernel rewrites that buffer in place
        # with the t=0 membrane gradient (and that .contiguous() was a
        # no-op view, not a copy); verify against plif_bwd_kernel.
        return grad_x, grad_v_final, grad_decay, grad_threshold, grad_alpha, None

0 commit comments

Comments
 (0)