Adding documentation

michaelciraci · michaelciraci · commit d036dc3ac74d · 2025-03-28T22:01:11.000-05:00
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -19,6 +19,6 @@ jobs:
     - name: Clippy
       run: cargo clippy -- -D warnings
     - name: Build
-      run: cargo build --verbose
+      run: cargo build
     - name: Run tests
-      run: cargo test --verbose
+      run: cargo test
diff --git a/README.md b/README.md
@@ -1,36 +1,43 @@
-## Monarch Butterfly
+[![Build](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml/badge.svg)](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml)
+[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
+[![](https://img.shields.io/crates/v/monarch-butterfly)](https://crates.io/crates/monarch-butterfly)
+[![](https://docs.rs/monarch-butterfly/badge.svg)](https://docs.rs/monarch-butterfly/)
 
-Experimental FFT library with an emphasis on runtime performance.
-The goal of this library is to hand-unroll all loops in a procedural macro
-for optimal SIMD throughput.
+# Monarch Butterfly
 
-This currently only works on powers of two.
+Experimental FFT library where all FFTs are proc-macro generated const-evaluation functions. It is intended for cases where the FFT size is known at compile time; in exchange for that restriction, knowing the FFT size at compile time gives immense gains.
 
-This library implements FFTs for both `f32` and `f64` for the following sizes:
-```
-1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048
-```
+This library implements FFTs for both `f32` and `f64` for sizes `1-200`. The FFTs are auto-generated so this limit could be increased above 200 at the expense of compile time.
 
-This library will use all SIMD features your CPU has available including AVX512,
-assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
+## Features
 
-The larger the FFT sizes, the larger speed boost this library will give you.
+- All functions are auto-generated with proc-macros with unrolled loops
+- Zero `unsafe` code
+- Completely portable
+- Const-evaluation functions
 
-As an example of AVX512 instructions, here is an example on just an FFT
-of size 128: https://godbolt.org/z/rz48azEsd (`Ctrl+F` for "zmm" instructions)
+## Limitations
+
+- FFT size must be known at compile time
+- By default, only FFTs up to size 200 are generated
 
-If a larger FFT size is needed, just clone the repo and add the needed
-sizes to the top of `crates\monarch-derive\src\lib.rs` and larger FFTs
-will be generated. However, this comes at the cost of a longer compile time.
+![log](assets/log_comparison.png)
 
 ```
 use monarch_butterfly::*;
 use num_complex::Complex;
 
 let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
-let output_slice = fft8(&input);
-let output_vec = fft8(input);
+let output = fft::<8, _, _>(input);
 ```
 
+This library will use all SIMD features your CPU has available including AVX512,
+assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
+
+The larger the FFT sizes, the larger speed boost this library will give you.
+
+As an example of AVX512 instructions, here is an example on just an FFT
+of size 128: https://godbolt.org/z/Y58eh1x5a (`Ctrl+F` for "zmm" instructions)
+
 The FFTs before unrolling are heavily inspired from [RustFFT](https://github.com/ejmahler/RustFFT).
 Credit is given to Elliott Mahler as the RustFFT original author.
diff --git a/crates/monarch-derive/src/lib.rs b/crates/monarch-derive/src/lib.rs
@@ -105,6 +105,15 @@ pub fn generate_switch(_input: TokenStream) -> TokenStream {
     });
 
     let expanded = quote! {
+        /// Top level FFT function
+        /// 
+        /// ```
+        /// use monarch_butterfly::*;
+        /// use num_complex::Complex;
+        /// 
+        /// let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
+        /// let output = fft::<8, _, _>(input);
+        /// ```
         #[inline]
         pub fn fft<const N: usize, T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; N] {
             let x_in = input.as_ref();
@@ -117,6 +126,15 @@ pub fn generate_switch(_input: TokenStream) -> TokenStream {
             }
         }
 
+        /// Top level iFFT function
+        /// 
+        /// ```
+        /// use monarch_butterfly::*;
+        /// use num_complex::Complex;
+        /// 
+        /// let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
+        /// let output = ifft::<8, _, _>(input);
+        /// ```
         #[inline]
         pub fn ifft<const N: usize, T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; N] {
             let x_in = input.as_ref();
@@ -168,6 +186,7 @@ pub fn generate_powers_of_two(_input: TokenStream) -> TokenStream {
         });
 
         quote! {
+            #[doc = concat!("Inner FFT")]
             #[inline]
             pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
                 let n = #s;
@@ -194,6 +213,7 @@ pub fn generate_powers_of_two(_input: TokenStream) -> TokenStream {
     });
 
     let expanded = quote! {
+        
         #[inline]
         pub fn fft1<T: Float, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 1] {
             let n = 1;
@@ -260,6 +280,7 @@ pub fn generate_coprimes(_input: TokenStream) -> TokenStream {
         });
 
         quote! {
+            #[doc = concat!("Inner FFT")]
             #[inline]
             pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
                 let n = #s;
@@ -344,6 +365,7 @@ pub fn generate_mixed_radix(_input: TokenStream) -> TokenStream {
         });
 
         quote! {
+            #[doc = concat!("Inner FFT")]
             #[inline]
             pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
                 let n = #s;
@@ -525,6 +547,7 @@ pub fn generate_primes(_input: TokenStream) -> TokenStream {
         });
 
         quote! {
+            #[doc = concat!("Inner FFT")]
             #[inline]
             pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
                 let n = #s;
@@ -579,6 +602,7 @@ pub fn generate_iffts(_input: TokenStream) -> TokenStream {
         });
 
         quote! {
+            #[doc = concat!("Inner iFFT")]
             #[inline]
             pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #n] {
                 let x = input.as_ref();
@@ -596,6 +620,7 @@ pub fn generate_iffts(_input: TokenStream) -> TokenStream {
     });
 
     let expanded = quote! {
+        #[doc = concat!("Inner iFFT")]
         #[inline]
         pub fn ifft1<T: Float, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 1] {
             let n = 1;
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,30 +1,48 @@
-//! Experimental FFT library with an emphasis on runtime performance.
-//! This currently only works on powers of two. The FFTs are autogenerated
-//! with a procedural macro which hand unrolls all the loops with inlined
-//! functions to allow the compiler to maximize the SIMD throughput available
-//! on the given CPU.
-//!
-//! This library will use all SIMD features your CPU has available including AVX512,
-//! assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
-//!
-//! This library implements FFTs for both `f32` and `f64` for the following sizes:
-//!
-//! ```no_compile
-//! 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048
-//! ```
-//!
-//! If a larger FFT size is needed, just clone the repo and add the needed
-//! sizes to the top of `crates\monarch-derive\src\lib.rs` and larger FFTs
-//! will be generated. However, this comes at the cost of a longer compile time.
-//!
+//! [![Build](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml/badge.svg)](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml)
+//! [![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
+//! [![](https://img.shields.io/crates/v/monarch-butterfly)](https://crates.io/crates/monarch-butterfly)
+//! [![](https://docs.rs/monarch-butterfly/badge.svg)](https://docs.rs/monarch-butterfly/)
+//! 
+//! # Monarch Butterfly
+//! 
+//! Experimental FFT library where all FFTs are proc-macro generated const-evaluation functions. It is intended for cases where the FFT size is known at compile time; in exchange for that restriction, knowing the FFT size at compile time gives immense gains.
+//! 
+//! This library implements FFTs for both `f32` and `f64` for sizes `1-200`. The FFTs are auto-generated so this limit could be increased above 200 at the expense of compile time.
+//! 
+//! ## Features
+//! 
+//! - All functions are auto-generated with proc-macros with unrolled loops
+//! - Zero `unsafe` code
+//! - Completely portable
+//! - Const-evaluation functions
+//! 
+//! ## Limitations
+//! 
+//! - FFT size must be known at compile time
+//! - By default, only FFTs up to size 200 are generated
+//! 
+//! 
 //! ```
 //! use monarch_butterfly::*;
 //! use num_complex::Complex;
-//!
+//! 
 //! let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
-//! let output_slice = fft::<8, _, _>(&input);
-//! let output_vec = fft::<8, _, _>(input);
+//! let output = fft::<8, _, _>(input);
 //! ```
+//! 
+//! The top level functions are [`fft`] and [`ifft`].
+//! 
+//! This library will use all SIMD features your CPU has available including AVX512,
+//! assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
+//! 
+//! The larger the FFT sizes, the larger speed boost this library will give you.
+//! 
+//! As an example of AVX512 instructions, here is an example on just an FFT
+//! of size 128: <https://godbolt.org/z/Y58eh1x5a> (`Ctrl+F` for "zmm" instructions)
+//! 
+//! The FFTs before unrolling are heavily inspired by [RustFFT](https://github.com/ejmahler/RustFFT).
+//! Credit is given to Elliott Mahler as the RustFFT original author.
+
 
 #![allow(clippy::excessive_precision)]
 #![forbid(unsafe_code)]
@@ -42,16 +60,17 @@ monarch_derive::generate_mixed_radix!();
 monarch_derive::generate_primes!();
 monarch_derive::generate_iffts!();
 
-pub fn _compute_twiddle<T: Float + FloatConst>(index: usize, fft_len: usize) -> Complex<T> {
+fn _compute_twiddle<T: Float + FloatConst>(index: usize, fft_len: usize) -> Complex<T> {
     let constant = T::from(-2.0).unwrap() * T::PI() / T::from(fft_len).unwrap();
     // index * -2PI / fft_len
     let angle = constant * T::from(index).unwrap();
 
     Complex::new(angle.cos(), angle.sin())
 }
 
+#[doc = concat!("Inner FFT")]
 #[inline]
-fn fft3<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 3] {
+pub fn fft3<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 3] {
     let n = 3;
     let x = input.as_ref();
     assert_eq!(n, x.len());
@@ -75,8 +94,9 @@ fn fft3<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>;
     [sum, temp_a + temp_b, temp_a - temp_b]
 }
 
+#[doc = concat!("Inner FFT")]
 #[inline]
-fn fft9<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 9] {
+pub fn fft9<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 9] {
     let n = 9;
     let x = input.as_ref();
     assert_eq!(n, x.len());
@@ -107,8 +127,9 @@ fn fft9<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>;
     ]
 }
 
+#[doc = concat!("Inner FFT")]
 #[inline]
-fn fft18<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 18] {
+pub fn fft18<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 18] {
     let n = 18;
     let x = input.as_ref();
     assert_eq!(n, x.len());
@@ -176,8 +197,9 @@ fn fft18<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>
     ]
 }
 
+#[doc = concat!("Inner FFT")]
 #[inline]
-fn fft27<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 27] {
+pub fn fft27<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 27] {
     let n = 27;
     let x = input.as_ref();
     assert_eq!(n, x.len());
@@ -252,6 +274,7 @@ fn fft27<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>
     ]
 }
 
+#[doc = concat!("Inner FFT")]
 #[inline]
 pub fn fft125<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 125] {
     let n = 125;