Skip to content

Commit d036dc3

Browse files
committed
Adding documentation
1 parent 64f4d7c commit d036dc3

File tree

4 files changed

+103
-48
lines changed

4 files changed

+103
-48
lines changed

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,6 @@ jobs:
1919
- name: Clippy
2020
run: cargo clippy -- -D warnings
2121
- name: Build
22-
run: cargo build --verbose
22+
run: cargo build
2323
- name: Run tests
24-
run: cargo test --verbose
24+
run: cargo test

README.md

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
1-
## Monarch Butterfly
1+
[![Build](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml/badge.svg)](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml)
2+
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
3+
[![](https://img.shields.io/crates/v/monarch-butterfly)](https://img.shields.io/crates/v/monarch-butterfly)
4+
[![](https://docs.rs/monarch-butterfly/badge.svg)](https://docs.rs/monarch-butterfly/)
25

3-
Experimental FFT library with an emphasis on runtime performance.
4-
The goal of this library is to hand-unroll all loops in a procedural macro
5-
for optimal SIMD throughput.
6+
# Monarch Butterfly
67

7-
This currently only works on powers of two.
8+
Experimental FFT library where all FFTs are proc-macro generated const-evaluation functions. The intended use case is when you know the FFT size at compile time; in exchange for that restriction, knowing the FFT size at compile time gives immense gains.
89

9-
This library implements FFTs for both `f32` and `f64` for the following sizes:
10-
```
11-
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048
12-
```
10+
This library implements FFTs for both `f32` and `f64` for sizes `1-200`. The FFTs are auto-generated, so this limit could be increased above 200 at the expense of compile time.
1311

14-
This library will use all SIMD features your CPU has available including AVX512,
15-
assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
12+
## Features
1613

17-
The larger the FFT sizes, the larger speed boost this library will give you.
14+
- All functions are auto-generated with proc-macros with unrolled loops
15+
- Zero `unsafe` code
16+
- Completely portable
17+
- Const-evaluation functions
1818

19-
As an example of AVX512 instructions, here is an example on just an FFT
20-
of size 128: https://godbolt.org/z/rz48azEsd (`Ctrl+F` for "zmm" instructions)
19+
## Limitations
20+
21+
- FFT size must be known at compile time
22+
- By default, only FFTs up to size 200 are generated
2123

22-
If a larger FFT size is needed, just clone the repo and add the needed
23-
sizes to the top of `crates\monarch-derive\src\lib.rs` and larger FFTs
24-
will be generated. However, this comes at the cost of a longer compile time.
24+
![log](assets/log_comparison.png)
2525

2626
```
2727
use monarch_butterfly::*;
2828
use num_complex::Complex;
2929
3030
let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
31-
let output_slice = fft8(&input);
32-
let output_vec = fft8(input);
31+
let output = fft::<8, _, _>(input);
3332
```
3433

34+
This library will use all SIMD features your CPU has available including AVX512,
35+
assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
36+
37+
The larger the FFT sizes, the larger speed boost this library will give you.
38+
39+
As an example of AVX512 instructions, here is an example on just an FFT
40+
of size 128: https://godbolt.org/z/Y58eh1x5a (`Ctrl+F` for "zmm" instructions)
41+
3542
The FFTs before unrolling are heavily inspired by [RustFFT](https://github.com/ejmahler/RustFFT).
3643
Credit is given to Elliott Mahler as the RustFFT original author.

crates/monarch-derive/src/lib.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ pub fn generate_switch(_input: TokenStream) -> TokenStream {
105105
});
106106

107107
let expanded = quote! {
108+
/// Top level FFT function
109+
///
110+
/// ```
111+
/// use monarch_butterfly::*;
112+
/// use num_complex::Complex;
113+
///
114+
/// let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
115+
/// let output = fft::<8, _, _>(input);
116+
/// ```
108117
#[inline]
109118
pub fn fft<const N: usize, T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; N] {
110119
let x_in = input.as_ref();
@@ -117,6 +126,15 @@ pub fn generate_switch(_input: TokenStream) -> TokenStream {
117126
}
118127
}
119128

129+
/// Top level iFFT function
130+
///
131+
/// ```
132+
/// use monarch_butterfly::*;
133+
/// use num_complex::Complex;
134+
///
135+
/// let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
136+
/// let output = ifft::<8, _, _>(input);
137+
/// ```
120138
#[inline]
121139
pub fn ifft<const N: usize, T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; N] {
122140
let x_in = input.as_ref();
@@ -168,6 +186,7 @@ pub fn generate_powers_of_two(_input: TokenStream) -> TokenStream {
168186
});
169187

170188
quote! {
189+
#[doc = concat!("Inner FFT")]
171190
#[inline]
172191
pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
173192
let n = #s;
@@ -194,6 +213,7 @@ pub fn generate_powers_of_two(_input: TokenStream) -> TokenStream {
194213
});
195214

196215
let expanded = quote! {
216+
197217
#[inline]
198218
pub fn fft1<T: Float, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 1] {
199219
let n = 1;
@@ -260,6 +280,7 @@ pub fn generate_coprimes(_input: TokenStream) -> TokenStream {
260280
});
261281

262282
quote! {
283+
#[doc = concat!("Inner FFT")]
263284
#[inline]
264285
pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
265286
let n = #s;
@@ -344,6 +365,7 @@ pub fn generate_mixed_radix(_input: TokenStream) -> TokenStream {
344365
});
345366

346367
quote! {
368+
#[doc = concat!("Inner FFT")]
347369
#[inline]
348370
pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
349371
let n = #s;
@@ -525,6 +547,7 @@ pub fn generate_primes(_input: TokenStream) -> TokenStream {
525547
});
526548

527549
quote! {
550+
#[doc = concat!("Inner FFT")]
528551
#[inline]
529552
pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #s] {
530553
let n = #s;
@@ -579,6 +602,7 @@ pub fn generate_iffts(_input: TokenStream) -> TokenStream {
579602
});
580603

581604
quote! {
605+
#[doc = concat!("Inner iFFT")]
582606
#[inline]
583607
pub fn #func<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; #n] {
584608
let x = input.as_ref();
@@ -596,6 +620,7 @@ pub fn generate_iffts(_input: TokenStream) -> TokenStream {
596620
});
597621

598622
let expanded = quote! {
623+
#[doc = concat!("Inner iFFT")]
599624
#[inline]
600625
pub fn ifft1<T: Float, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 1] {
601626
let n = 1;

src/lib.rs

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,48 @@
1-
//! Experimental FFT library with an emphasis on runtime performance.
2-
//! This currently only works on powers of two. The FFTs are autogenerated
3-
//! with a procedural macro which hand unrolls all the loops with inlined
4-
//! functions to allow the compiler to maximize the SIMD throughput available
5-
//! on the given CPU.
6-
//!
7-
//! This library will use all SIMD features your CPU has available including AVX512,
8-
//! assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
9-
//!
10-
//! This library implements FFTs for both `f32` and `f64` for the following sizes:
11-
//!
12-
//! ```no_compile
13-
//! 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048
14-
//! ```
15-
//!
16-
//! If a larger FFT size is needed, just clone the repo and add the needed
17-
//! sizes to the top of `crates\monarch-derive\src\lib.rs` and larger FFTs
18-
//! will be generated. However, this comes at the cost of a longer compile time.
19-
//!
1+
//! [![Build](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml/badge.svg)](https://github.com/michaelciraci/Monarch-Butterfly/actions/workflows/rust.yml)
2+
//! [![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
3+
//! [![](https://img.shields.io/crates/v/monarch-butterfly)](https://img.shields.io/crates/v/monarch-butterfly)
4+
//! [![](https://docs.rs/monarch-butterfly/badge.svg)](https://docs.rs/monarch-butterfly/)
5+
//!
6+
//! # Monarch Butterfly
7+
//!
8+
//! Experimental FFT library where all FFTs are proc-macro generated const-evaluation functions. The intended use case is when you know the FFT size at compile time; in exchange for that restriction, knowing the FFT size at compile time gives immense gains.
9+
//!
10+
//! This library implements FFTs for both `f32` and `f64` for sizes `1-200`. The FFTs are auto-generated, so this limit could be increased above 200 at the expense of compile time.
11+
//!
12+
//! ## Features
13+
//!
14+
//! - All functions are auto-generated with proc-macros with unrolled loops
15+
//! - Zero `unsafe` code
16+
//! - Completely portable
17+
//! - Const-evaluation functions
18+
//!
19+
//! ## Limitations
20+
//!
21+
//! - FFT size must be known at compile time
22+
//! - By default, only FFTs up to size 200 are generated
23+
//!
24+
//!
2025
//! ```
2126
//! use monarch_butterfly::*;
2227
//! use num_complex::Complex;
23-
//!
28+
//!
2429
//! let input: Vec<_> = (0..8).map(|i| Complex::new(i as f32, 0.0)).collect();
25-
//! let output_slice = fft::<8, _, _>(&input);
26-
//! let output_vec = fft::<8, _, _>(input);
30+
//! let output = fft::<8, _, _>(input);
2731
//! ```
32+
//!
33+
//! The top level functions are [`fft`] and [`ifft`].
34+
//!
35+
//! This library will use all SIMD features your CPU has available including AVX512,
36+
//! assuming you compile with those features (`RUSTFLAGS="-C target-cpu=native" cargo build`).
37+
//!
38+
//! The larger the FFT sizes, the larger speed boost this library will give you.
39+
//!
40+
//! As an example of AVX512 instructions, here is an example on just an FFT
41+
//! of size 128: <https://godbolt.org/z/Y58eh1x5a> (`Ctrl+F` for "zmm" instructions)
42+
//!
43+
//! The FFTs before unrolling are heavily inspired by [`RustFFT`](<https://github.com/ejmahler/RustFFT>).
44+
//! Credit is given to Elliott Mahler as the RustFFT original author.
45+
2846

2947
#![allow(clippy::excessive_precision)]
3048
#![forbid(unsafe_code)]
@@ -42,16 +60,17 @@ monarch_derive::generate_mixed_radix!();
4260
monarch_derive::generate_primes!();
4361
monarch_derive::generate_iffts!();
4462

45-
pub fn _compute_twiddle<T: Float + FloatConst>(index: usize, fft_len: usize) -> Complex<T> {
63+
fn _compute_twiddle<T: Float + FloatConst>(index: usize, fft_len: usize) -> Complex<T> {
4664
let constant = T::from(-2.0).unwrap() * T::PI() / T::from(fft_len).unwrap();
4765
// index * -2PI / fft_len
4866
let angle = constant * T::from(index).unwrap();
4967

5068
Complex::new(angle.cos(), angle.sin())
5169
}
5270

71+
#[doc = concat!("Inner FFT")]
5372
#[inline]
54-
fn fft3<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 3] {
73+
pub fn fft3<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 3] {
5574
let n = 3;
5675
let x = input.as_ref();
5776
assert_eq!(n, x.len());
@@ -75,8 +94,9 @@ fn fft3<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>;
7594
[sum, temp_a + temp_b, temp_a - temp_b]
7695
}
7796

97+
#[doc = concat!("Inner FFT")]
7898
#[inline]
79-
fn fft9<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 9] {
99+
pub fn fft9<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 9] {
80100
let n = 9;
81101
let x = input.as_ref();
82102
assert_eq!(n, x.len());
@@ -107,8 +127,9 @@ fn fft9<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>;
107127
]
108128
}
109129

130+
#[doc = concat!("Inner FFT")]
110131
#[inline]
111-
fn fft18<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 18] {
132+
pub fn fft18<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 18] {
112133
let n = 18;
113134
let x = input.as_ref();
114135
assert_eq!(n, x.len());
@@ -176,8 +197,9 @@ fn fft18<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>
176197
]
177198
}
178199

200+
#[doc = concat!("Inner FFT")]
179201
#[inline]
180-
fn fft27<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 27] {
202+
pub fn fft27<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 27] {
181203
let n = 27;
182204
let x = input.as_ref();
183205
assert_eq!(n, x.len());
@@ -252,6 +274,7 @@ fn fft27<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>
252274
]
253275
}
254276

277+
#[doc = concat!("Inner FFT")]
255278
#[inline]
256279
pub fn fft125<T: Float + FloatConst, A: AsRef<[Complex<T>]>>(input: A) -> [Complex<T>; 125] {
257280
let n = 125;

0 commit comments

Comments
 (0)