Skip to content

Commit e356b14

Browse files
authored
Performance improvements (#22)
## 2.6.0 2025-10-15 Substantial performance improvement for `flux_density_linear_filament` Biot-Savart methods. This also improves performance in calculations that use these methods, such as linear filament body force density calcs. ### Added * Rust * Add `dot3f` and `cross3f` 32-bit float variants ### Changed * Rust * Use mixed-precision method for `flux_density_linear_filament_scalar` * High-dynamic-range part of the calc is still done using 64-bit floats * Low-dynamic-range part of the calc is now done using 32-bit floats * _All_ addition operations in 32-bit section are done using fused multiply-add operations, usually chained to defer roundoff to final operation. As a result, total roundoff error accumulated in this section is minimal. * Return is upcast back to 64-bit float to support precise summation downstream * 1.4-2x speedup without any meaningful loss of precision * No change to unit test tolerances needed; unlike an all-32-bit implementation, this mixed-precision method passes all the same tests as the 64-bit-only method * Python * Update dep versions * Use latest rust backend version, which includes 1.4-2x speedup for flux_density_linear_filament Biot-Savart calcs
1 parent 8b41247 commit e356b14

File tree

11 files changed

+242
-150
lines changed

11 files changed

+242
-150
lines changed

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Changelog
2+
3+
See archived changelogs for versions prior to 2.6.0.
4+
5+
## 2.6.0 2025-10-15
6+
7+
Substantial performance improvement for `flux_density_linear_filament` Biot-Savart methods.
8+
This also improves performance in calculations that use these methods, such as linear filament
9+
body force density calcs.
10+
11+
### Added
12+
13+
* Rust
14+
* Add `dot3f` and `cross3f` 32-bit float variants
15+
16+
### Changed
17+
18+
* Rust
19+
* Use mixed-precision method for `flux_density_linear_filament_scalar`
20+
* High-dynamic-range part of the calc is still done using 64-bit floats
21+
* Low-dynamic-range part of the calc is now done using 32-bit floats
22+
* _All_ addition operations in the 32-bit section are done using
23+
fused multiply-add operations, usually chained to defer
24+
roundoff to the final operation. As a result, the total roundoff error
25+
accumulated in this section is minimal.
26+
* Return value is upcast back to a 64-bit float to support precise summation downstream
27+
* 1.4-2x speedup without any meaningful loss of precision
28+
* No change to unit test tolerances needed; unlike an all-32-bit implementation,
29+
this mixed-precision method passes all the same tests as the 64-bit-only method
30+
* Python
31+
* Update dep versions
32+
* Use the latest Rust backend version, which includes a 1.4-2x speedup for `flux_density_linear_filament` Biot-Savart calcs
File renamed without changes.
File renamed without changes.

Cargo.lock

Lines changed: 133 additions & 22 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "cfsem"
3-
version = "2.5.0"
3+
version = "2.6.0"
44
edition = "2024"
55
authors = ["Commonwealth Fusion Systems <jlogan@cfs.energy>"]
66
license = "MIT"
@@ -15,11 +15,11 @@ name = "cfsem"
1515
crate-type = ["cdylib", "rlib"]
1616

1717
[dependencies]
18-
pyo3 = { version="0.25.1", features=["extension-module"], optional=true }
19-
numpy = { version="0.25.0", optional=true } # This must match pyo3 version!
18+
pyo3 = { version="0.26.0", features=["extension-module"], optional=true }
19+
numpy = { version="0.26.0", optional=true } # This must match pyo3 version!
2020

21-
nalgebra = "^0.33.2"
22-
rayon = "^1.10.0"
21+
nalgebra = "^0.34.1"
22+
rayon = "^1.11.0"
2323
libm = "^0.2"
2424
num-traits = { version = "0.2.19", features = ["libm"] }
2525

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "maturin"
44

55
[project]
66
name = "cfsem"
7-
version = "2.5.0"
7+
version = "2.6.0"
88
description = "Quasi-steady electromagnetics including filamentized approximations, Biot-Savart, and Grad-Shafranov."
99
authors = [{name = "Commonwealth Fusion Systems", email = "jlogan@cfs.energy"}]
1010
requires-python = ">=3.9, <3.14"
@@ -15,7 +15,7 @@ classifiers = [
1515
]
1616
dependencies = [
1717
"numpy >= 2",
18-
"interpn >= 0.2.5",
18+
"interpn >= 0.6.1",
1919
"findiff >= 0.12.1",
2020
"pydantic >= 2",
2121
"pydantic-numpy >= 6"

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#![doc=include_str!("../README.md")]
2-
#![allow(non_snake_case)]
2+
#![allow(clippy::doc_overindented_list_items)]
33
#![allow(clippy::needless_range_loop)]
44
#![allow(clippy::needless_late_init)]
55
#![allow(non_snake_case)]

src/math.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ pub fn ellipk(m: f64) -> f64 {
4545
let mut ellip: f64 = 0.0;
4646
let c: f64 = 1.0 - m;
4747
let logterm = c.powi(-1).ln();
48+
49+
// NOTE: This loop is unrolled at compile-time automatically,
50+
// and the repeated calls to `powi` are de-duplicated by the compiler.
4851
for i in 0..5 {
4952
ellip = logterm
5053
.mul_add(ELLIPK_B[i], ELLIPK_A[i])
@@ -69,6 +72,9 @@ pub fn ellipe(m: f64) -> f64 {
6972
let mut ellip: f64 = 0.0;
7073
let c: f64 = 1.0 - m;
7174
let logterm = c.powi(-1).ln();
75+
76+
// NOTE: This loop is unrolled at compile-time automatically,
77+
// and the repeated calls to `powi` are de-duplicated by the compiler.
7278
for i in 0..5 {
7379
ellip = logterm
7480
.mul_add(ELLIPE_B[i], ELLIPE_A[i])
@@ -99,12 +105,35 @@ pub fn cross3(x0: f64, y0: f64, z0: f64, x1: f64, y1: f64, z1: f64) -> (f64, f64
99105
(cx, cy, cz)
100106
}
101107

108+
/// Evaluate the cross products for each axis component
109+
/// separately using `mul_add` which would not be assumed usable
110+
/// in a more general implementation.
111+
/// 32-bit float variant.
112+
#[inline]
113+
pub fn cross3f(x0: f32, y0: f32, z0: f32, x1: f32, y1: f32, z1: f32) -> (f32, f32, f32) {
114+
let xy = -x1 * y0;
115+
let yz = -y1 * z0;
116+
let zx = -z1 * x0;
117+
let cx = y0.mul_add(z1, yz);
118+
let cy = z0.mul_add(x1, zx);
119+
let cz = x0.mul_add(y1, xy);
120+
121+
(cx, cy, cz)
122+
}
123+
102124
/// Scalar dot product using `mul_add`.
103125
#[inline]
104126
pub fn dot3(x0: f64, y0: f64, z0: f64, x1: f64, y1: f64, z1: f64) -> f64 {
105127
x0.mul_add(x1, y0.mul_add(y1, z0 * z1))
106128
}
107129

130+
/// Scalar dot product using `mul_add`.
131+
/// 32-bit float variant.
132+
#[inline]
133+
pub fn dot3f(x0: f32, y0: f32, z0: f32, x1: f32, y1: f32, z1: f32) -> f32 {
134+
x0.mul_add(x1, y0.mul_add(y1, z0 * z1))
135+
}
136+
108137
/// Convert a point from cartesian to cylindrical coordinates.
109138
#[inline]
110139
pub fn cartesian_to_cylindrical(x: f64, y: f64, z: f64) -> (f64, f64, f64) {

src/physics/circular_filament.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1268,7 +1268,7 @@ mod test {
12681268
let zfil: Vec<f64> = (0..NFIL)
12691269
.map(|i| (i as f64) - (NFIL as f64) / 2.0)
12701270
.collect();
1271-
let ifil: Vec<f64> = (0..NFIL).map(|i| (i as f64)).collect();
1271+
let ifil: Vec<f64> = (0..NFIL).map(|i| i as f64).collect();
12721272

12731273
// Build a scattering of observation locations
12741274
let rprime: Vec<f64> = (0..NOBS).map(|i| 2.0 * (i as f64).sin() + 2.1).collect();

0 commit comments

Comments
 (0)