|
1 | 1 | # Benchmarks |
2 | 2 |
|
3 | | -Benchmarks were performed on an [AMD Ryzen 7 4800HS CPU](https://en.wikichip.org/wiki/amd/ryzen_9/3900). |
| 3 | +Benchmarks were performed on an Intel Core i9-14900K CPU. |
4 | 4 |
|
5 | 5 | ```sh |
6 | 6 | $ cargo bench --bench <name> --features <int> |
7 | | -$ critcmp new | tail +3 | sort | sed 's# ? ?/sec##' |
| 7 | +$ critcmp new | tail -n +3 | sort | sed 's# ? ?/sec##' | sed 's# 1.00##' |
8 | 8 | ``` |
9 | 9 |
|
10 | 10 | ## ops |
11 | 11 | 64-bit FP with precision = 9: |
12 | 12 | ``` |
13 | | -F64p9/cadd (~1e4) 1.00 1.9±0.01ns |
14 | | -F64p9/from_decimal(12345, -3) 1.00 1.6±0.00ns |
15 | | -F64p9/next_power_of_ten 1.00 3.6±0.01ns |
16 | | -F64p9/rdiv (~1e5/~1e4, Ceil) 1.00 1.9±0.01ns |
17 | | -F64p9/rdiv (~1e5/~1e4, Floor) 1.00 1.9±0.01ns |
18 | | -F64p9/rdiv (~1e5/~1e4, Nearest) 1.00 1.9±0.00ns |
19 | | -F64p9/rmul (~1e4, Ceil) 1.00 1.9±0.01ns |
20 | | -F64p9/rmul (~1e4, Floor) 1.00 1.9±0.03ns |
21 | | -F64p9/rmul (~1e4, Nearest) 1.00 1.9±0.00ns |
22 | | -F64p9/rsqrt (~1e4, Ceil) 1.00 43.7±0.29ns |
23 | | -F64p9/rsqrt (~1e4, Floor) 1.00 42.5±0.17ns |
24 | | -F64p9/rsqrt (~1e4, Nearest) 1.00 47.0±0.19ns |
25 | | -F64p9/rsqrt (adaptive, Ceil) 1.00 98.0±0.33ns |
26 | | -F64p9/rsqrt (adaptive, Floor) 1.00 94.4±1.45ns |
27 | | -F64p9/rsqrt (adaptive, Nearest) 1.00 99.6±0.67ns |
28 | | -F64p9/rsqrt (MAX, Ceil) 1.00 102.3±0.50ns |
29 | | -F64p9/rsqrt (MAX, Floor) 1.00 100.2±0.50ns |
30 | | -F64p9/rsqrt (MAX, Nearest) 1.00 102.7±0.80ns |
31 | | -F64p9/to_decimal(0) (12.345) 1.00 9.1±0.02ns |
32 | | -F64p9/to_decimal(i32::MAX) (12.345) 1.00 9.1±0.01ns |
33 | | -F64p9/try_from(f64) (~0.1) 1.00 64.8±0.33ns |
34 | | -F64p9/try_from(f64) (~1e-12) 1.00 132.5±0.46ns |
35 | | -F64p9/try_from(f64) (~1e6) 1.00 24.9±0.14ns |
36 | | -F64p9/try_from(f64) (MAX) 1.00 5.9±0.01µs |
37 | | -F64p9/try_from(f64) (MIN_POSITIVE) 1.00 1872.9±4.12ns |
| 13 | +F64p9/cadd (~1e4) 1.0±0.03ns |
| 14 | +F64p9/from_decimal(12345, -3) 1.0±0.01ns |
| 15 | +F64p9/next_power_of_ten 1.6±0.03ns |
| 16 | +F64p9/rdiv (~1e5/~1e4, Ceil) 1.0±0.03ns |
| 17 | +F64p9/rdiv (~1e5/~1e4, Floor) 1.0±0.04ns |
| 18 | +F64p9/rdiv (~1e5/~1e4, Nearest) 1.0±0.04ns |
| 19 | +F64p9/rmul (~1e4, Ceil) 1.0±0.03ns |
| 20 | +F64p9/rmul (~1e4, Floor) 1.0±0.04ns |
| 21 | +F64p9/rmul (~1e4, Nearest) 1.0±0.05ns |
| 22 | +F64p9/rsqrt (~1e4, Ceil) 1.0±0.02ns |
| 23 | +F64p9/rsqrt (~1e4, Floor) 1.0±0.02ns |
| 24 | +F64p9/rsqrt (~1e4, Nearest) 1.0±0.03ns |
| 25 | +F64p9/rsqrt (adaptive, Ceil) 5.4±0.02ns |
| 26 | +F64p9/rsqrt (adaptive, Floor) 4.9±0.01ns |
| 27 | +F64p9/rsqrt (adaptive, Nearest) 5.5±0.02ns |
| 28 | +F64p9/rsqrt (MAX, Ceil) 1.0±0.01ns |
| 29 | +F64p9/rsqrt (MAX, Floor) 1.0±0.01ns |
| 30 | +F64p9/rsqrt (MAX, Nearest) 1.0±0.01ns |
| 31 | +F64p9/to_decimal(0) (12.345) 5.0±0.01ns |
| 32 | +F64p9/to_decimal(i32::MAX) (12.345) 5.0±0.02ns |
| 33 | +F64p9/try_from(f64) (~0.1) 33.2±0.08ns |
| 34 | +F64p9/try_from(f64) (~1e-12) 61.9±0.20ns |
| 35 | +F64p9/try_from(f64) (~1e6) 16.2±0.05ns |
| 36 | +F64p9/try_from(f64) (MAX) 1263.8±2.26ns |
| 37 | +F64p9/try_from(f64) (MIN_POSITIVE) 693.4±2.38ns |
38 | 38 | ``` |
39 | 39 |
|
40 | 40 | 128-bit FP with precision = 18: |
41 | 41 | ``` |
42 | | -F128p18/cadd (~1e4) 1.00 2.8±0.00ns |
43 | | -F128p18/from_decimal(12345, -3) 1.00 9.1±0.03ns |
44 | | -F128p18/next_power_of_ten 1.00 6.3±0.03ns |
45 | | -F128p18/rdiv (~1e5/~1e4, Ceil) 1.00 157.3±0.51ns |
46 | | -F128p18/rdiv (~1e5/~1e4, Floor) 1.00 154.2±1.19ns |
47 | | -F128p18/rdiv (~1e5/~1e4, Nearest) 1.00 159.4±1.05ns |
48 | | -F128p18/rmul (~1e4, Ceil) 1.00 132.5±0.61ns |
49 | | -F128p18/rmul (~1e4, Floor) 1.00 132.3±0.79ns |
50 | | -F128p18/rmul (~1e4, Nearest) 1.00 134.1±0.79ns |
51 | | -F128p18/rsqrt (~1e4, Ceil) 1.00 428.3±7.08ns |
52 | | -F128p18/rsqrt (~1e4, Floor) 1.00 403.9±1.24ns |
53 | | -F128p18/rsqrt (~1e4, Nearest) 1.00 475.3±1.03ns |
54 | | -F128p18/rsqrt (adaptive, Ceil) 1.00 1469.3±3.05ns |
55 | | -F128p18/rsqrt (adaptive, Floor) 1.00 1436.2±1.98ns |
56 | | -F128p18/rsqrt (adaptive, Nearest) 1.00 1530.6±1.97ns |
57 | | -F128p18/rsqrt (MAX, Ceil) 1.00 1393.2±9.68ns |
58 | | -F128p18/rsqrt (MAX, Floor) 1.00 1335.9±10.01ns |
59 | | -F128p18/rsqrt (MAX, Nearest) 1.00 1441.7±11.63ns |
60 | | -F128p18/to_decimal(0) (12.345) 1.00 263.8±25.35ns |
61 | | -F128p18/to_decimal(i32::MAX) (12.345) 1.00 263.2±0.13ns |
62 | | -F128p18/try_from(f64) (~0.1) 1.00 59.3±0.36ns |
63 | | -F128p18/try_from(f64) (~1e-12) 1.00 133.0±0.14ns |
64 | | -F128p18/try_from(f64) (~1e6) 1.00 27.8±0.25ns |
65 | | -F128p18/try_from(f64) (MAX) 1.00 5.9±0.00µs |
66 | | -F128p18/try_from(f64) (MIN_POSITIVE) 1.00 1842.6±1.86ns |
| 42 | +F128p18/cadd (~1e4) 1.9±0.05ns |
| 43 | +F128p18/from_decimal(12345, -3) 4.8±0.02ns |
| 44 | +F128p18/next_power_of_ten 3.1±0.04ns |
| 45 | +F128p18/rdiv (~1e5/~1e4, Ceil) 10.7±0.15ns |
| 46 | +F128p18/rdiv (~1e5/~1e4, Floor) 10.4±0.15ns |
| 47 | +F128p18/rdiv (~1e5/~1e4, Nearest) 11.2±0.16ns |
| 48 | +F128p18/rmul (~1e4, Ceil) 7.0±0.04ns |
| 49 | +F128p18/rmul (~1e4, Floor) 7.0±0.02ns |
| 50 | +F128p18/rmul (~1e4, Nearest) 7.2±0.06ns |
| 51 | +F128p18/rsqrt (~1e4, Ceil) 40.0±0.24ns |
| 52 | +F128p18/rsqrt (~1e4, Floor) 39.4±0.28ns |
| 53 | +F128p18/rsqrt (~1e4, Nearest) 41.2±0.28ns |
| 54 | +F128p18/rsqrt (adaptive, Ceil) 50.0±0.42ns |
| 55 | +F128p18/rsqrt (adaptive, Floor) 49.2±0.42ns |
| 56 | +F128p18/rsqrt (adaptive, Nearest) 50.6±0.38ns |
| 57 | +F128p18/rsqrt (MAX, Ceil) 40.2±0.28ns |
| 58 | +F128p18/rsqrt (MAX, Floor) 39.3±0.27ns |
| 59 | +F128p18/rsqrt (MAX, Nearest) 41.4±0.38ns |
| 60 | +F128p18/to_decimal(0) (12.345) 59.1±0.19ns |
| 61 | +F128p18/to_decimal(i32::MAX) (12.345) 59.1±0.28ns |
| 62 | +F128p18/try_from(f64) (~0.1) 28.5±1.51ns |
| 63 | +F128p18/try_from(f64) (~1e-12) 62.1±0.20ns |
| 64 | +F128p18/try_from(f64) (~1e6) 15.2±0.04ns |
| 65 | +F128p18/try_from(f64) (MAX) 1264.6±4.34ns |
| 66 | +F128p18/try_from(f64) (MIN_POSITIVE) 693.6±2.45ns |
67 | 67 | ``` |
68 | 68 |
|
69 | 69 | ## serde |
70 | 70 | 64-bit FP with precision = 9: |
71 | 71 | ``` |
72 | | -F64p9/deserialize 123.456 from f64 1.00 103.7±0.24ns |
73 | | -F64p9/deserialize 123.456 from string 1.00 54.8±0.18ns |
74 | | -F64p9/deserialize MAX from f64 1.00 59.8±0.24ns |
75 | | -F64p9/deserialize MAX from string 1.00 86.3±0.79ns |
76 | | -F64p9/serialize 123.456 to f64 1.00 48.2±0.46ns |
77 | | -F64p9/serialize 123.456 to string 1.00 27.5±0.29ns |
78 | | -F64p9/serialize MAX to f64 1.00 41.3±0.95ns |
79 | | -F64p9/serialize MAX to string 1.00 35.3±2.63ns |
| 72 | +F64p9/deserialize 123.456 from f64 55.4±0.17ns |
| 73 | +F64p9/deserialize 123.456 from string 27.1±0.34ns |
| 74 | +F64p9/deserialize MAX from f64 44.4±0.03ns |
| 75 | +F64p9/deserialize MAX from string 39.3±0.61ns |
| 76 | +F64p9/serialize 123.456 to f64 27.0±0.33ns |
| 77 | +F64p9/serialize 123.456 to string 13.1±0.21ns |
| 78 | +F64p9/serialize MAX to f64 38.6±0.01ns |
| 79 | +F64p9/serialize MAX to string 14.8±0.19ns |
80 | 80 | ``` |
81 | 81 |
|
82 | 82 | 128-bit FP with precision = 18: |
83 | 83 | ``` |
84 | | -F128p18/deserialize 123.456 from f64 1.00 103.3±0.24ns |
85 | | -F128p18/deserialize 123.456 from string 1.00 70.8±0.09ns |
86 | | -F128p18/deserialize MAX from f64 1.00 56.6±0.19ns |
87 | | -F128p18/deserialize MAX from string 1.00 147.3±0.51ns |
88 | | -F128p18/serialize 123.456 to f64 1.00 67.7±0.38ns |
89 | | -F128p18/serialize 123.456 to string 1.00 51.7±0.64ns |
90 | | -F128p18/serialize MAX to f64 1.00 63.6±0.74ns |
91 | | -F128p18/serialize MAX to string 1.00 80.6±1.00ns |
| 84 | +F128p18/deserialize 123.456 from f64 55.9±0.07ns |
| 85 | +F128p18/deserialize 123.456 from string 31.5±0.74ns |
| 86 | +F128p18/deserialize MAX from f64 40.8±0.20ns |
| 87 | +F128p18/deserialize MAX from string 60.1±0.75ns |
| 88 | +F128p18/serialize 123.456 to f64 30.4±0.15ns |
| 89 | +F128p18/serialize 123.456 to string 23.6±0.29ns |
| 90 | +F128p18/serialize MAX to f64 23.4±0.02ns |
| 91 | +F128p18/serialize MAX to string 37.3±0.04ns |
92 | 92 | ``` |
0 commit comments