1
"""
    max_min(x, y)

Return `x` and `y` as a 2-tuple ordered by absolute value, larger magnitude first.

When the magnitudes compare equal (or the comparison is false for any other
reason), the result is `(y, x)`.
"""
@inline function max_min(x, y)
    # `abs(x) > abs(y)` is the same test as `abs(y) < abs(x)`.
    x_is_larger = abs(x) > abs(y)
    # `ifelse` selects between two already-built tuples rather than branching.
    return ifelse(x_is_larger, (x, y), (y, x))
end
2
+
3
"""
    maxtomin(a::T, b::T, c::T, d::T) where {T}

Return the four arguments as a tuple ordered by decreasing absolute value.

The ordering is done with a fixed sequence of five `max_min` compare-exchange
steps (a 4-input sorting network), so no loop or allocation is involved.
Values whose magnitudes compare equal may appear in either relative order.
"""
function maxtomin(a::T, b::T, c::T, d::T) where {T}
    # Order each pair internally.
    a, b = max_min(a, b)
    c, d = max_min(c, d)

    # Largest of the pair leaders goes to `a`; smallest of the pair trailers to `d`.
    a, c = max_min(a, c)
    b, d = max_min(b, d)

    # Settle the two middle elements.
    b, c = max_min(b, c)

    return a, b, c, d
end
23
+
1
24
function vec_sum (x0:: T , x1:: T , x2:: T , x3:: T ) where {T}
2
25
s3 = x3
3
26
s2, e3 = two_sum (x2, s3)
@@ -6,10 +29,8 @@ function vec_sum(x0::T, x1::T, x2::T, x3::T) where {T}
6
29
return s0,e1,e2,e3
7
30
end
8
31
9
- function vsum_errbranch (x:: NTuple{4,T} ) where {T}
10
- y = zeros (T, 4 )
11
- r = zeros (T, 4 )
12
- e = zeros (T, 4 )
32
+ function vecsum_errbranch (x:: NTuple{4,T} ) where {T}
33
+ y = r = e = zeros (T, 4 )
13
34
j = 1
14
35
e[1 ] = x[1 ]
15
36
for i = 1 : 2
@@ -26,12 +47,73 @@ function vsum_errbranch(x::NTuple{4,T}) where {T}
26
47
return y
27
48
end
28
49
50
"""
    fast_vecsum_errbranch(x::NTuple{4,T}) where {T}

Fold the four entries of `x` through `two_sum`, left to right, and return a
4-element `Vector{T}`.

At each of the first two steps, if the second output of `two_sum` is nonzero,
the first output is stored in the next free slot of the result and the second
output is carried forward; otherwise the first output is carried forward and no
slot is consumed. The final step stores both outputs of `two_sum`. Slots that
are never written remain `zero(T)`.

`two_sum` is defined elsewhere; it is presumably an error-free addition
returning `(sum, rounding_error)` — confirm against its definition.
"""
function fast_vecsum_errbranch(x::NTuple{4,T}) where {T}
    y = zeros(T, 4)
    j = 1  # next free slot in `y`

    # Step 1: combine x[1] and x[2].
    r, t = two_sum(x[1], x[2])
    # `iszero` is a value test. The previous `t !== zero(T)` was an identity
    # test: always true for mutable number types (e.g. BigFloat) and true for -0.0.
    if !iszero(t)
        y[j] = r
        e = t
        j += 1
    else
        e = r
    end

    # Step 2: combine the carried value with x[3].
    r, t = two_sum(e, x[3])
    if !iszero(t)
        y[j] = r
        e = t
        j += 1
    else
        e = r
    end

    # Final step: j is at most 3 here, so j + 1 stays within the 4 slots.
    y[j], y[j + 1] = two_sum(e, x[4])
    return y
end
76
+
77
+
78
"""
    fast_vecsum_errbranch(x1::T, x2::T, x3::T, x4::T) where {T}

Fold `x1, x2, x3, x4` through `two_sum`, left to right, and return a 4-element
`Vector{T}`.

At each of the first two steps, if the second output of `two_sum` is nonzero,
the first output is stored in the next free slot of the result and the second
output is carried forward; otherwise the first output is carried forward and no
slot is consumed. The final step stores both outputs of `two_sum`. Slots that
are never written remain `zero(T)`.

`two_sum` is defined elsewhere; it is presumably an error-free addition
returning `(sum, rounding_error)` — confirm against its definition.
"""
function fast_vecsum_errbranch(x1::T, x2::T, x3::T, x4::T) where {T}
    y = zeros(T, 4)
    j = 1  # next free slot in `y`

    # Step 1: combine x1 and x2.
    r, t = two_sum(x1, x2)
    # `iszero` is a value test. The previous `t !== zero(T)` was an identity
    # test: always true for mutable number types (e.g. BigFloat) and true for -0.0.
    if !iszero(t)
        y[j] = r
        e = t
        j += 1
    else
        e = r
    end

    # Step 2: combine the carried value with x3.
    r, t = two_sum(e, x3)
    if !iszero(t)
        y[j] = r
        e = t
        j += 1
    else
        e = r
    end

    # Final step: j is at most 3 here, so j + 1 stays within the 4 slots.
    y[j], y[j + 1] = two_sum(e, x4)
    return y
end
104
+
29
105
"""
    quadword(x1::T, x2::T, x3::T, x4::T) where {T}

Combine four values of type `T` into a 4-tuple.

The inputs are paired and passed through `two_sum` in two layers, the four
resulting values go through `vec_sum`, and that result is passed to
`vecsum_errbranch`. The four entries it returns are handed back as a tuple.

`two_sum`, `vec_sum` and `vecsum_errbranch` are defined elsewhere in the file;
their numerical guarantees are not restated here.
"""
function quadword(x1::T, x2::T, x3::T, x4::T) where {T}
    # First layer: pairwise two_sum of the inputs.
    a1, a2 = two_sum(x1, x2)
    b1, b2 = two_sum(x3, x4)
    # Second layer: combine first outputs together and second outputs together.
    c1, c2 = two_sum(a1, b1)
    d1, d2 = two_sum(a2, b2)
    e1to4 = vec_sum(c1, c2, d1, d2)
    # NOTE(review): vecsum_errbranch starts with `y = r = e = zeros(T, 4)`, which
    # binds all three names to one array — confirm that aliasing is intended.
    y = vecsum_errbranch(e1to4)
    # Index the four slots explicitly instead of splatting the Vector, so the
    # return type is a fixed-length tuple.
    return (y[1], y[2], y[3], y[4])
end
114
+
115
"""
    fast_quadword(x1::T, x2::T, x3::T, x4::T) where {T}

Order the four inputs by decreasing magnitude with `maxtomin`, then pass them to
the four-argument `fast_vecsum_errbranch` and return its result unchanged.
"""
@inline function fast_quadword(x1::T, x2::T, x3::T, x4::T) where {T}
    ordered = maxtomin(x1, x2, x3, x4)
    # Splat the 4-tuple so the four-argument method is the one invoked.
    return fast_vecsum_errbranch(ordered...)
end
119
+
0 commit comments