Skip to content

Commit 809fd0d

Browse files
authored
Rewrite ROTMG to address cases not covered by the netlib algorithm (#1480)
* Rewrite ROTMG based on the new implementation in GONUM based on the algorithm proposed by Tim Hopkins, see issue 1452 for the reference * Correct ROTMG utest for issue1452 and add another from gonum, also correct transposition of expected and observed values in error messages
1 parent 72e6515 commit 809fd0d

File tree

3 files changed

+216
-153
lines changed

3 files changed

+216
-153
lines changed

interface/rotmg.c

Lines changed: 58 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
6464

6565
FLOAT du, dp1, dp2, dq2, dq1, dh11=ZERO, dh21=ZERO, dh12=ZERO, dh22=ZERO, dflag=-ONE, dtemp;
6666

67+
if (*dd2 == ZERO || dy1 == ZERO)
68+
{
69+
dflag = -TWO;
70+
dparam[0] = dflag;
71+
return;
72+
}
73+
6774
if(*dd1 < ZERO)
6875
{
6976
dflag = -ONE;
@@ -76,6 +83,16 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
7683
*dd2 = ZERO;
7784
*dx1 = ZERO;
7885
}
86+
else if ((*dd1 == ZERO || *dx1 == ZERO) && *dd2 > ZERO)
87+
{
88+
dflag = ONE;
89+
dh12 = 1;
90+
dh21 = -1;
91+
*dx1 = dy1;
92+
dtemp = *dd1;
93+
*dd1 = *dd2;
94+
*dd2 = dtemp;
95+
}
7996
else
8097
{
8198
dp2 = *dd2 * dy1;
@@ -90,6 +107,9 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
90107
dq1 = dp1 * *dx1;
91108
if(ABS(dq1) > ABS(dq2))
92109
{
110+
dflag = ZERO;
111+
dh11 = ONE;
112+
dh22 = ONE;
93113
dh21 = - dy1 / *dx1;
94114
dh12 = dp2 / dp1;
95115

@@ -100,8 +120,19 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
100120
*dd1 = *dd1 / du;
101121
*dd2 = *dd2 / du;
102122
*dx1 = *dx1 * du;
123+
} else {
124+
dflag = -ONE;
125+
126+
dh11 = ZERO;
127+
dh12 = ZERO;
128+
dh21 = ZERO;
129+
dh22 = ZERO;
103130

131+
*dd1 = ZERO;
132+
*dd2 = ZERO;
133+
*dx1 = ZERO;
104134
}
135+
105136
}
106137
else
107138
{
@@ -120,7 +151,9 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
120151
}
121152
else
122153
{
123-
dflag = ONE;
154+
dflag = ONE;
155+
dh21 = -ONE;
156+
dh12 = ONE;
124157

125158
dh11 = dp1 / dp2;
126159
dh22 = *dx1 / dy1;
@@ -134,76 +167,33 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
134167
}
135168

136169

137-
if(*dd1 != ZERO)
170+
while ( *dd1 <= RGAMSQ && *dd1 != ZERO)
138171
{
139-
if( (*dd1 <= RGAMSQ) || (*dd1 >= GAMSQ) )
140-
{
141-
if(dflag == ZERO)
142-
{
143-
dh11 = ONE;
144-
dh22 = ONE;
145-
dflag = -ONE;
146-
}
147-
else
148-
{
149-
dh21 = -ONE;
150-
dh12 = ONE;
151-
dflag = -ONE;
152-
}
153-
if( *dd1 <= RGAMSQ )
154-
{
155-
while (ABS(*dd1) <= RGAMSQ) {
156-
*dd1 = *dd1 * (GAM * GAM);
157-
*dx1 = *dx1 / GAM;
158-
dh11 = dh11 / GAM;
159-
dh12 = dh12 / GAM;
160-
}
161-
}
162-
else
163-
{
164-
while (ABS(*dd1) >= GAMSQ) {
165-
*dd1 = *dd1 / (GAM * GAM);
166-
*dx1 = *dx1 * GAM;
167-
dh11 = dh11 * GAM;
168-
dh12 = dh12 * GAM;
169-
}
170-
}
171-
}
172+
dflag = -ONE;
173+
*dd1 = *dd1 * (GAM * GAM);
174+
*dx1 = *dx1 / GAM;
175+
dh11 = dh11 / GAM;
176+
dh12 = dh12 / GAM;
177+
}
178+
while (ABS(*dd1) > GAMSQ) {
179+
dflag = -ONE;
180+
*dd1 = *dd1 / (GAM * GAM);
181+
*dx1 = *dx1 * GAM;
182+
dh11 = dh11 * GAM;
183+
dh12 = dh12 * GAM;
172184
}
173185

174-
if(*dd2 != ZERO)
175-
{
176-
if( (ABS(*dd2) <= RGAMSQ) || (ABS(*dd2) >= GAMSQ) )
177-
{
178-
if(dflag == ZERO)
179-
{
180-
dh11 = ONE;
181-
dh22 = ONE;
182-
dflag = -ONE;
183-
}
184-
else
185-
{
186-
dh21 = -ONE;
187-
dh12 = ONE;
188-
dflag = -ONE;
189-
}
190-
if( ABS(*dd2) <= RGAMSQ )
191-
{
192-
while (ABS(*dd2) <= RGAMSQ) {
193-
*dd2 = *dd2 * (GAM * GAM);
194-
dh21 = dh21 / GAM;
195-
dh22 = dh22 / GAM;
196-
}
197-
}
198-
else
199-
{
200-
while (ABS(*dd2) >= GAMSQ) {
201-
*dd2 = *dd2 / (GAM * GAM);
202-
dh21 = dh21 * GAM;
203-
dh22 = dh22 * GAM;
204-
}
205-
}
206-
}
186+
while (ABS(*dd2) <= RGAMSQ && *dd2 != ZERO) {
187+
dflag = -ONE;
188+
*dd2 = *dd2 * (GAM * GAM);
189+
dh21 = dh21 / GAM;
190+
dh22 = dh22 / GAM;
191+
}
192+
while (ABS(*dd2) > GAMSQ) {
193+
dflag = -ONE;
194+
*dd2 = *dd2 / (GAM * GAM);
195+
dh21 = dh21 * GAM;
196+
dh22 = dh22 * GAM;
207197
}
208198

209199
}

utest/test_rotmg.c

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ CTEST (drotmg,rotmg)
5353
te_param[i]=tr_param[i]=0.0;
5454
}
5555

56-
//reference values as calulated by netlib blas
56+
//reference values as calculated by netlib blas
5757

5858
tr_d1= 0.1732048;
5959
tr_d2= 0.03840234;
@@ -71,13 +71,13 @@ CTEST (drotmg,rotmg)
7171
tr_param[4]= 0.0;
7272

7373
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
74-
ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS);
75-
ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS);
76-
ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS);
77-
ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS);
74+
ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
75+
ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
76+
ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
77+
ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);
7878

7979
for(i=0; i<5; i++){
80-
ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS);
80+
ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
8181
}
8282
}
8383

@@ -91,7 +91,7 @@ CTEST (drotmg,rotmg_issue1452)
9191
double tr_param[5];
9292
int i=0;
9393

94-
// from issue #1452, buggy version returned 0.000244 for param[3]
94+
// from issue #1452
9595
te_d1 = 5.9e-8;
9696
te_d2 = 5.960464e-8;
9797
te_x1 = 1.0;
@@ -100,8 +100,8 @@ CTEST (drotmg,rotmg_issue1452)
100100
for(i=0; i<5; i++){
101101
te_param[i]=tr_param[i]=0.0;
102102
}
103-
104-
//reference values as calulated by netlib blas
103+
te_param[3]=1./4096.;
104+
//reference values as calculated by gonum blas with rotmg rewritten to Hopkins' algorithm
105105
tr_d1= 0.99995592822897;
106106
tr_d2= 0.98981219860583;
107107
tr_x1= 0.03662270484346;
@@ -110,19 +110,19 @@ CTEST (drotmg,rotmg_issue1452)
110110
tr_param[0]= -1.0;
111111
tr_param[1]= 0.00000161109346;
112112
tr_param[2]= -0.00024414062500;
113-
tr_param[3]= 1.0;
113+
tr_param[3]= 0.00024414062500;
114114
tr_param[4]= 0.00000162760417;
115115

116116
//OpenBLAS
117117
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
118118

119-
ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS);
120-
ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS);
121-
ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS);
122-
ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS);
119+
ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
120+
ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
121+
ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
122+
ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);
123123

124124
for(i=0; i<5; i++){
125-
ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS);
125+
ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
126126
}
127127

128128
}
@@ -145,7 +145,7 @@ CTEST(drotmg, rotmg_D1eqD2_X1eqX2)
145145
te_param[i]=tr_param[i]=0.0;
146146
}
147147

148-
//reference values as calulated by netlib blas
148+
//reference values as calculated by netlib blas
149149
tr_d1= 1.0;
150150
tr_d2= 1.0;
151151
tr_x1= 16.0;
@@ -160,12 +160,47 @@ CTEST(drotmg, rotmg_D1eqD2_X1eqX2)
160160
//OpenBLAS
161161
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
162162

163-
ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS);
164-
ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS);
165-
ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS);
166-
ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS);
163+
ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
164+
ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
165+
ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
166+
ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);
167+
168+
for(i=0; i<5; i++){
169+
ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
170+
}
171+
}
172+
173+
CTEST(drotmg, drotmg_D1_big_D2_big_flag_zero)
174+
{
175+
double te_d1, tr_d1;
176+
double te_d2, tr_d2;
177+
double te_x1, tr_x1;
178+
double te_y1, tr_y1;
179+
double te_param[5]={1.,4096.,-4096.,1.,4096.};
180+
double tr_param[5]={-1.,4096.,-3584.,1792.,4096.};
181+
int i=0;
182+
te_d1= tr_d1=1600000000.;
183+
te_d2= tr_d2=800000000.;
184+
te_x1= tr_x1=8.;
185+
te_y1= tr_y1=7.;
186+
187+
188+
//reference values as calculated by gonum
189+
tr_d1= 68.96627824858757;
190+
tr_d2= 34.483139124293785;
191+
tr_x1= 45312.;
192+
tr_y1= 7.0;
193+
194+
195+
//OpenBLAS
196+
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
197+
198+
ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
199+
ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
200+
ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
201+
ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);
167202

168203
for(i=0; i<5; i++){
169-
ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS);
204+
ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
170205
}
171206
}

0 commit comments

Comments
 (0)