Skip to content

Commit cf6f24e

Browse files
authored
Merge pull request #20 from ajarmusch/master
Fixes
2 parents 7d67060 + b1509af commit cf6f24e

File tree

2 files changed

+82
-26
lines changed

2 files changed

+82
-26
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# OpenACCV-V
22
This repository is updated with OpenACC test codes that validate and verify implementations of OpenACC features and their conformance to the OpenACC specification. We are continuously adding and editing tests to conform to the latest version of the [OpenACC Specification](https://www.openacc.org/specification).
33

4+
Consult our website for more details on results and our project: [OpenACC V&V Website](https://crpl.cis.udel.edu/oaccvv/).
5+
46
## Executing program
57

68
To run the suite, we have provided a Python script, `infrastructure.py`. It is recommended to use Python 3.3 or later. Once Python has been loaded into your environment, the script can be invoked with the command

Tests/parallel_loop_auto.c

Lines changed: 80 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,113 @@
11
#include "acc_testsuite.h"
22
#ifndef T1
33
//T1:parallel,loop,combined-constructs,syntactic,V:2.0-2.7
4+
//data independent, treated as an independent clause
45
int test1(){
56
int err = 0;
67
srand(SEED);
7-
real_t * a = (real_t *)malloc(n * sizeof(real_t));
8-
real_t * a_copy = (real_t *)malloc(n * sizeof(real_t));
9-
real_t * b = (real_t *)malloc(n * sizeof(real_t));
8+
real_t * value = (real_t *)malloc(n * sizeof(real_t));
9+
real_t * empty = (real_t *)malloc(n * sizeof(real_t));
1010

1111
for (int x = 0; x < n; ++x){
12-
a[x] = rand() / (real_t)(RAND_MAX / 10);
13-
b[x] = 0;
12+
value[x] = rand() / (real_t)(RAND_MAX / 10);
13+
empty[x] = 0;
1414
}
1515

16-
#pragma acc data copyin(a[0:n]) copyout(b[0:n])
16+
#pragma acc data copyin(value[0:n]) copy(empty[0:n])
1717
{
1818
#pragma acc parallel loop auto
1919
for (int x = 0; x < n; ++x){
20-
b[x] = a[x];
20+
empty[x] = value[x];
2121
}
2222
}
2323

2424
for (int x = 0; x < n; ++x){
25-
if (fabs(b[x] - a[x]) > PRECISION){
25+
if (fabs(empty[x] - value[x]) > PRECISION){
2626
err = 1;
2727
}
2828
}
2929

30+
free(value);
31+
free(empty);
32+
3033
return err;
3134
}
3235
#endif
3336

3437
#ifndef T2
35-
//T2:parallel,loop,combined-constructs,V:2.0-2.7
36-
int test2(){
38+
//T2:parallel,loop,combined-constructs,syntactic,V:2.0-2.7
39+
//data dependent, treated as a seq clause. Added the num_gangs clause with a value of 1
40+
int test2(){
3741
int err = 0;
3842
srand(SEED);
39-
real_t * a = (real_t *)malloc(n * sizeof(real_t));
40-
real_t * a_copy = (real_t *)malloc(n * sizeof(real_t));
41-
real_t * b = (real_t *)malloc(n * sizeof(real_t));
43+
real_t * device = (real_t *)malloc(n * sizeof(real_t));
44+
real_t * host = (real_t *)malloc(n * sizeof(real_t));
4245

4346
for (int x = 0; x < n; ++x){
44-
a[x] = rand() / (real_t)(RAND_MAX / 10);
45-
a_copy[x] = a[x];
47+
device[x] = rand() / (real_t)(RAND_MAX / 10);
48+
host[x] = device[x];
4649
}
4750

48-
#pragma acc data copy(a[0:n])
51+
#pragma acc data copy(device[0:n])
4952
{
50-
#pragma acc parallel loop auto
53+
#pragma acc parallel loop num_gangs(1) auto
54+
for (int x = 1; x < n; ++x){
55+
device[x] = device[x - 1] + device[x];
56+
}
57+
}
58+
59+
real_t rolling_total = 0.0;
60+
for (int x = 0; x < n; ++x){
61+
rolling_total += host[x];
62+
if (fabs(rolling_total - device[x]) > PRECISION){
63+
err = 1;
64+
}
65+
}
66+
67+
free(device);
68+
free(host);
69+
70+
return err;
71+
}
72+
#endif
73+
74+
#ifndef T3
75+
//T3:parallel,loop,combined-constructs,V:2.0-2.7
76+
//data dependent, treated as a seq clause.
77+
int test3(){
78+
int err = 0;
79+
srand(SEED);
80+
real_t * device = (real_t *)malloc(n * sizeof(real_t));
81+
real_t * host = (real_t *)malloc(n * sizeof(real_t));
82+
83+
for (int x = 0; x < n; ++x){
84+
device[x] = rand() / (real_t)(RAND_MAX / 10);
85+
host[x] = device[x];
86+
}
87+
88+
#pragma acc data copy(device[0:n])
89+
{
90+
#pragma acc parallel loop num_gangs(1) vector worker auto
5191
for (int x = 1; x < n; ++x){
52-
a[x] = a[x - 1] + a[x];
92+
device[x] = device[x - 1] + device[x];
5393
}
5494
}
5595

5696
real_t rolling_total = 0.0;
5797
for (int x = 0; x < n; ++x){
58-
rolling_total += a_copy[x];
59-
if (fabs(rolling_total - a[x]) > PRECISION){
98+
rolling_total += host[x];
99+
if (fabs(rolling_total - device[x]) > PRECISION){
60100
err = 1;
61101
}
62102
}
63103

104+
free(device);
105+
free(host);
106+
64107
return err;
65108
}
109+
110+
66111
#endif
67112

68113
int main(){
@@ -71,19 +116,28 @@ int main(){
71116
#ifndef T1
72117
failed = 0;
73118
for (int x = 0; x < NUM_TEST_CALLS; ++x){
74-
failed = failed + test1();
119+
failed += test1();
75120
}
76-
if (failed != 0){
77-
failcode = failcode + (1 << 0);
121+
if (failed){
122+
failcode += (1 << 0);
78123
}
79124
#endif
80125
#ifndef T2
81126
failed = 0;
82127
for (int x = 0; x < NUM_TEST_CALLS; ++x){
83-
failed = failed + test2();
128+
failed += test2();
129+
}
130+
if (failed){
131+
failcode += (1 << 1);
132+
}
133+
#endif
134+
#ifndef T3
135+
failed = 0;
136+
for (int x = 0; x < NUM_TEST_CALLS; ++x){
137+
failed += test3();
84138
}
85-
if (failed != 0){
86-
failcode = failcode + (1 << 1);
139+
if (failed){
140+
failcode += (1 << 2);
87141
}
88142
#endif
89143
return failcode;

0 commit comments

Comments
 (0)