11// REQUIRES: aspect-ext_intel_legacy_image
2- // RUN: %{build} -o %t.out
2+
3+ // %O0 added because of GSD-10960. Without it, IGC will fail with
4+ // an access violation error.
5+ // RUN: %{build} %O0 -o %t.out
36// RUN: %{run} %t.out
47
58// UNSUPPORTED: cuda
1316#include < sycl/detail/core.hpp>
1417using namespace sycl ;
1518
16- void init (uint32_t *A, uint32_t *B, size_t NumI32Elts) {
17- for (int I = 0 ; I < NumI32Elts; I++) {
18- A[I] = I;
19- B[I] = 0 ;
20- }
21- }
19+ template <int Dimensions> class CopyKernel ;
2220
23- int check (uint32_t *B, size_t NumI32Elts) {
24- for (int I = 0 ; I < NumI32Elts; I++) {
25- if (B[I] != I) {
26- std::cout << " Failed" << std::endl;
27- std::cerr << " Error for the index: " << I << " , computed: " << B[I]
28- << std::endl;
29- return 1 ;
30- }
31- }
32- std::cout << " Passed" << std::endl;
33- return 0 ;
34- }
21+ template <int Dimensions>
22+ bool testND (queue &Q, size_t XSize, size_t YSize, size_t ZSize = 1 ) {
3523
36- int test2D (queue &Q, size_t XSize, size_t YSize) {
37- std::cout << " Starting the test with size = {" << XSize << " , " << YSize
38- << " } ... " ;
39- size_t NumI32Elts = XSize * YSize * 4 ;
40- uint32_t *A = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
41- uint32_t *B = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
42- init (A, B, NumI32Elts);
24+ static_assert (Dimensions == 2 || Dimensions == 3 ,
25+ " Only 2D and 3D images are supported." );
4326
44- try {
45- image<2 > ImgA (A, image_channel_order::rgba,
46- image_channel_type::unsigned_int32, range<2 >{XSize, YSize});
47- image<2 > ImgB (B, image_channel_order::rgba,
48- image_channel_type::unsigned_int32, range<2 >{XSize, YSize});
27+ if constexpr (Dimensions == 2 )
28+ std::cout << " Starting the test with size = {" << XSize << " , " << YSize
29+ << " } ... " ;
30+ else
31+ std::cout << " Starting the test with size = {" << XSize << " , " << YSize
32+ << " , " << ZSize << " } ... " ;
4933
50- Q.submit ([&](handler &CGH) {
51- auto AAcc = ImgA.get_access <uint4, access::mode::read>(CGH);
52- auto BAcc = ImgB.get_access <uint4, access::mode::write>(CGH);
53- CGH.parallel_for <class I2D >(range<2 >{XSize, YSize}, [=](id<2 > Id) {
54- sycl::int2 Coord (Id[0 ], Id[1 ]);
55- BAcc.write (Coord, AAcc.read (Coord));
56- });
57- }).wait ();
58- } catch (exception const &e) {
59- std::cout << " Failed" << std::endl;
60- std::cerr << " SYCL Exception caught: " << e.what ();
61- return 1 ;
62- }
34+ const size_t NumI32Elts = XSize * YSize * ZSize * 4 ;
35+ range<Dimensions> ImgRange;
36+ if constexpr (Dimensions == 2 )
37+ ImgRange = range<Dimensions>{XSize, YSize};
38+ else
39+ ImgRange = range<Dimensions>{XSize, YSize, ZSize};
6340
64- int NumErrors = check (B, NumI32Elts);
65- free (A);
66- free (B);
67- return NumErrors;
68- }
41+ // Allocate input buffer and initialize it with some values.
42+ uint32_t *Input = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
43+ for (int i = 0 ; i < NumI32Elts; i++)
44+ Input[i] = i;
6945
70- int test3D (queue &Q, size_t XSize, size_t YSize, size_t ZSize) {
71- std::cout << " Starting the test with size = {" << XSize << " , " << YSize
72- << " , " << ZSize << " } ... " ;
73- size_t NumI32Elts = XSize * YSize * ZSize * 4 ;
74- uint32_t *A = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
75- uint32_t *B = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
76- init (A, B, NumI32Elts);
46+ // calloc to ensure that the output buffer is initialized to zero.
47+ uint32_t *Output = (uint32_t *)calloc (NumI32Elts, sizeof (uint32_t ));
7748
49+ // Create the image and submit the copy kernel.
7850 try {
79- image<3 > ImgA (A, image_channel_order::rgba,
80- image_channel_type::unsigned_int32,
81- range<3 >{XSize, YSize, ZSize});
82- image<3 > ImgB (B, image_channel_order::rgba,
83- image_channel_type::unsigned_int32,
84- range<3 >{XSize, YSize, ZSize});
51+ image<Dimensions> ImgA (Input, image_channel_order::rgba,
52+ image_channel_type::unsigned_int32, ImgRange);
53+ image<Dimensions> ImgB (Output, image_channel_order::rgba,
54+ image_channel_type::unsigned_int32, ImgRange);
8555
8656 Q.submit ([&](handler &CGH) {
87- auto AAcc = ImgA.get_access <uint4, access::mode::read>(CGH);
88- auto BAcc = ImgB.get_access <uint4, access::mode::write>(CGH);
89- CGH.parallel_for <class I3D >(range<3 >{XSize, YSize, ZSize},
90- [=](id<3 > Id) {
91- sycl::int4 Coord (Id[0 ], Id[1 ], Id[2 ], 0 );
92- BAcc.write (Coord, AAcc.read (Coord));
93- });
57+ auto AAcc = ImgA.template get_access <uint4, access::mode::read>(CGH);
58+ auto BAcc = ImgB.template get_access <uint4, access::mode::write>(CGH);
59+ CGH.parallel_for <CopyKernel<Dimensions>>(
60+ ImgRange, [=](id<Dimensions> Id) {
61+ // Use int2 for 2D and int4 for 3D images.
62+ if constexpr (Dimensions == 3 ) {
63+ sycl::int4 Coord (Id[0 ], Id[1 ], Id[2 ], 0 );
64+ BAcc.write (Coord, AAcc.read (Coord));
65+ } else {
66+ sycl::int2 Coord (Id[0 ], Id[1 ]);
67+ BAcc.write (Coord, AAcc.read (Coord));
68+ }
69+ });
9470 }).wait ();
9571 } catch (exception const &e) {
72+
9673 std::cout << " Failed" << std::endl;
9774 std::cerr << " SYCL Exception caught: " << e.what ();
75+ free (Input);
76+ free (Output);
9877 return 1 ;
9978 }
10079
101- int NumErrors = check (B, NumI32Elts);
102- free (A);
103- free (B);
104- return NumErrors;
80+ // Check the output buffer.
81+ bool HasError = false ;
82+ for (int i = 0 ; i < NumI32Elts; i++) {
83+ if (Output[i] != i) {
84+ HasError = true ;
85+ break ;
86+ }
87+ }
88+
89+ if (!HasError) {
90+ std::cout << " Passed" << std::endl;
91+ } else {
92+ std::cout << " Failed" << std::endl;
93+ }
94+
95+ free (Input);
96+ free (Output);
97+ return HasError;
10598}
10699
107100int main () {
108- int NumErrors = 0 ;
109-
110101 queue Q;
111102 device Dev = Q.get_device ();
112103 std::cout << " Running on " << Dev.get_info <info::device::name>()
@@ -127,17 +118,18 @@ int main() {
127118
128119 // Using max sizes in one image may require too much memory.
129120 // Check them one by one.
130- NumErrors += test2D (Q, MaxWidth2D, 2 );
131- NumErrors += test2D (Q, 2 , MaxHeight2D);
121+ bool HasError = false ;
122+ HasError |= testND<2 >(Q, MaxWidth2D, 2 );
123+ HasError |= testND<2 >(Q, 2 , MaxHeight2D);
132124
133- NumErrors += test3D (Q, MaxWidth3D, 2 , 3 );
134- NumErrors += test3D (Q, 2 , MaxHeight3D, 3 );
135- NumErrors += test3D (Q, 2 , 3 , MaxDepth3D);
125+ HasError |= testND< 3 > (Q, MaxWidth3D, 2 , 3 );
126+ HasError |= testND< 3 > (Q, 2 , MaxHeight3D, 3 );
127+ HasError |= testND< 3 > (Q, 2 , 3 , MaxDepth3D);
136128
137- if (NumErrors )
138- std::cerr << " Test failed." << std::endl;
129+ if (HasError )
130+ std::cout << " Test failed." << std::endl;
139131 else
140132 std::cout << " Test passed." << std::endl;
141133
142- return NumErrors ;
134+ return HasError ? 1 : 0 ;
143135}
0 commit comments