Skip to content

Commit 69d38d9

Browse files
author
Kent Knox
committed
Adding additional trsm samples
1 parent 199b7c0 commit 69d38d9

File tree

5 files changed

+392
-15
lines changed

5 files changed

+392
-15
lines changed

src/samples/CMakeLists.txt

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# ########################################################################
22
# Copyright 2013 Advanced Micro Devices, Inc.
3-
#
3+
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
66
# You may obtain a copy of the License at
7-
#
7+
#
88
# http://www.apache.org/licenses/LICENSE-2.0
9-
#
9+
#
1010
# Unless required by applicable law or agreed to in writing, software
1111
# distributed under the License is distributed on an "AS IS" BASIS,
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,6 +19,8 @@ set(SSYMV_SAMPLE_SRC example_ssymv.c)
1919
set(SGEMM_SAMPLE_SRC example_sgemm.c)
2020
set(STRMM_SAMPLE_SRC example_strmm.c)
2121
set(STRSM_SAMPLE_SRC example_strsm.c)
22+
set(STRSM_SAMPLE_SRCPP example_strsm.cpp)
23+
set(CTRSM_SAMPLE_SRC example_ctrsm.c)
2224
set(SSYRK_SAMPLE_SRC example_ssyrk.c)
2325
set(SSYR2K_SAMPLE_SRC example_ssyr2k.c)
2426

@@ -91,6 +93,14 @@ add_executable(example_strsm ${STRSM_SAMPLE_SRC})
9193
target_link_libraries(example_strsm ${OPENCL_LIBRARIES} clBLAS)
9294
set_property( TARGET example_strsm PROPERTY FOLDER "Samples")
9395

96+
add_executable(example_strsm_cpp ${STRSM_SAMPLE_SRCPP})
97+
target_link_libraries(example_strsm_cpp ${OPENCL_LIBRARIES} clBLAS)
98+
set_property( TARGET example_strsm_cpp PROPERTY FOLDER "Samples")
99+
100+
add_executable(example_ctrsm ${CTRSM_SAMPLE_SRC})
101+
target_link_libraries(example_ctrsm ${OPENCL_LIBRARIES} clBLAS)
102+
set_property( TARGET example_ctrsm PROPERTY FOLDER "Samples")
103+
94104
add_executable(example_ssyrk ${SSYRK_SAMPLE_SRC})
95105
target_link_libraries(example_ssyrk ${OPENCL_LIBRARIES} clBLAS)
96106
set_property( TARGET example_ssyrk PROPERTY FOLDER "Samples")
@@ -267,11 +277,11 @@ else( )
267277
set( CLBLAS_EXAMPLE_INSTALL_DESTINATION share/clBLAS/samples)
268278
endif()
269279
install( TARGETS example_sgemm example_sgemv example_ssymv example_ssyrk
270-
example_ssyr2k example_strmm example_strsm
271-
example_strmv example_strsv example_sger example_cher example_ssyr
280+
example_ssyr2k example_strmm example_strsm
281+
example_strmv example_strsv example_sger example_cher example_ssyr
272282
example_ssyr2 example_cherk example_ssymm example_chemm
273283
example_stpmv example_chpmv example_stpsv example_sspmv example_sspr example_chpr
274-
example_sspr2 example_zhpr2
284+
example_sspr2 example_zhpr2
275285
example_sgbmv example_stbmv example_ssbmv example_chbmv example_stbsv
276286
example_cher2k
277287
example_sswap example_sscal example_csscal example_scopy example_saxpy example_sdot
@@ -286,28 +296,28 @@ install( TARGETS example_sgemm example_sgemv example_ssymv example_ssyrk
286296

287297
configure_file( "${PROJECT_SOURCE_DIR}/samples/CMakeLists.pack"
288298
"${PROJECT_BINARY_DIR}/samples/CMakeLists.txt" COPYONLY )
289-
299+
290300
if( WIN32 )
291301
set( CLBLAS_SAMPLE_INSTALL_DESTINATION samples)
292302
else( )
293303
set( CLBLAS_SAMPLE_INSTALL_DESTINATION share/clBLAS/samples/src)
294304
endif()
295-
305+
296306
install(FILES
297307
example_sgemv.c
298-
example_ssymv.c
308+
example_ssymv.c
299309
example_sgemm.c
300310
example_strmm.c
301311
example_strsm.c
302312
example_ssyrk.c
303313
example_ssyr2k.c
304314
example_strmv.c
305315
example_strsv.c
306-
example_sger.c
307-
example_ssyr.c
316+
example_sger.c
317+
example_ssyr.c
308318
example_ssyr2.c
309319
example_ssymm.c
310-
example_cher.c
320+
example_cher.c
311321
example_chemm.cpp
312322
example_cherk.cpp
313323
example_ssymm.c

src/samples/example_ctrsm.c

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
/* ************************************************************************
2+
* Copyright 2013 Advanced Micro Devices, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
* ************************************************************************/
16+
17+
18+
#include <sys/types.h>
19+
#include <stdio.h>
20+
#include <string.h>
21+
22+
/* Include the clBLAS header. It automatically includes the required OpenCL
23+
* headers, so an explicit inclusion of cl.h is not needed.
24+
*/
25+
#include <clBLAS.h>
26+
27+
/* This example uses predefined matrices and their characteristics for
28+
* simplicity purpose.
29+
*/
30+
static const clblasOrder order = clblasRowMajor;
31+
static const clblasSide side = clblasLeft;
32+
33+
static const size_t M = 4;
34+
static const size_t N = 5;
35+
36+
static const FloatComplex alpha = { 10, 0 };
37+
38+
static const clblasTranspose transA = clblasNoTrans;
39+
static const clblasUplo uploA = clblasUpper;
40+
static const clblasDiag diagA = clblasNonUnit;
41+
static const FloatComplex A[] = {
42+
{ 11, 0 },{ 12, 0 },{ 13, 0 },{ 14, 0 },
43+
{ 0, 0 },{ 22, 0 },{ 23, 0 },{ 24, 0 },
44+
{ 0, 0 },{ 0, 0 },{ 33, 0 },{ 34, 0 },
45+
{ 0, 0 },{ 0, 0 },{ 0, 0 },{ 44, 0 }
46+
};
47+
static const size_t lda = 4; /* i.e. lda = M */
48+
49+
static FloatComplex B[] = {
50+
{ 11, 0 },{ 12, 0 },{ 13, 0 },{ 14, 0 },{ 15, 0 },
51+
{ 21, 0 },{ 22, 0 },{ 23, 0 },{ 24, 0 },{ 25, 0 },
52+
{ 31, 0 },{ 32, 0 },{ 33, 0 },{ 34, 0 },{ 35, 0 },
53+
{ 41, 0 },{ 42, 0 },{ 43, 0 },{ 44, 0 },{ 45, 0 },
54+
};
55+
static const size_t ldb = 5; /* i.e. ldb = N */
56+
57+
58+
static FloatComplex result[20]; /* ldb*M */
59+
60+
static const size_t off = 1;
61+
static const size_t offA = 4 + 1; /* M + off */
62+
static const size_t offB = 5 + 1; /* N + off */
63+
64+
static void
65+
printResult(const char* str)
66+
{
67+
size_t i, j, nrows;
68+
69+
printf("%s:\n", str);
70+
71+
nrows = (sizeof(result) / sizeof(FloatComplex)) / ldb;
72+
for (i = 0; i < nrows; i++) {
73+
for (j = 0; j < ldb; j++) {
74+
printf("%.5f ", result[i * ldb + j].x);
75+
}
76+
printf("\n");
77+
}
78+
}
79+
80+
int
81+
main(void)
82+
{
83+
cl_int err;
84+
cl_platform_id platform[] = { 0, 0 };
85+
cl_device_id device = 0;
86+
cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
87+
cl_context ctx = 0;
88+
cl_command_queue queue = 0;
89+
cl_mem bufA, bufB;
90+
cl_event event = NULL;
91+
int ret = 0;
92+
93+
/* Setup OpenCL environment. */
94+
err = clGetPlatformIDs(sizeof( platform ), &platform, NULL);
95+
if (err != CL_SUCCESS) {
96+
printf( "clGetPlatformIDs() failed with %d\n", err );
97+
return 1;
98+
}
99+
100+
err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
101+
if (err != CL_SUCCESS) {
102+
printf( "clGetDeviceIDs() failed with %d\n", err );
103+
return 1;
104+
}
105+
106+
props[1] = (cl_context_properties)platform;
107+
ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
108+
if (err != CL_SUCCESS) {
109+
printf( "clCreateContext() failed with %d\n", err );
110+
return 1;
111+
}
112+
113+
queue = clCreateCommandQueue(ctx, device, 0, &err);
114+
if (err != CL_SUCCESS) {
115+
printf( "clCreateCommandQueue() failed with %d\n", err );
116+
clReleaseContext(ctx);
117+
return 1;
118+
}
119+
120+
/* Setup clblas. */
121+
err = clblasSetup();
122+
if (err != CL_SUCCESS) {
123+
printf("clblasSetup() failed with %d\n", err);
124+
clReleaseCommandQueue(queue);
125+
clReleaseContext(ctx);
126+
return 1;
127+
}
128+
129+
/* Prepare OpenCL memory objects and place matrices inside them. */
130+
bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, M * M * sizeof(*A),
131+
NULL, &err);
132+
bufB = clCreateBuffer(ctx, CL_MEM_READ_WRITE, M * N * sizeof(*B),
133+
NULL, &err);
134+
135+
err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
136+
M * M * sizeof(*A), A, 0, NULL, NULL);
137+
err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
138+
M * N * sizeof(*B), B, 0, NULL, NULL);
139+
140+
/* Call clblas function. Perform TRSM for the lower right sub-matrices */
141+
err = clblasCtrsm(order, side, uploA, transA, diagA, M - off, N - off,
142+
alpha, bufA, offA, lda, bufB, offB, ldb, 1, &queue, 0,
143+
NULL, &event);
144+
if (err != CL_SUCCESS) {
145+
printf("clblasStrsmEx() failed with %d\n", err);
146+
ret = 1;
147+
}
148+
else {
149+
/* Wait for calculations to be finished. */
150+
err = clWaitForEvents(1, &event);
151+
152+
/* Fetch results of calculations from GPU memory. */
153+
err = clEnqueueReadBuffer(queue, bufB, CL_TRUE, 0,
154+
M * N * sizeof(*result),
155+
result, 0, NULL, NULL);
156+
157+
/* At this point you will get the result of STRSM placed in 'result' array. */
158+
puts("");
159+
printResult("clblasCtrsmEx result");
160+
}
161+
162+
/* Release OpenCL events. */
163+
clReleaseEvent(event);
164+
165+
/* Release OpenCL memory objects. */
166+
clReleaseMemObject(bufB);
167+
clReleaseMemObject(bufA);
168+
169+
/* Finalize work with clblas. */
170+
clblasTeardown();
171+
172+
/* Release OpenCL working objects. */
173+
clReleaseCommandQueue(queue);
174+
clReleaseContext(ctx);
175+
176+
return ret;
177+
}

0 commit comments

Comments
 (0)