Skip to content

Commit 4c765b2

Browse files
neon60Istvan Kiss
authored andcommitted
Include examples from rocm examples source
1 parent beb8beb commit 4c765b2

File tree

77 files changed

+6921
-2534
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+6921
-2534
lines changed

docs/how-to/hip_cpp_language_extensions.rst

Lines changed: 16 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -103,66 +103,10 @@ The kernel arguments are listed after the configuration parameters.
103103

104104
.. code-block:: cpp
105105
106-
#include <hip/hip_runtime.h>
107-
#include <iostream>
108-
109-
#define HIP_CHECK(expression) \
110-
{ \
111-
const hipError_t err = expression; \
112-
if(err != hipSuccess){ \
113-
std::cerr << "HIP error: " << hipGetErrorString(err) \
114-
<< " at " << __LINE__ << "\n"; \
115-
} \
116-
}
117-
118-
// Performs a simple initialization of an array with the thread's index variables.
119-
// This function is only available in device code.
120-
__device__ void init_array(float * const a, const unsigned int arraySize){
121-
// globalIdx uniquely identifies a thread in a 1D launch configuration.
122-
const int globalIdx = threadIdx.x + blockIdx.x * blockDim.x;
123-
// Each thread initializes a single element of the array.
124-
if(globalIdx < arraySize){
125-
a[globalIdx] = globalIdx;
126-
}
127-
}
128-
129-
// Computes how many multiples are needed to cover a value (ceiling division).
130-
// This function is available in host and device code.
131-
__host__ __device__ constexpr int round_up_to_nearest_multiple(int number, int multiple){
132-
return (number + multiple - 1)/multiple;
133-
}
134-
135-
__global__ void example_kernel(float * const a, const unsigned int N)
136-
{
137-
// Initialize array.
138-
init_array(a, N);
139-
// Perform additional work:
140-
// - work with the array
141-
// - use the array in a different kernel
142-
// - ...
143-
}
144-
145-
int main()
146-
{
147-
constexpr int N = 100000000; // problem size
148-
constexpr int blockSize = 256; //configurable block size
149-
150-
//needed number of blocks for the given problem size
151-
constexpr int gridSize = round_up_to_nearest_multiple(N, blockSize);
152-
153-
float *a;
154-
// allocate memory on the GPU
155-
HIP_CHECK(hipMalloc(&a, sizeof(*a) * N));
156-
157-
std::cout << "Launching kernel." << std::endl;
158-
example_kernel<<<dim3(gridSize), dim3(blockSize), 0/*example doesn't use shared memory*/, 0/*default stream*/>>>(a, N);
159-
// make sure kernel execution is finished by synchronizing. The CPU can also
160-
// execute other instructions during that time
161-
HIP_CHECK(hipDeviceSynchronize());
162-
std::cout << "Kernel execution finished." << std::endl;
163-
164-
HIP_CHECK(hipFree(a));
165-
}
106+
.. literalinclude:: ../tools/example_codes/calling_global_functions.hip
107+
:start-after: // [sphinx-start]
108+
:end-before: // [sphinx-end]
109+
:language: cpp
166110
167111
Inline qualifiers
168112
--------------------------------------------------------------------------------
@@ -321,28 +265,10 @@ launch has to specify the needed amount of ``extern`` shared memory in the launc
321265
configuration. The statically allocated shared memory is allocated without this
322266
parameter.
323267

324-
.. code-block:: cpp
325-
326-
#include <hip/hip_runtime.h>
327-
328-
extern __shared__ int shared_array[];
329-
330-
__global__ void kernel(){
331-
// initialize shared memory
332-
shared_array[threadIdx.x] = threadIdx.x;
333-
// use shared memory - synchronize to make sure that all threads of the
334-
// block see all changes to shared memory
335-
__syncthreads();
336-
}
337-
338-
int main(){
339-
//shared memory in this case depends on the configurable block size
340-
constexpr int blockSize = 256;
341-
constexpr int sharedMemSize = blockSize * sizeof(int);
342-
constexpr int gridSize = 2;
343-
344-
kernel<<<dim3(gridSize), dim3(blockSize), sharedMemSize, 0>>>();
345-
}
268+
.. literalinclude:: ../tools/example_codes/extern_shared_memory.hip
269+
:start-after: // [sphinx-start]
270+
:end-before: // [sphinx-end]
271+
:language: cpp
346272

347273
__managed__
348274
--------------------------------------------------------------------------------
@@ -735,22 +661,18 @@ with the actual frequency.
735661

736662
The difference between the returned values represents the cycles used.
737663

738-
.. code-block:: cpp
739-
740-
__global__ void kernel(){
741-
long long int start = clock64();
742-
// kernel code
743-
long long int stop = clock64();
744-
long long int cycles = stop - start;
745-
}
664+
.. literalinclude:: ../tools/example_codes/timer.hip
665+
:start-after: // [sphinx-kernel-start]
666+
:end-before: // [sphinx-kernel-end]
667+
:language: cpp
746668

747669
``long long int wall_clock64()`` returns the wall clock time on the device, with a constant, fixed frequency.
748670
The frequency is device dependent and can be queried using:
749671

750-
.. code-block:: cpp
751-
752-
int wallClkRate = 0; //in kilohertz
753-
hipDeviceGetAttribute(&wallClkRate, hipDeviceAttributeWallClockRate, deviceId);
672+
.. literalinclude:: ../tools/example_codes/timer.hip
673+
:start-after: // [sphinx-query-start]
674+
:end-before: // [sphinx-query-end]
675+
:language: cpp
754676

755677
.. _atomic functions:
756678

0 commit comments

Comments
 (0)