@@ -103,66 +103,10 @@ The kernel arguments are listed after the configuration parameters.
103103
104104.. code-block :: cpp
105105
106- #include <hip/hip_runtime.h>
107- #include <iostream>
108-
109- #define HIP_CHECK(expression) \
110- { \
111- const hipError_t err = expression; \
112- if(err != hipSuccess){ \
113- std::cerr << "HIP error: " << hipGetErrorString(err) \
114- << " at " << __LINE__ << "\n"; \
115- } \
116- }
117-
118- // Performs a simple initialization of an array with the thread's index variables.
119- // This function is only available in device code.
120- __device__ void init_array(float * const a, const unsigned int arraySize){
121- // globalIdx uniquely identifies a thread in a 1D launch configuration.
122- const int globalIdx = threadIdx.x + blockIdx.x * blockDim.x;
123- // Each thread initializes a single element of the array.
124- if(globalIdx < arraySize){
125- a[globalIdx] = globalIdx;
126- }
127- }
128-
129- // Rounds a value up to the next multiple.
130- // This function is available in host and device code.
131- __host__ __device__ constexpr int round_up_to_nearest_multiple(int number, int multiple){
132- return (number + multiple - 1)/multiple;
133- }
134-
135- __global__ void example_kernel(float * const a, const unsigned int N)
136- {
137- // Initialize array.
138- init_array(a, N);
139- // Perform additional work:
140- // - work with the array
141- // - use the array in a different kernel
142- // - ...
143- }
144-
145- int main()
146- {
147- constexpr int N = 100000000; // problem size
148- constexpr int blockSize = 256; //configurable block size
149-
150- //needed number of blocks for the given problem size
151- constexpr int gridSize = round_up_to_nearest_multiple(N, blockSize);
152-
153- float *a;
154- // allocate memory on the GPU
155- HIP_CHECK(hipMalloc(&a, sizeof(*a) * N));
156-
157- std::cout << "Launching kernel." << std::endl;
158- example_kernel<<<dim3(gridSize), dim3(blockSize), 0/*example doesn't use shared memory*/, 0/*default stream*/>>>(a, N);
159- // make sure kernel execution is finished by synchronizing. The CPU can also
160- // execute other instructions during that time
161- HIP_CHECK(hipDeviceSynchronize());
162- std::cout << "Kernel execution finished." << std::endl;
163-
164- HIP_CHECK(hipFree(a));
165- }
106+ .. literalinclude:: ../tools/example_codes/calling_global_functions.hip
107+ :start-after: // [sphinx-start]
108+ :end-before: // [sphinx-end]
109+ :language: cpp
166110
167111 Inline qualifiers
168112--------------------------------------------------------------------------------
@@ -321,28 +265,10 @@ launch has to specify the needed amount of ``extern`` shared memory in the launc
321265configuration. The statically allocated shared memory is allocated without this
322266parameter.
323267
324- .. code-block :: cpp
325-
326- #include <hip/hip_runtime.h>
327-
328- extern __shared__ int shared_array[];
329-
330- __global__ void kernel(){
331- // initialize shared memory
332- shared_array[threadIdx.x] = threadIdx.x;
333- // use shared memory - synchronize to make sure, that all threads of the
334- // block see all changes to shared memory
335- __syncthreads();
336- }
337-
338- int main(){
339- //shared memory in this case depends on the configurable block size
340- constexpr int blockSize = 256;
341- constexpr int sharedMemSize = blockSize * sizeof(int);
342- constexpr int gridSize = 2;
343-
344- kernel<<<dim3(gridSize), dim3(blockSize), sharedMemSize, 0>>>();
345- }
268+ .. literalinclude :: ../tools/example_codes/extern_shared_memory.hip
269+ :start-after: // [sphinx-start]
270+ :end-before: // [sphinx-end]
271+ :language: cpp
346272
347273__managed__
348274--------------------------------------------------------------------------------
@@ -735,22 +661,18 @@ with the actual frequency.
735661
736662The difference between the returned values represents the cycles used.
737663
738- .. code-block :: cpp
739-
740- __global void kernel(){
741- long long int start = clock64();
742- // kernel code
743- long long int stop = clock64();
744- long long int cycles = stop - start;
745- }
664+ .. literalinclude :: ../tools/example_codes/timer.hip
665+ :start-after: // [sphinx-kernel-start]
666+ :end-before: // [sphinx-kernel-end]
667+ :language: cpp
746668
748670``long long int wall_clock64()`` returns the wall clock time on the device, with a constant, fixed frequency.
748670The frequency is device dependent and can be queried using:
749671
750- .. code-block :: cpp
751-
752- int wallClkRate = 0; //in kilohertz
753- hipDeviceGetAttribute(&wallClkRate, hipDeviceAttributeWallClockRate, deviceId);
672+ .. literalinclude :: ../tools/example_codes/timer.hip
673+ :start-after: // [sphinx-query-start]
674+ :end-before: // [sphinx-query-end]
675+ :language: cpp
754676
755677.. _atomic functions :
756678
0 commit comments