From 21a4d724a729e708bbbf3ae9d61e7587b846e527 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Wed, 7 May 2025 07:39:29 -0700 Subject: [PATCH 1/3] Use dynamically allocated arrays to overcome limits on static array sizes --- stream.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/stream.c b/stream.c index 9bbd6ce..e866efb 100644 --- a/stream.c +++ b/stream.c @@ -42,6 +42,7 @@ /*-----------------------------------------------------------------------*/ # include # include +# include # include # include # include @@ -91,7 +92,7 @@ * per array. */ #ifndef STREAM_ARRAY_SIZE -# define STREAM_ARRAY_SIZE 10000000 +# define STREAM_ARRAY_SIZE 10000000l #endif /* 2) STREAM runs each kernel "NTIMES" times and reports the *best* result @@ -176,9 +177,9 @@ #define STREAM_TYPE double #endif -static STREAM_TYPE a[STREAM_ARRAY_SIZE+OFFSET], - b[STREAM_ARRAY_SIZE+OFFSET], - c[STREAM_ARRAY_SIZE+OFFSET]; +static STREAM_TYPE* a = NULL; +static STREAM_TYPE* b = NULL; +static STREAM_TYPE* c = NULL; static double avgtime[4] = {0}, maxtime[4] = {0}, mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; @@ -214,6 +215,10 @@ main() STREAM_TYPE scalar; double t, times[4][NTIMES]; + a= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE)); + b= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE)); + c= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE)); + /* --- SETUP --- determine precision and check timing --- */ printf(HLINE); From 20ed7fa60f4d1258c60f229a7bf3f28c21492bb6 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Wed, 7 May 2025 08:01:40 -0700 Subject: [PATCH 2/3] Add support for OMPGPU and OMPGPU_UNIFIED --- stream.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/stream.c b/stream.c index e866efb..85d0a41 100644 --- a/stream.c +++ b/stream.c @@ -40,6 +40,13 @@ /* program constitutes acceptance of these licensing restrictions. */ /* 5. Absolutely no warranty is expressed or implied. */ /*-----------------------------------------------------------------------*/ +#ifdef OMPGPU +#ifdef OMPGPU_UNIFIED +#pragma omp requires unified_address +#pragma omp requires unified_shared_memory +#endif +#endif + # include # include # include @@ -92,7 +99,7 @@ * per array. */ #ifndef STREAM_ARRAY_SIZE -# define STREAM_ARRAY_SIZE 10000000l +# define STREAM_ARRAY_SIZE 1000000000l #endif /* 2) STREAM runs each kernel "NTIMES" times and reports the *best* result @@ -219,6 +226,13 @@ main() b= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE)); c= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE)); +#ifdef OMPGPU +#ifndef OMPGPU_UNIFIED +#pragma omp target enter data map(to:a[0:(STREAM_ARRAY_SIZE+OFFSET)]) +#pragma omp target enter data map(to:b[0:(STREAM_ARRAY_SIZE+OFFSET)]) +#pragma omp target enter data map(to:c[0:(STREAM_ARRAY_SIZE+OFFSET)]) +#endif +#endif /* --- SETUP --- determine precision and check timing --- */ printf(HLINE); @@ -250,6 +264,9 @@ main() #ifdef _OPENMP printf(HLINE); +#ifdef OMPGPU + printf ("Using accelerator\n"); +#else #pragma omp parallel { #pragma omp master @@ -259,17 +276,24 @@ main() } } #endif +#endif +#ifndef OMPGPU #ifdef _OPENMP k = 0; #pragma omp parallel #pragma omp atomic k++; printf ("Number of Threads counted = %i\n",k); +#endif #endif /* Get initial value for system clock. */ +#ifdef OMPGPU +#pragma omp target teams distribute parallel for map(tofrom:a[0:(STREAM_ARRAY_SIZE+OFFSET)],b[0:(STREAM_ARRAY_SIZE+OFFSET)],c[0:(STREAM_ARRAY_SIZE+OFFSET)]) +#else #pragma omp parallel for +#endif for (j=0; j Date: Wed, 7 May 2025 08:36:18 -0700 Subject: [PATCH 3/3] Revert change to array size The original change was a mistake --- stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stream.c b/stream.c index 85d0a41..4f0221a 100644 --- a/stream.c +++ b/stream.c @@ -99,7 +99,7 @@ * per array. */ #ifndef STREAM_ARRAY_SIZE -# define STREAM_ARRAY_SIZE 1000000000l +# define STREAM_ARRAY_SIZE 10000000l #endif /* 2) STREAM runs each kernel "NTIMES" times and reports the *best* result