1414namespace TutorialApp
1515{
1616 /// <summary>
17- /// Demonstrates the use of GPU and CPU parallel processing for neighbor summation in 2D arrays.
17+ /// Demonstrates the use of GPU and CPU parallel processing for neighbor summation in six 2D arrays
18+ /// using different GPU allocation modes and measuring performance.
19+ /// GPU processing uses the default stream on a particular GPU accelerator.
1820 /// </summary>
19- /// <remarks>The <see cref="AllocatorExample"/> class initializes large and small 2D arrays of random
20- /// float values and provides functionality to compute the sum of neighboring elements for each element in the
21- /// arrays. It leverages both CPU and GPU resources for parallel processing, depending on the current mode of the
22- /// <see cref="GPUAllocator"/> singleton. The class also demonstrates switching between different GPU allocation
23- /// modes and measuring performance.</remarks>
2421 public class AllocatorExample
2522 {
26- private readonly float [ , ] _big2DArrayOfFloats ;
27- private readonly float [ , ] _small2DArrayOfFloats ;
23+ private readonly float [ , ] _2DArrayOfFloats1 ;
24+ private readonly float [ , ] _2DArrayOfFloats2 ;
25+ private readonly float [ , ] _2DArrayOfFloats3 ;
26+ private readonly float [ , ] _2DArrayOfFloats4 ;
27+ private readonly float [ , ] _2DArrayOfFloats5 ;
28+ private readonly float [ , ] _2DArrayOfFloats6 ;
2829
2930 /// <summary>
3031 /// Initializes a new instance of the <see cref="AllocatorExample"/> class.
3132 /// </summary>
32- /// <remarks>This constructor initializes two 2D arrays
33- /// populated with random float values between 0.0 and 1.0.</remarks>
33+ /// <remarks>This constructor initializes six 2D arrays
34+ /// populated with random float values between 0.0 and 1.0.
35+ /// The constructor also reports the GPUs available
36+ /// on the host machine (for information only).</remarks>
3437 public AllocatorExample ( )
3538 {
3639 Console . Clear ( ) ;
3740 Random rand = new ( ) ;
38- _big2DArrayOfFloats = new float [ 1080 , 1920 ] ;
39- for ( int i = 0 ; i < _big2DArrayOfFloats . GetLength ( 0 ) ; i ++ )
41+ int height = 600 ;
42+ int width = 800 ;
43+ _2DArrayOfFloats1 = new float [ height , width ] ;
44+ _2DArrayOfFloats2 = new float [ height , width ] ;
45+ _2DArrayOfFloats3 = new float [ height , width ] ;
46+ _2DArrayOfFloats4 = new float [ height , width ] ;
47+ _2DArrayOfFloats5 = new float [ height , width ] ;
48+ _2DArrayOfFloats6 = new float [ height , width ] ;
49+ for ( int i = 0 ; i < height ; i ++ )
4050 {
41- for ( int j = 0 ; j < _big2DArrayOfFloats . GetLength ( 1 ) ; j ++ )
51+ for ( int j = 0 ; j < width ; j ++ )
4252 {
43- _big2DArrayOfFloats [ i , j ] = ( float ) rand . NextDouble ( ) ;
44- }
45- }
46- _small2DArrayOfFloats = new float [ 600 , 800 ] ;
47- for ( int i = 0 ; i < _small2DArrayOfFloats . GetLength ( 0 ) ; i ++ )
48- {
49- for ( int j = 0 ; j < _small2DArrayOfFloats . GetLength ( 1 ) ; j ++ )
50- {
51- _small2DArrayOfFloats [ i , j ] = ( float ) rand . NextDouble ( ) ;
53+ _2DArrayOfFloats1 [ i , j ] = ( float ) rand . NextDouble ( ) ;
54+ _2DArrayOfFloats2 [ i , j ] = ( float ) rand . NextDouble ( ) ;
55+ _2DArrayOfFloats3 [ i , j ] = ( float ) rand . NextDouble ( ) ;
56+ _2DArrayOfFloats4 [ i , j ] = ( float ) rand . NextDouble ( ) ;
57+ _2DArrayOfFloats5 [ i , j ] = ( float ) rand . NextDouble ( ) ;
58+ _2DArrayOfFloats6 [ i , j ] = ( float ) rand . NextDouble ( ) ;
5259 }
5360 }
5461 Console . WriteLine ( "Available GPUs" ) ;
@@ -66,7 +73,7 @@ public AllocatorExample()
6673 /// <remarks>This method processes the input array using either CPU or GPU resources, depending on
6774 /// availability. If a GPU is available, the computation is offloaded to the GPU for improved performance.
6875 /// Otherwise, the computation is performed on the CPU using parallel processing. The radius for neighbor
69- /// summation is fixed at 5. Neighboring elements are considered only if they fall within the bounds of the
76+ /// summation is fixed at 5. Neighboring elements are considered only if they fall within the bounds of the
7077 /// input array. The method is thread-safe and can be used in multi-threaded environments.</remarks>
7178 /// <param name="threadName">The name of the thread or task performing the operation, used for logging purposes.</param>
7279 /// <param name="input">A 2D array of floating-point numbers representing the input data. Must not be null.</param>
@@ -198,20 +205,11 @@ static void GPUWorkChung(ArrayView2D<float, Stride2D.DenseY> input, ArrayView2D<
198205
199206 /// <summary>
200207 /// Executes a sequence of six parallel operations, each performing a neighbor sum calculation on a specified 2D
201- /// arrays of floating-point numbers. One operation is performed on a large array, while the other five
202- /// on small array.
208+ /// array of floating-point numbers.
203209 /// </summary>
204210 /// <remarks>This method utilizes <see cref="System.Threading.Tasks.Parallel.Invoke"/> to execute
205211 /// multiple neighbor sum calculations concurrently. The results of these calculations are returned as a tuple
206212 /// of six 2D arrays.</remarks>
207- /// <returns>A tuple containing six 2D arrays of floating-point numbers, where each array represents the result of a
208- /// neighbor sum calculation performed in parallel. The arrays are returned in the following order: <list
209- /// type="number"> <item><description>Result of the "T1 big" neighbor sum calculation.</description></item>
210- /// <item><description>Result of the "T2 small" neighbor sum calculation.</description></item>
211- /// <item><description>Result of the "T3 small" neighbor sum calculation.</description></item>
212- /// <item><description>Result of the "T4 small" neighbor sum calculation.</description></item>
213- /// <item><description>Result of the "T5 small" neighbor sum calculation.</description></item>
214- /// <item><description>Result of the "T6 small" neighbor sum calculation.</description></item> </list></returns>
215213 private ( float [ , ] , float [ , ] , float [ , ] , float [ , ] , float [ , ] , float [ , ] ) ExecuteParallelSequence ( )
216214 {
217215 Stopwatch sw = new ( ) ;
@@ -222,12 +220,12 @@ static void GPUWorkChung(ArrayView2D<float, Stride2D.DenseY> input, ArrayView2D<
222220 sw . Start ( ) ;
223221 // Execute the neighbor sum calculations in parallel
224222 Parallel . Invoke (
225- ( ) => { resultT1 = NeighborSum ( $ "T1 { _big2DArrayOfFloats . GetLength ( 0 ) } x { _big2DArrayOfFloats . GetLength ( 1 ) } ", _big2DArrayOfFloats ) ; } ,
226- ( ) => { resultT2 = NeighborSum ( $ "T2 { _small2DArrayOfFloats . GetLength ( 0 ) } x { _small2DArrayOfFloats . GetLength ( 1 ) } ", _small2DArrayOfFloats ) ; } ,
227- ( ) => { resultT3 = NeighborSum ( $ "T3 { _small2DArrayOfFloats . GetLength ( 0 ) } x { _small2DArrayOfFloats . GetLength ( 1 ) } ", _small2DArrayOfFloats ) ; } ,
228- ( ) => { resultT4 = NeighborSum ( $ "T4 { _small2DArrayOfFloats . GetLength ( 0 ) } x { _small2DArrayOfFloats . GetLength ( 1 ) } ", _small2DArrayOfFloats ) ; } ,
229- ( ) => { resultT5 = NeighborSum ( $ "T5 { _small2DArrayOfFloats . GetLength ( 0 ) } x { _small2DArrayOfFloats . GetLength ( 1 ) } ", _small2DArrayOfFloats ) ; } ,
230- ( ) => { resultT6 = NeighborSum ( $ "T6 { _small2DArrayOfFloats . GetLength ( 0 ) } x { _small2DArrayOfFloats . GetLength ( 1 ) } ", _small2DArrayOfFloats ) ; }
223+ ( ) => { resultT1 = NeighborSum ( $ "T1", _2DArrayOfFloats1 ) ; } ,
224+ ( ) => { resultT2 = NeighborSum ( $ "T2", _2DArrayOfFloats2 ) ; } ,
225+ ( ) => { resultT3 = NeighborSum ( $ "T3", _2DArrayOfFloats3 ) ; } ,
226+ ( ) => { resultT4 = NeighborSum ( $ "T4", _2DArrayOfFloats4 ) ; } ,
227+ ( ) => { resultT5 = NeighborSum ( $ "T5", _2DArrayOfFloats5 ) ; } ,
228+ ( ) => { resultT6 = NeighborSum ( $ "T6", _2DArrayOfFloats6 ) ; }
231229 ) ;
232230 // Stop measuring time
233231 sw . Stop ( ) ;
@@ -236,11 +234,11 @@ static void GPUWorkChung(ArrayView2D<float, Stride2D.DenseY> input, ArrayView2D<
236234 }
237235
238236 /// <summary>
239- /// Executes a series of parallel operations using different GPU allocation modes.
237+ /// Executes a series of parallel calculations using different GPU allocation modes.
240238 /// </summary>
241239 /// <remarks>This method sequentially sets the GPU allocation mode to various configurations
242- /// (NoAccelerator, MostPowerfulGPU, LeastPowerfulGPU, and Standard) and executes parallel operations for each
243- /// mode.</remarks>
240+ /// (NoAccelerator, MostPowerfulGPU, LeastPowerfulGPU and Standard) and executes parallel
241+ /// calculations for each mode.</remarks>
244242 public void Run ( )
245243 {
246244 //Set the GPU allocation mode to NoAccelerator to force CPU processing
0 commit comments