
Commit 4187aed

[Libraries/MPI] Host sample demonstrating notifications usage
1 parent 1bdf0ee commit 4187aed

File tree: 2 files changed (+158, -1 lines changed)


Libraries/MPI/jacobian_solver/src/04_jacobian_device_mpi_one-sided_device_initiated_notify/GNUmakefile

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ CXXFLAGS = -fsycl -Wall -Wformat-security -Werror=format-security
 # Use icx from DPC++ oneAPI toolkit to compile. Please source DPCPP's vars.sh before compilation.
 CC = mpiicx
 CXX = mpiicpx
-example = mpi3_onesided_jacobian_gpu_omp_device_initiated_notify mpi3_onesided_jacobian_gpu_sycl_device_initiated_notify
+example = mpi3_onesided_jacobian_gpu_omp_device_initiated_notify mpi3_onesided_jacobian_gpu_sycl_device_initiated_notify mpi3_onesided_jacobian_cpu_notify
 
 all: CFLAGS += -O2
 all: CXXFLAGS += -O2
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
/*==============================================================
 * Copyright © 2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 * ============================================================= */

/* Distributed Jacobian computation sample using CPU computations and MPI-3 one-sided communication.
 * This sample also demonstrates the use of notified RMA operations.
 */

#include "../include/common.h"
#ifndef MPI_ERR_INVALID_NOTIFICATION
/* For Intel MPI 2021.13/14 we have to use the API compatibility layer */
#include "mpix_compat.h"
#endif

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char *argv[])
{
    double t_start;
    struct subarray my_subarray = { };
    /* Here we use double buffering to allow overlap of the compute and communication phases.
     * Odd iterations use buffs[0] as input and buffs[1] as output and vice versa.
     * The same scheme is used for the MPI_Win objects.
     */
    double *buffs[2] = { NULL, NULL };
    MPI_Win win[2] = { MPI_WIN_NULL, MPI_WIN_NULL };

    /* Initialization of the runtime and of the initial state of the data */
    MPI_Init(&argc, &argv);

    /* Initialize the subarray owned by the current process
     * and create RMA windows for MPI-3 one-sided communications.
     * - For this sample, we use host memory for buffers and windows.
     * - The sample uses MPI_Win_lock* for synchronization.
     */
    InitSubarryAndWindows(&my_subarray, buffs, win, "host", true);

    /* Enable notification counters */
    MPI_Win_notify_set_num(win[0], MPI_INFO_NULL, 1);
    MPI_Win_notify_set_num(win[1], MPI_INFO_NULL, 1);
    /* Start RMA exposure epoch */
    MPI_Win_lock_all(0, win[0]);
    MPI_Win_lock_all(0, win[1]);

    const int row_size = ROW_SIZE(my_subarray);
    /* Number of iterations to perform between norm calculations */
    const int iterations_batch = (NormIteration <= 0) ? Niter : NormIteration;
    /* Aux pointers to the two buffers, used below for the norm calculation */
    double *b1 = buffs[0], *b2 = buffs[1];
    /* iter_counter_step defines the notification counter step per iteration */
    const MPI_Count iter_counter_step =
        ((my_subarray.up_neighbour != MPI_PROC_NULL) ? 1 : 0) +
        ((my_subarray.dn_neighbour != MPI_PROC_NULL) ? 1 : 0);

    /* Timestamp start time to measure overall execution time */
    BEGIN_PROFILING
    for (int passed_iters = 0; passed_iters < Niter; passed_iters += iterations_batch) {
        /* Perform a batch of iterations before checking the norm */
        for (int k = 0; k < iterations_batch; ++k)
        {
            int i = passed_iters + k;
            MPI_Win prev_win = win[i % 2];
            MPI_Win current_win = win[(i + 1) % 2];
            double *in = buffs[i % 2];
            double *out = buffs[(1 + i) % 2];

            /* Wait for the notification counter to reach the expected value:
             * here we check that communication operations issued by peers on the previous iteration have completed
             * and the data is ready for the next iteration.
             *
             * NOTE:
             * To be fully standard compliant, the application should check the memory model
             * and call MPI_Win_sync(prev_win) in the MPI_WIN_SEPARATE case after the notification has been received.
             * However, Intel MPI uses the MPI_WIN_UNIFIED memory model, so this call can be omitted.
             */
            MPI_Count c = 0;
            MPI_Win_flush_local_all(current_win);
            while (c < (iter_counter_step*i)) {
                MPI_Win_notify_get_value(prev_win, 0, &c);
            }

            /* Calculate values on borders to initiate communications early */
            for (int column = 0; column < my_subarray.x_size; column++) {
                RECALCULATE_POINT(out, in, column, 0, row_size);
                RECALCULATE_POINT(out, in, column, my_subarray.y_size - 1, row_size);
            }

            /* Perform a 1D halo-exchange with the neighbours.
             * Here we use extension primitives which allow notifying the remote process about data readiness.
             * This approach allows us to relax the synchronization requirements between origin and target processes.
             *
             * This code is executed outside of any parallel region.
             * It is possible to use MPI_Put_notify in a parallel region, which may give better performance for
             * scale-up cases but adds overhead for scale-out cases.
             * Also, in that case iter_counter_step should be adjusted.
             */
            if (my_subarray.up_neighbour != MPI_PROC_NULL) {
                int idx = XY_2_IDX(0, 0, row_size);
                MPI_Put_notify(&out[idx], my_subarray.x_size, MPI_DOUBLE,
                               my_subarray.up_neighbour, my_subarray.l_nbh_offt,
                               my_subarray.x_size, MPI_DOUBLE, 0, current_win);
            }

            if (my_subarray.dn_neighbour != MPI_PROC_NULL) {
                int idx = XY_2_IDX(0, my_subarray.y_size - 1, row_size);
                MPI_Put_notify(&out[idx], my_subarray.x_size, MPI_DOUBLE,
                               my_subarray.dn_neighbour, 1,
                               my_subarray.x_size, MPI_DOUBLE, 0, current_win);
            }

            /* Recalculate internal points in parallel with communication */
            for (int row = 1; row < my_subarray.y_size - 1; ++row) {
                for (int column = 0; column < my_subarray.x_size; ++column) {
                    RECALCULATE_POINT(out, in, column, row, row_size);
                }
            }
        }

        /* Calculate norm value after given number of iterations */
        if (NormIteration > 0) {
            double result_norm = 0.0;
            double norm = 0.0;

            for (int row = 0; row < my_subarray.y_size; ++row) {
                for (int column = 0; column < my_subarray.x_size; ++column) {
                    int idx = XY_2_IDX(column, row, row_size);
                    double diff = b1[idx] - b2[idx];
                    norm += diff*diff;
                }
            }
            MPI_Reduce(&norm, &result_norm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
            if (my_subarray.rank == 0) {
                printf("NORM value on iteration %d: %f\n", passed_iters+iterations_batch, sqrt(result_norm));
            }
        }
    }
    /* Timestamp end time to measure overall execution time and report average compute time */
    END_PROFILING

    /* Close RMA exposure epoch and free resources */
    MPI_Win_unlock_all(win[1]);
    MPI_Win_unlock_all(win[0]);
    MPI_Win_free(&win[1]);
    MPI_Win_free(&win[0]);

    if (my_subarray.rank == 0) {
        printf("SUCCESS\n");
    }
    MPI_Finalize();

    return 0;
}
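
As a side note, the notified-RMA calls this sample introduces (MPI_Win_notify_set_num, MPI_Put_notify, MPI_Win_notify_get_value) can be exercised in isolation. The sketch below is not part of this commit: it is a minimal, hypothetical two-rank example that reuses the call sequence and argument order exactly as they appear in the sample above; the prototypes are Intel MPI extensions, so, as in the sample, the mpix_compat.h shim may be needed on Intel MPI 2021.13/14.

/* notify_minimal.c (hypothetical): rank 0 performs a notified put into rank 1's
 * window; rank 1 polls the window's notification counter instead of using a
 * barrier or fence. Run with two ranks. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Each rank exposes a single double through an RMA window */
    double local = -1.0;
    MPI_Win win;
    MPI_Win_create(&local, sizeof(double), sizeof(double),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    /* One notification counter per window, as in the Jacobian sample */
    MPI_Win_notify_set_num(win, MPI_INFO_NULL, 1);

    /* Passive-target exposure epoch on all ranks */
    MPI_Win_lock_all(0, win);

    if (rank == 0 && size > 1) {
        /* Transfer the payload and increment notification counter 0 on the target */
        double payload = 42.0;
        MPI_Put_notify(&payload, 1, MPI_DOUBLE,
                       1 /* target rank */, 0 /* target disp */,
                       1, MPI_DOUBLE, 0 /* notification index */, win);
        MPI_Win_flush_local(1, win); /* payload may be reused after this */
    } else if (rank == 1) {
        /* Poll counter 0 until the notified put has landed */
        MPI_Count c = 0;
        while (c < 1) {
            MPI_Win_notify_get_value(win, 0, &c);
        }
        /* Under the MPI_WIN_SEPARATE memory model an MPI_Win_sync(win) would be
         * required here, as the NOTE in the sample above explains. */
        printf("rank 1 received %f\n", local);
    }

    MPI_Win_unlock_all(win);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}

Built with mpiicx and launched on two ranks, rank 1 would simply spin on the counter until rank 0's put and its notification arrive; this is the same relaxed origin/target synchronization the Jacobian sample relies on between halo-exchange iterations.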

0 commit comments
