1+ /*
2+ * Copyright 2025 Huawei Technologies Co., Ltd.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
#include <chrono>
#include <cmath>
#include <cstdio>
#include <map>
#include <memory>
#include <vector>
#include <hicr/core/instanceManager.hpp>
#include <taskr/taskr.hpp>

#include "grid.hpp"
#include "task.hpp"
23+
24+ void jacobi3d (HiCR::InstanceManager *instanceManager,
25+ taskr::Runtime &taskr,
26+ Grid *g,
27+ size_t gDepth = 1 ,
28+ size_t N = 128 ,
29+ ssize_t nIters = 100 ,
30+ D3 pt = D3({.x = 1 , .y = 1 , .z = 1 }),
31+ D3 lt = D3({.x = 1 , .y = 1 , .z = 1 }))
32+ {
33+ // Getting distributed instance information
34+ const auto instanceCount = instanceManager->getInstances ().size ();
35+ const auto myInstanceId = instanceManager->getCurrentInstance ()->getId ();
36+ const auto rootInstanceId = instanceManager->getRootInstanceId ();
37+ const auto isRootInstance = myInstanceId == rootInstanceId;
38+
39+ // Initializing the Grid
40+ bool success = g->initialize ();
41+ if (success == false ) instanceManager->abort (-1 );
42+
43+ // Creating grid processing functions
44+ g->resetFc = std::make_unique<taskr::Function>([&g](taskr::Task *task) { g->reset (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k ); });
45+ g->computeFc =
46+ std::make_unique<taskr::Function>([&g](taskr::Task *task) { g->compute (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k , ((Task *)task)->iteration ); });
47+ g->receiveFc =
48+ std::make_unique<taskr::Function>([&g](taskr::Task *task) { g->receive (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k , ((Task *)task)->iteration ); });
49+ g->unpackFc = std::make_unique<taskr::Function>([&g](taskr::Task *task) { g->unpack (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k , ((Task *)task)->iteration ); });
50+ g->packFc = std::make_unique<taskr::Function>([&g](taskr::Task *task) { g->pack (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k , ((Task *)task)->iteration ); });
51+ g->sendFc = std::make_unique<taskr::Function>([&g](taskr::Task *task) { g->send (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k , ((Task *)task)->iteration ); });
52+ g->localResidualFc = std::make_unique<taskr::Function>(
53+ [&g](taskr::Task *task) { g->calculateLocalResidual (task, ((Task *)task)->i , ((Task *)task)->j , ((Task *)task)->k , ((Task *)task)->iteration ); });
54+
55+ // Task map
56+ std::map<taskr::taskId_t, std::shared_ptr<taskr::Task>> _taskMap;
57+
58+ printf (" Instance %lu: Executing...\n " , myInstanceId);
59+
60+ // Creating tasks to reset the grid
61+ for (ssize_t i = 0 ; i < lt.x ; i++)
62+ for (ssize_t j = 0 ; j < lt.y ; j++)
63+ for (ssize_t k = 0 ; k < lt.z ; k++)
64+ {
65+ auto resetTask = new Task (" Reset" , i, j, k, 0 , g->resetFc .get ());
66+ taskr.addTask (resetTask);
67+ }
68+
69+ // Initializing TaskR
70+ taskr.initialize ();
71+
72+ // Running Taskr initially
73+ taskr.run ();
74+
75+ // Waiting for Taskr to finish
76+ taskr.await ();
77+
78+ // Creating and adding tasks (graph nodes)
79+ for (ssize_t it = 0 ; it < nIters; it++)
80+ for (ssize_t i = 0 ; i < lt.x ; i++)
81+ for (ssize_t j = 0 ; j < lt.y ; j++)
82+ for (ssize_t k = 0 ; k < lt.z ; k++)
83+ {
84+ auto localId = g->localSubGridMapping [k][j][i];
85+ auto &subGrid = g->subgrids [localId];
86+
87+ // create new specific tasks
88+ auto computeTask = std::make_shared<Task>(" Compute" , i, j, k, it, g->computeFc .get ());
89+ auto packTask = std::make_shared<Task>(" Pack" , i, j, k, it, g->packFc .get ());
90+ auto sendTask = std::make_shared<Task>(" Send" , i, j, k, it, g->sendFc .get ());
91+ auto recvTask = std::make_shared<Task>(" Receive" , i, j, k, it, g->receiveFc .get ());
92+ auto unpackTask = std::make_shared<Task>(" Unpack" , i, j, k, it, g->unpackFc .get ());
93+
94+ _taskMap[Task::encodeTaskName (" Compute" , i, j, k, it)] = computeTask;
95+ _taskMap[Task::encodeTaskName (" Pack" , i, j, k, it)] = packTask;
96+ _taskMap[Task::encodeTaskName (" Send" , i, j, k, it)] = sendTask;
97+ _taskMap[Task::encodeTaskName (" Receive" , i, j, k, it)] = recvTask;
98+ _taskMap[Task::encodeTaskName (" Unpack" , i, j, k, it)] = unpackTask;
99+
100+ // Creating and adding local compute task dependencies
101+ if (it > 0 )
102+ if (subGrid.X0 .type == LOCAL) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i - 1 , j + 0 , k + 0 , it - 1 )].get ());
103+ if (it > 0 )
104+ if (subGrid.X1 .type == LOCAL) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i + 1 , j + 0 , k + 0 , it - 1 )].get ());
105+ if (it > 0 )
106+ if (subGrid.Y0 .type == LOCAL) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i + 0 , j - 1 , k + 0 , it - 1 )].get ());
107+ if (it > 0 )
108+ if (subGrid.Y1 .type == LOCAL) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i + 0 , j + 1 , k + 0 , it - 1 )].get ());
109+ if (it > 0 )
110+ if (subGrid.Z0 .type == LOCAL) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i + 0 , j + 0 , k - 1 , it - 1 )].get ());
111+ if (it > 0 )
112+ if (subGrid.Z1 .type == LOCAL) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i + 0 , j + 0 , k + 1 , it - 1 )].get ());
113+ if (it > 0 ) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i + 0 , j + 0 , k + 0 , it - 1 )].get ());
114+
115+ // Adding communication-related dependencies
116+ if (it > 0 ) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Pack" , i, j, k, it - 1 )].get ());
117+ if (it > 0 ) computeTask->addDependency (_taskMap[Task::encodeTaskName (" Unpack" , i, j, k, it - 1 )].get ());
118+
119+ // Creating and adding receive task dependencies, from iteration 1 onwards
120+ if (it > 0 ) recvTask->addDependency (_taskMap[Task::encodeTaskName (" Unpack" , i, j, k, it - 1 )].get ());
121+
122+ // Creating and adding unpack task dependencies
123+ unpackTask->addDependency (_taskMap[Task::encodeTaskName (" Receive" , i, j, k, it)].get ());
124+ unpackTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i, j, k, it)].get ());
125+
126+ // Creating and adding send task dependencies, from iteration 1 onwards
127+ packTask->addDependency (_taskMap[Task::encodeTaskName (" Compute" , i, j, k, it)].get ());
128+ if (it > 0 ) packTask->addDependency (_taskMap[Task::encodeTaskName (" Send" , i, j, k, it - 1 )].get ());
129+
130+ // Creating and adding send task dependencies, from iteration 1 onwards
131+ sendTask->addDependency (_taskMap[Task::encodeTaskName (" Pack" , i, j, k, it)].get ());
132+
133+ // Adding tasks to taskr
134+ taskr.addTask (computeTask.get ());
135+ if (it < nIters - 1 ) taskr.addTask (packTask.get ());
136+ if (it < nIters - 1 ) taskr.addTask (sendTask.get ());
137+ if (it < nIters - 1 ) taskr.addTask (recvTask.get ());
138+ if (it < nIters - 1 ) taskr.addTask (unpackTask.get ());
139+ }
140+
141+ // Setting start time as now
142+ auto t0 = std::chrono::high_resolution_clock::now ();
143+
144+ // Running Taskr
145+ taskr.run ();
146+
147+ // Waiting for Taskr to finish
148+ taskr.await ();
149+
150+ // //// Calculating residual
151+
152+ // Reset local residual to zero
153+ g->resetResidual ();
154+
155+ // Calculating local residual
156+ for (ssize_t i = 0 ; i < lt.x ; i++)
157+ for (ssize_t j = 0 ; j < lt.y ; j++)
158+ for (ssize_t k = 0 ; k < lt.z ; k++)
159+ {
160+ auto residualTask = new Task (" Residual" , i, j, k, nIters, g->localResidualFc .get ());
161+ taskr.addTask (residualTask);
162+ }
163+
164+ // Running Taskr
165+ taskr.run ();
166+
167+ // Waiting for Taskr to finish
168+ taskr.await ();
169+
170+ // Finalizing TaskR
171+ taskr.finalize ();
172+
173+ // If i'm not the root instance, simply send my locally calculated residual
174+ if (isRootInstance == false )
175+ {
176+ *(double *)g->residualSendBuffer ->getPointer () = g->_residual ;
177+ g->residualProducerChannel ->push (g->residualSendBuffer , 1 );
178+ }
179+ else
180+ {
181+ // Otherwise gather all the residuals and print the results
182+ double globalRes = g->_residual ;
183+
184+ for (size_t i = 0 ; i < instanceCount - 1 ; i++)
185+ {
186+ while (g->residualConsumerChannel ->isEmpty ());
187+ double *residualPtr = (double *)g->residualConsumerChannel ->getTokenBuffer ()->getSourceLocalMemorySlot ()->getPointer () + g->residualConsumerChannel ->peek (0 );
188+ g->residualConsumerChannel ->pop ();
189+ globalRes += *residualPtr;
190+ }
191+
192+ // Setting final time now
193+ auto tf = std::chrono::high_resolution_clock::now ();
194+ std::chrono::duration<float > dt = tf - t0;
195+ float execTime = dt.count ();
196+
197+ double residual = sqrt (globalRes / ((double )(N - 1 ) * (double )(N - 1 ) * (double )(N - 1 )));
198+ double gflops = nIters * (double )N * (double )N * (double )N * (2 + gDepth * 8 ) / (1.0e9 );
199+ printf (" %.4fs, %.3f GFlop/s (L2 Norm: %.10g)\n " , execTime, gflops / execTime, residual);
200+ }
201+
202+ // Finalizing grid
203+ g->finalize ();
204+ }
0 commit comments