|
276 | 276 | <div class="line"><a id="l00171" name="l00171"></a><span class="lineno"> 171</span> </div> |
277 | 277 | <div class="line"><a id="l00172" name="l00172"></a><span class="lineno"> 172</span> <a class="code hl_function" href="namespaceamrex.html#a0a525af3444a2814c6dbda9e71a965a5">amrex::ignore_unused</a>(nnz);</div> |
278 | 278 | <div class="line"><a id="l00173" name="l00173"></a><span class="lineno"> 173</span> mkl::sparse::matrix_handle_t handle{};</div> |
279 | | -<div class="line"><a id="l00174" name="l00174"></a><span class="lineno"> 174</span> </div> |
280 | | -<div class="line"><a id="l00175" name="l00175"></a><span class="lineno"> 175</span><span class="preprocessor">#if defined(INTEL_MKL_VERSION) && (INTEL_MKL_VERSION < 20250300)</span></div> |
281 | | -<div class="line"><a id="l00176" name="l00176"></a><span class="lineno"> 176</span> mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols,</div> |
282 | | -<div class="line"><a id="l00177" name="l00177"></a><span class="lineno"> 177</span> mkl::index_base::zero, (Long*)row, (Long*)col, (T*)mat);</div> |
283 | | -<div class="line"><a id="l00178" name="l00178"></a><span class="lineno"> 178</span><span class="preprocessor">#else</span></div> |
284 | | -<div class="line"><a id="l00179" name="l00179"></a><span class="lineno"> 179</span> mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols, nnz,</div> |
285 | | -<div class="line"><a id="l00180" name="l00180"></a><span class="lineno"> 180</span> mkl::index_base::zero, (Long*)row, (Long*)col, (T*)mat);</div> |
286 | | -<div class="line"><a id="l00181" name="l00181"></a><span class="lineno"> 181</span><span class="preprocessor">#endif</span></div> |
287 | | -<div class="line"><a id="l00182" name="l00182"></a><span class="lineno"> 182</span> mkl::sparse::gemv(Gpu::Device::streamQueue(), mkl::transpose::nontrans,</div> |
288 | | -<div class="line"><a id="l00183" name="l00183"></a><span class="lineno"> 183</span> T(1), handle, px, T(0), py);</div> |
289 | | -<div class="line"><a id="l00184" name="l00184"></a><span class="lineno"> 184</span> </div> |
290 | | -<div class="line"><a id="l00185" name="l00185"></a><span class="lineno"> 185</span><span class="preprocessor">#endif</span></div> |
291 | | -<div class="line"><a id="l00186" name="l00186"></a><span class="lineno"> 186</span> </div> |
292 | | -<div class="line"><a id="l00187" name="l00187"></a><span class="lineno"> 187</span> <a class="code hl_define" href="AMReX__GpuError_8H.html#aff2d29ad26ba217734430c3d36f42dd1">AMREX_GPU_ERROR_CHECK</a>();</div> |
| 279 | +<div class="line"><a id="l00174" name="l00174"></a><span class="lineno"> 174</span> mkl::sparse::init_matrix_handle(&handle);</div> |
| 280 | +<div class="line"><a id="l00175" name="l00175"></a><span class="lineno"> 175</span> </div> |
| 281 | +<div class="line"><a id="l00176" name="l00176"></a><span class="lineno"> 176</span><span class="preprocessor">#if defined(INTEL_MKL_VERSION) && (INTEL_MKL_VERSION < 20250300)</span></div> |
| 282 | +<div class="line"><a id="l00177" name="l00177"></a><span class="lineno"> 177</span> mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols,</div> |
| 283 | +<div class="line"><a id="l00178" name="l00178"></a><span class="lineno"> 178</span> mkl::index_base::zero, (Long*)row, (Long*)col, (T*)mat);</div> |
| 284 | +<div class="line"><a id="l00179" name="l00179"></a><span class="lineno"> 179</span><span class="preprocessor">#else</span></div> |
| 285 | +<div class="line"><a id="l00180" name="l00180"></a><span class="lineno"> 180</span> mkl::sparse::set_csr_data(Gpu::Device::streamQueue(), handle, nrows, ncols, nnz,</div> |
| 286 | +<div class="line"><a id="l00181" name="l00181"></a><span class="lineno"> 181</span> mkl::index_base::zero, (Long*)row, (Long*)col, (T*)mat);</div> |
| 287 | +<div class="line"><a id="l00182" name="l00182"></a><span class="lineno"> 182</span><span class="preprocessor">#endif</span></div> |
| 288 | +<div class="line"><a id="l00183" name="l00183"></a><span class="lineno"> 183</span> mkl::sparse::gemv(Gpu::Device::streamQueue(), mkl::transpose::nontrans,</div> |
| 289 | +<div class="line"><a id="l00184" name="l00184"></a><span class="lineno"> 184</span> T(1), handle, px, T(0), py);</div> |
| 290 | +<div class="line"><a id="l00185" name="l00185"></a><span class="lineno"> 185</span> </div> |
| 291 | +<div class="line"><a id="l00186" name="l00186"></a><span class="lineno"> 186</span> <span class="keyword">auto</span> ev = mkl::sparse::release_matrix_handle(Gpu::Device::streamQueue(), &handle);</div> |
| 292 | +<div class="line"><a id="l00187" name="l00187"></a><span class="lineno"> 187</span> ev.wait();</div> |
293 | 293 | <div class="line"><a id="l00188" name="l00188"></a><span class="lineno"> 188</span> </div> |
294 | | -<div class="line"><a id="l00189" name="l00189"></a><span class="lineno"> 189</span><span class="preprocessor">#else</span></div> |
| 294 | +<div class="line"><a id="l00189" name="l00189"></a><span class="lineno"> 189</span><span class="preprocessor">#endif</span></div> |
295 | 295 | <div class="line"><a id="l00190" name="l00190"></a><span class="lineno"> 190</span> </div> |
296 | | -<div class="line"><a id="l00191" name="l00191"></a><span class="lineno"> 191</span> Long <span class="keyword">const</span> ny = <a class="code hl_enumvalue" href="namespaceamrex.html#a8de829410ed15dbc56e4dafc9bc6ea69a415290769594460e2e485922904f345d">y</a>.numLocalRows();</div> |
297 | | -<div class="line"><a id="l00192" name="l00192"></a><span class="lineno"> 192</span> <span class="keywordflow">for</span> (Long i = 0; i < ny; ++i) {</div> |
298 | | -<div class="line"><a id="l00193" name="l00193"></a><span class="lineno"> 193</span> T <a class="code hl_enumvalue" href="namespaceamrex.html#a2b273526dd4d27047e455ab7a5a8dcd3a4b43b0aee35624cd95b910189b3dc231">r</a> = 0;</div> |
299 | | -<div class="line"><a id="l00194" name="l00194"></a><span class="lineno"> 194</span><span class="preprocessor">#ifdef AMREX_USE_OMP</span></div> |
300 | | -<div class="line"><a id="l00195" name="l00195"></a><span class="lineno"> 195</span><span class="preprocessor">#pragma omp parallel for reduction(+:r)</span></div> |
301 | | -<div class="line"><a id="l00196" name="l00196"></a><span class="lineno"> 196</span><span class="preprocessor">#endif</span></div> |
302 | | -<div class="line"><a id="l00197" name="l00197"></a><span class="lineno"> 197</span> <span class="keywordflow">for</span> (Long j = row[i]; j < row[i+1]; ++j) {</div> |
303 | | -<div class="line"><a id="l00198" name="l00198"></a><span class="lineno"> 198</span> <a class="code hl_enumvalue" href="namespaceamrex.html#a2b273526dd4d27047e455ab7a5a8dcd3a4b43b0aee35624cd95b910189b3dc231">r</a> += mat[j] * px[col[j]];</div> |
304 | | -<div class="line"><a id="l00199" name="l00199"></a><span class="lineno"> 199</span> }</div> |
305 | | -<div class="line"><a id="l00200" name="l00200"></a><span class="lineno"> 200</span> py[i] = <a class="code hl_enumvalue" href="namespaceamrex.html#a2b273526dd4d27047e455ab7a5a8dcd3a4b43b0aee35624cd95b910189b3dc231">r</a>;</div> |
306 | | -<div class="line"><a id="l00201" name="l00201"></a><span class="lineno"> 201</span> }</div> |
307 | | -<div class="line"><a id="l00202" name="l00202"></a><span class="lineno"> 202</span> </div> |
308 | | -<div class="line"><a id="l00203" name="l00203"></a><span class="lineno"> 203</span><span class="preprocessor">#endif</span></div> |
309 | | -<div class="line"><a id="l00204" name="l00204"></a><span class="lineno"> 204</span> </div> |
310 | | -<div class="line"><a id="l00205" name="l00205"></a><span class="lineno"> 205</span> <span class="keyword">const_cast<</span><a class="code hl_class" href="classamrex_1_1SpMatrix.html">SpMatrix<T></a>&<span class="keyword">></span>(A).finishComm(<a class="code hl_enumvalue" href="namespaceamrex.html#a8de829410ed15dbc56e4dafc9bc6ea69a415290769594460e2e485922904f345d">y</a>);</div> |
311 | | -<div class="line"><a id="l00206" name="l00206"></a><span class="lineno"> 206</span>}</div> |
| 296 | +<div class="line"><a id="l00191" name="l00191"></a><span class="lineno"> 191</span> <a class="code hl_define" href="AMReX__GpuError_8H.html#aff2d29ad26ba217734430c3d36f42dd1">AMREX_GPU_ERROR_CHECK</a>();</div> |
| 297 | +<div class="line"><a id="l00192" name="l00192"></a><span class="lineno"> 192</span> </div> |
| 298 | +<div class="line"><a id="l00193" name="l00193"></a><span class="lineno"> 193</span><span class="preprocessor">#else</span></div> |
| 299 | +<div class="line"><a id="l00194" name="l00194"></a><span class="lineno"> 194</span> </div> |
| 300 | +<div class="line"><a id="l00195" name="l00195"></a><span class="lineno"> 195</span> Long <span class="keyword">const</span> ny = <a class="code hl_enumvalue" href="namespaceamrex.html#a8de829410ed15dbc56e4dafc9bc6ea69a415290769594460e2e485922904f345d">y</a>.numLocalRows();</div> |
| 301 | +<div class="line"><a id="l00196" name="l00196"></a><span class="lineno"> 196</span> <span class="keywordflow">for</span> (Long i = 0; i < ny; ++i) {</div> |
| 302 | +<div class="line"><a id="l00197" name="l00197"></a><span class="lineno"> 197</span> T <a class="code hl_enumvalue" href="namespaceamrex.html#a2b273526dd4d27047e455ab7a5a8dcd3a4b43b0aee35624cd95b910189b3dc231">r</a> = 0;</div> |
| 303 | +<div class="line"><a id="l00198" name="l00198"></a><span class="lineno"> 198</span><span class="preprocessor">#ifdef AMREX_USE_OMP</span></div> |
| 304 | +<div class="line"><a id="l00199" name="l00199"></a><span class="lineno"> 199</span><span class="preprocessor">#pragma omp parallel for reduction(+:r)</span></div> |
| 305 | +<div class="line"><a id="l00200" name="l00200"></a><span class="lineno"> 200</span><span class="preprocessor">#endif</span></div> |
| 306 | +<div class="line"><a id="l00201" name="l00201"></a><span class="lineno"> 201</span> <span class="keywordflow">for</span> (Long j = row[i]; j < row[i+1]; ++j) {</div> |
| 307 | +<div class="line"><a id="l00202" name="l00202"></a><span class="lineno"> 202</span> <a class="code hl_enumvalue" href="namespaceamrex.html#a2b273526dd4d27047e455ab7a5a8dcd3a4b43b0aee35624cd95b910189b3dc231">r</a> += mat[j] * px[col[j]];</div> |
| 308 | +<div class="line"><a id="l00203" name="l00203"></a><span class="lineno"> 203</span> }</div> |
| 309 | +<div class="line"><a id="l00204" name="l00204"></a><span class="lineno"> 204</span> py[i] = <a class="code hl_enumvalue" href="namespaceamrex.html#a2b273526dd4d27047e455ab7a5a8dcd3a4b43b0aee35624cd95b910189b3dc231">r</a>;</div> |
| 310 | +<div class="line"><a id="l00205" name="l00205"></a><span class="lineno"> 205</span> }</div> |
| 311 | +<div class="line"><a id="l00206" name="l00206"></a><span class="lineno"> 206</span> </div> |
| 312 | +<div class="line"><a id="l00207" name="l00207"></a><span class="lineno"> 207</span><span class="preprocessor">#endif</span></div> |
| 313 | +<div class="line"><a id="l00208" name="l00208"></a><span class="lineno"> 208</span> </div> |
| 314 | +<div class="line"><a id="l00209" name="l00209"></a><span class="lineno"> 209</span> <span class="keyword">const_cast<</span><a class="code hl_class" href="classamrex_1_1SpMatrix.html">SpMatrix<T></a>&<span class="keyword">></span>(A).finishComm(<a class="code hl_enumvalue" href="namespaceamrex.html#a8de829410ed15dbc56e4dafc9bc6ea69a415290769594460e2e485922904f345d">y</a>);</div> |
| 315 | +<div class="line"><a id="l00210" name="l00210"></a><span class="lineno"> 210</span>}</div> |
312 | 316 | </div> |
313 | | -<div class="line"><a id="l00207" name="l00207"></a><span class="lineno"> 207</span> </div> |
314 | | -<div class="line"><a id="l00208" name="l00208"></a><span class="lineno"> 208</span>}</div> |
315 | | -<div class="line"><a id="l00209" name="l00209"></a><span class="lineno"> 209</span> </div> |
316 | | -<div class="line"><a id="l00210" name="l00210"></a><span class="lineno"> 210</span><span class="preprocessor">#endif</span></div> |
| 317 | +<div class="line"><a id="l00211" name="l00211"></a><span class="lineno"> 211</span> </div> |
| 318 | +<div class="line"><a id="l00212" name="l00212"></a><span class="lineno"> 212</span>}</div> |
| 319 | +<div class="line"><a id="l00213" name="l00213"></a><span class="lineno"> 213</span> </div> |
| 320 | +<div class="line"><a id="l00214" name="l00214"></a><span class="lineno"> 214</span><span class="preprocessor">#endif</span></div> |
317 | 321 | <div class="ttc" id="aAMReX__AlgVector_8H_html"><div class="ttname"><a href="AMReX__AlgVector_8H.html">AMReX_AlgVector.H</a></div></div> |
318 | 322 | <div class="ttc" id="aAMReX__BLassert_8H_html_abd471ebf6086189835e778f06c053833"><div class="ttname"><a href="AMReX__BLassert_8H.html#abd471ebf6086189835e778f06c053833">AMREX_ALWAYS_ASSERT</a></div><div class="ttdeci">#define AMREX_ALWAYS_ASSERT(EX)</div><div class="ttdef"><b>Definition</b> AMReX_BLassert.H:50</div></div> |
319 | 323 | <div class="ttc" id="aAMReX__Extension_8H_html_a97eafbdde3320b2e8981a1d1d3936f58"><div class="ttname"><a href="AMReX__Extension_8H.html#a97eafbdde3320b2e8981a1d1d3936f58">AMREX_RESTRICT</a></div><div class="ttdeci">#define AMREX_RESTRICT</div><div class="ttdef"><b>Definition</b> AMReX_Extension.H:37</div></div> |
|
0 commit comments