From e59235be1bc72e8dd4816a14e896d517bf59ee47 Mon Sep 17 00:00:00 2001 From: Gilles Grospellier Date: Wed, 18 Feb 2026 15:17:06 +0100 Subject: [PATCH] [femutils] Use accelerator API in 'CsrFormat::translateToLinearSystem()'. This will prevent some memory round trips between CPU and GPU. --- femutils/CMakeLists.txt | 2 +- femutils/CsrFormatMatrix.cc | 44 +++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/femutils/CMakeLists.txt b/femutils/CMakeLists.txt index 88081665..dd2f4a69 100644 --- a/femutils/CMakeLists.txt +++ b/femutils/CMakeLists.txt @@ -3,6 +3,7 @@ option(ENABLE_DEBUG_MATRIX "Enable Debug matrix instead of a sparse one" OFF) set(ACCELERATOR_SOURCES ArcaneFemFunctionsGpu.cc CsrDoFLinearSystemImpl.cc + CsrFormatMatrix.cc BSRFormat.cc ) @@ -27,7 +28,6 @@ add_library(FemUtils DoFLinearSystem.cc CooFormatMatrix.h CsrFormatMatrix.h - CsrFormatMatrix.cc CsrFormatMatrixView.h CsrFormatMatrixView.cc BSRFormat.h diff --git a/femutils/CsrFormatMatrix.cc b/femutils/CsrFormatMatrix.cc index 2e3c9eb2..18707868 100644 --- a/femutils/CsrFormatMatrix.cc +++ b/femutils/CsrFormatMatrix.cc @@ -17,6 +17,8 @@ #include #include +#include +#include #include "CsrFormatMatrix.h" #include "DoFLinearSystem.h" @@ -58,27 +60,45 @@ initialize(IItemFamily* dof_family, Int32 nnz, Int32 nbRow, RunQueue& queue) void CsrFormat:: translateToLinearSystem(DoFLinearSystem& linear_system, const RunQueue& queue) { - info() << "TranslateToLinearSystem this=" << this; bool do_set_csr = linear_system.hasSetCSRValues(); + info() << "TranslateToLinearSystem this=" << this << " is_csr=" << do_set_csr; + + const Int32 nb_row = m_matrix_row.dim1Size(); + const Int32 matrix_column_size = m_matrix_column.dim1Size(); + // When using CSR format, we need to know the number of non zero values for // each row. // NOTE: it should be possible to compute that in setCoordinates(). 
// and this value is constant if the structure of the matrix do not change // so we can store these values instead of recomputing them. if (do_set_csr) { - m_matrix_rows_nb_column.resize(m_matrix_row.extent0()); - //m_matrix_rows_nb_column.fill(0); + m_matrix_rows_nb_column.resize(nb_row); + auto command = makeCommand(queue); + auto out_matrix_rows_nb_column = viewOut(command, m_matrix_rows_nb_column); + auto in_matrix_rows = viewIn(command, m_matrix_row); + command << RUNCOMMAND_LOOP1(iter, nb_row) + { + auto [i] = iter(); + Int32 nb_column = 0; + if (((i + 1) < nb_row) && (in_matrix_rows(i) == in_matrix_rows(i + 1))) { + out_matrix_rows_nb_column[i] = 0; /* empty row: its count must be written as 0 */ + return; + } + for (Int32 j = in_matrix_rows(i); ((i + 1) < nb_row && j < in_matrix_rows(i + 1)) || ((i + 1) == nb_row && j < matrix_column_size); j++) { + ++nb_column; + } + out_matrix_rows_nb_column[i] = nb_column; + }; + CSRFormatView csr_view(view()); + linear_system.setCSRValues(csr_view); + return; } - Int32 nb_row = m_matrix_row.dim1Size(); + for (Int32 i = 0; i < nb_row; i++) { m_matrix_rows_nb_column[i] = 0; if (((i + 1) < nb_row) && (m_matrix_row(i) == m_matrix_row(i + 1))) continue; - for (Int32 j = m_matrix_row(i); ((i + 1) < nb_row && j < m_matrix_row(i + 1)) || ((i + 1) == nb_row && j < m_matrix_column.dim1Size()); j++) { - if (do_set_csr) { - ++m_matrix_rows_nb_column[i]; - continue; - } + for (Int32 j = m_matrix_row(i); ((i + 1) < nb_row && j < m_matrix_row(i + 1)) || ((i + 1) == nb_row && j < matrix_column_size); j++) { if (DoFLocalId(m_matrix_column(j)).isNull()) continue; //info() << "Add: (" << i << ", " << m_matrix_column(j) << " v=" << m_matrix_value(j); @@ -87,10 +107,12 @@ translateToLinearSystem(DoFLinearSystem& linear_system, const RunQueue& queue) } if (do_set_csr) { - CSRFormatView csr_view(view()); - linear_system.setCSRValues(csr_view); } } + +/*---------------------------------------------------------------------------*/ 
+/*---------------------------------------------------------------------------*/ + CsrFormatMatrixView CsrFormat:: view() {