1- // SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
1+ // SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
22//
33// SPDX-License-Identifier: BSD-3-Clause
44
@@ -623,14 +623,18 @@ void initialize(std::shared_ptr<const DefaultExecutor> exec,
623623 const auto block_dim = default_block_size;
624624 constexpr auto block_size = default_block_size;
625625
626- initialize_kernel<block_size>
627- <<<grid_dim, block_dim, 0 , exec->get_stream ()>>>(
628- b->get_size ()[0 ], b->get_size ()[1 ], krylov_dim,
629- as_device_type (b->get_const_values ()), b->get_stride (),
630- as_device_type (residual->get_values ()), residual->get_stride (),
631- as_device_type (givens_sin->get_values ()), givens_sin->get_stride (),
632- as_device_type (givens_cos->get_values ()), givens_cos->get_stride (),
633- as_device_type (stop_status->get_data ()));
626+ if (grid_dim != 0 ) {
627+ initialize_kernel<block_size>
628+ <<<grid_dim, block_dim, 0 , exec->get_stream ()>>>(
629+ b->get_size ()[0 ], b->get_size ()[1 ], krylov_dim,
630+ as_device_type (b->get_const_values ()), b->get_stride (),
631+ as_device_type (residual->get_values ()), residual->get_stride (),
632+ as_device_type (givens_sin->get_values ()),
633+ givens_sin->get_stride (),
634+ as_device_type (givens_cos->get_values ()),
635+ givens_cos->get_stride (),
636+ as_device_type (stop_status->get_data ()));
637+ }
634638}
635639
636640GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE_BASE (
@@ -661,12 +665,14 @@ void restart(std::shared_ptr<const DefaultExecutor> exec,
661665 constexpr auto block_size = default_block_size;
662666 const auto stride_arnoldi = arnoldi_norm->get_stride ();
663667
664- restart_1_kernel<block_size>
665- <<<grid_dim_1, block_dim, 0 , exec->get_stream ()>>>(
666- residual->get_size ()[0 ], residual->get_size ()[1 ], krylov_dim,
667- acc::as_device_range (krylov_bases),
668- as_device_type (residual_norm_collection->get_values ()),
669- residual_norm_collection->get_stride ());
668+ if (grid_dim_1 != 0 ) {
669+ restart_1_kernel<block_size>
670+ <<<grid_dim_1, block_dim, 0 , exec->get_stream ()>>>(
671+ residual->get_size ()[0 ], residual->get_size ()[1 ], krylov_dim,
672+ acc::as_device_range (krylov_bases),
673+ as_device_type (residual_norm_collection->get_values ()),
674+ residual_norm_collection->get_stride ());
675+ }
670676 kernels::GKO_DEVICE_NAMESPACE::dense::compute_norm2_dispatch (
671677 exec, residual, residual_norm, reduction_tmp);
672678
@@ -695,21 +701,23 @@ void restart(std::shared_ptr<const DefaultExecutor> exec,
695701 2 * stride_arnoldi),
696702 stride_arnoldi, acc::as_device_range (krylov_bases));
697703 }
698-
699704 const auto grid_dim_2 =
700705 ceildiv (std::max<size_type>(num_rows, 1 ) * krylov_stride[1 ],
701706 default_block_size);
702- restart_2_kernel<block_size>
703- <<<grid_dim_2, block_dim, 0 , exec->get_stream ()>>>(
704- residual->get_size ()[0 ], residual->get_size ()[1 ],
705- as_device_type (residual->get_const_values ()),
706- residual->get_stride (),
707- as_device_type (residual_norm->get_const_values ()),
708- as_device_type (residual_norm_collection->get_values ()),
709- acc::as_device_range (krylov_bases),
710- as_device_type (next_krylov_basis->get_values ()),
711- next_krylov_basis->get_stride (),
712- as_device_type (final_iter_nums->get_data ()));
707+
708+ if (grid_dim_2 != 0 ) {
709+ restart_2_kernel<block_size>
710+ <<<grid_dim_2, block_dim, 0 , exec->get_stream ()>>>(
711+ residual->get_size ()[0 ], residual->get_size ()[1 ],
712+ as_device_type (residual->get_const_values ()),
713+ residual->get_stride (),
714+ as_device_type (residual_norm->get_const_values ()),
715+ as_device_type (residual_norm_collection->get_values ()),
716+ acc::as_device_range (krylov_bases),
717+ as_device_type (next_krylov_basis->get_values ()),
718+ next_krylov_basis->get_stride (),
719+ as_device_type (final_iter_nums->get_data ()));
720+ }
713721}
714722
715723GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE (GKO_DECLARE_CB_GMRES_RESTART_KERNEL);
@@ -919,18 +927,21 @@ void givens_rotation(std::shared_ptr<const DefaultExecutor> exec,
919927 const auto block_dim = block_size;
920928 const auto grid_dim =
921929 static_cast <unsigned int >(ceildiv (num_cols, block_size));
922-
923- givens_rotation_kernel<block_size>
924- <<<grid_dim, block_dim, 0 , exec->get_stream ()>>>(
925- hessenberg_iter->get_size ()[0 ], hessenberg_iter->get_size ()[1 ],
926- iter, as_device_type (hessenberg_iter->get_values ()),
927- hessenberg_iter->get_stride (),
928- as_device_type (givens_sin->get_values ()), givens_sin->get_stride (),
929- as_device_type (givens_cos->get_values ()), givens_cos->get_stride (),
930- as_device_type (residual_norm->get_values ()),
931- as_device_type (residual_norm_collection->get_values ()),
932- residual_norm_collection->get_stride (),
933- stop_status->get_const_data ());
930+ if (grid_dim != 0 ) {
931+ givens_rotation_kernel<block_size>
932+ <<<grid_dim, block_dim, 0 , exec->get_stream ()>>>(
933+ hessenberg_iter->get_size ()[0 ], hessenberg_iter->get_size ()[1 ],
934+ iter, as_device_type (hessenberg_iter->get_values ()),
935+ hessenberg_iter->get_stride (),
936+ as_device_type (givens_sin->get_values ()),
937+ givens_sin->get_stride (),
938+ as_device_type (givens_cos->get_values ()),
939+ givens_cos->get_stride (),
940+ as_device_type (residual_norm->get_values ()),
941+ as_device_type (residual_norm_collection->get_values ()),
942+ residual_norm_collection->get_stride (),
943+ stop_status->get_const_data ());
944+ }
934945}
935946
936947
@@ -949,12 +960,14 @@ void arnoldi(std::shared_ptr<const DefaultExecutor> exec,
949960 array<stopping_status>* reorth_status,
950961 array<size_type>* num_reorth)
951962{
952- increase_final_iteration_numbers_kernel<<<
953- static_cast <unsigned int >(
954- ceildiv (final_iter_nums->get_size (), default_block_size)),
955- default_block_size, 0 , exec->get_stream ()>>>(
956- as_device_type (final_iter_nums->get_data ()),
957- stop_status->get_const_data (), final_iter_nums->get_size ());
963+ if (final_iter_nums->get_size () != 0 ) {
964+ increase_final_iteration_numbers_kernel<<<
965+ static_cast <unsigned int >(
966+ ceildiv (final_iter_nums->get_size (), default_block_size)),
967+ default_block_size, 0 , exec->get_stream ()>>>(
968+ as_device_type (final_iter_nums->get_data ()),
969+ stop_status->get_const_data (), final_iter_nums->get_size ());
970+ }
958971 finish_arnoldi_CGS (exec, next_krylov_basis, krylov_bases, hessenberg_iter,
959972 buffer_iter, arnoldi_norm, iter,
960973 stop_status->get_const_data (), reorth_status->get_data (),
@@ -1007,14 +1020,15 @@ void calculate_qy(std::shared_ptr<const DefaultExecutor> exec,
10071020 const auto grid_dim = static_cast <unsigned int >(
10081021 ceildiv (num_rows * stride_before_preconditioner, block_size));
10091022 const auto block_dim = block_size;
1010-
1011- calculate_Qy_kernel<block_size>
1012- <<<grid_dim, block_dim, 0 , exec->get_stream ()>>>(
1013- num_rows, num_cols, acc::as_device_range (krylov_bases),
1014- as_device_type (y->get_const_values ()), y->get_stride (),
1015- as_device_type (before_preconditioner->get_values ()),
1016- stride_before_preconditioner,
1017- as_device_type (final_iter_nums->get_const_data ()));
1023+ if (grid_dim != 0 ) {
1024+ calculate_Qy_kernel<block_size>
1025+ <<<grid_dim, block_dim, 0 , exec->get_stream ()>>>(
1026+ num_rows, num_cols, acc::as_device_range (krylov_bases),
1027+ as_device_type (y->get_const_values ()), y->get_stride (),
1028+ as_device_type (before_preconditioner->get_values ()),
1029+ stride_before_preconditioner,
1030+ as_device_type (final_iter_nums->get_const_data ()));
1031+ }
10181032 // Calculate qy
10191033 // before_preconditioner = krylov_bases * y
10201034}
0 commit comments