@@ -707,30 +707,51 @@ subroutine preprocess(self, dist_b)
707707
708708 integer :: i
709709
710- ! start the hybrid algorithm
710+ ! Ref DOI: 10.1109/MCSE.2021.3130544
711+ ! Algorithm 3 in page 4
712+ ! First two lines first
711713 do i = 1 , 2
712714 self% dist_sa(i) = self% dist_sa(i)/ dist_b(i)
713715 self% dist_sc(i) = self% dist_sc(i)/ dist_b(i)
714716 self% dist_bw(i) = self% dist_sc(i)
715717 self% dist_af(i) = 1._dp / dist_b(i)
716718 end do
719+
720+ ! Then the remaining in the forward pass
717721 do i = 3 , self% n
722+ ! Algorithm 3 in ref obtains 'r' coeffs on the fly in line 7.
723+ ! As we have to solve many RHSs with the same tridiagonal system,
724+ ! it is better to do a preprocessing first.
725+ ! So lets store 'r' coeff in dist_fw array.
718726 self% dist_fw(i) = 1._dp / (dist_b(i) &
719727 - self% dist_sa(i)* self% dist_sc(i - 1 ))
728+ ! dist_af is 'a_i' in line 7 of Algorithm 3 in ref.
720729 self% dist_af(i) = self% dist_sa(i)
730+ ! We store a_i^* and c_i^* in dist_sa and dist_sc because
731+ ! we need them later in the substitution phase.
721732 self% dist_sa(i) = - self% dist_fw(i)* self% dist_sa(i) &
722733 * self% dist_sa(i - 1 )
723734 self% dist_sc(i) = self% dist_fw(i)* self% dist_sc(i)
724735 end do
736+
737+ ! backward pass starting in line 12 of Algorithm 3.
725738 do i = self% n - 2 , 2 , - 1
726739 self% dist_sa(i) = self% dist_sa(i) &
727740 - self% dist_sc(i)* self% dist_sa(i + 1 )
728741 self% dist_bw(i) = self% dist_sc(i)
729742 self% dist_sc(i) = - self% dist_sc(i)* self% dist_sc(i + 1 )
730743 end do
731- ! dist_fw(1) is never used, so store this extra factor instead
744+
745+ ! Line 17 and 18 are tricky
746+ ! First we have a new 'r', we need it.
747+ ! And for 'r' we need c_0^*...
748+ ! Now examine closely, c_0^* is set in line 4 and never overwritten!
749+ ! So we can use dist_sc(1) as is in place of c_0^*.
750+ ! We need to store this new 'r' somewhere ...
751+ ! dist_fw(1) is never used, so store this extra 'r' factor here instead
732752 self% dist_fw(1 ) = 1._dp / (1._dp - self% dist_sc(1 )* self% dist_sa(2 ))
733753
754+ ! Finally Line 19 and 20 in Algorithm 3 in ref.
734755 self% dist_sa(1 ) = self% dist_fw(1 )* self% dist_sa(1 )
735756 self% dist_sc(1 ) = - self% dist_fw(1 )* self% dist_sc(1 )* self% dist_sc(2 )
736757
0 commit comments