diff --git a/include/ConfigParser/config_parser.h b/include/ConfigParser/config_parser.h index 080d4490..1d5b9003 100644 --- a/include/ConfigParser/config_parser.h +++ b/include/ConfigParser/config_parser.h @@ -32,7 +32,6 @@ class ConfigParser int verbose() const; bool paraview() const; int maxOpenMPThreads() const; - double threadReductionFactor() const; bool DirBC_Interior() const; StencilDistributionMethod stencilDistributionMethod() const; bool cacheDensityProfileCoefficients() const; @@ -68,7 +67,6 @@ class ConfigParser bool paraview_; // Parallelization and threading settings int max_omp_threads_; - double thread_reduction_factor_; // Numerical method setup bool DirBC_Interior_; StencilDistributionMethod stencil_distribution_method_; diff --git a/include/GMGPolar/gmgpolar.h b/include/GMGPolar/gmgpolar.h index 0bc932a5..c2d90a29 100644 --- a/include/GMGPolar/gmgpolar.h +++ b/include/GMGPolar/gmgpolar.h @@ -52,16 +52,12 @@ class GMGPolar void paraview(bool paraview); /* ---------------------------------------------------------------------- */ - /* Parallelization & threading */ + /* Parallelization */ /* ---------------------------------------------------------------------- */ // Maximum number of OpenMP threads to use. int maxOpenMPThreads() const; void maxOpenMPThreads(int max_omp_threads); - // Thread reduction factor on coarser grids (e.g., 0.5 halves threads each level). - double threadReductionFactor() const; - void threadReductionFactor(double thread_reduction_factor); - /* ---------------------------------------------------------------------- */ /* Numerical method options */ /* ---------------------------------------------------------------------- */ @@ -208,7 +204,6 @@ class GMGPolar bool paraview_; // Parallelization and threading settings int max_omp_threads_; - double thread_reduction_factor_; // Numerical method setup bool DirBC_Interior_; StencilDistributionMethod stencil_distribution_method_; @@ -234,7 +229,6 @@ class GMGPolar /* Multigrid levels */ int number_of_levels_; std::vector levels_; - std::vector threads_per_level_; /* ---------------------- */ /* Interpolation operator */ diff --git a/scripts/comparison/run_gmgpolar_2.0.sh b/scripts/comparison/run_gmgpolar_2.0.sh index b107dbc2..8761dee0 100755 --- a/scripts/comparison/run_gmgpolar_2.0.sh +++ b/scripts/comparison/run_gmgpolar_2.0.sh @@ -12,8 +12,6 @@ paraview=0 # OpenMP settings: # Maximum number of threads OpenMP can use for parallel execution maxOpenMPThreads=32 -# Factor to reduce the number of threads OpenMP uses (e.g., 1.0 means no reduction) -threadReductionFactor=1.0 # Stencil distribution method: # 0 - CPU "Take": Each node independently applies the stencil @@ -138,4 +136,4 @@ else exit 1 fi -./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $maxOpenMPThreads --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance +./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $maxOpenMPThreads --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance diff --git a/scripts/performance_likwid/start_likwid_benchmark.sh b/scripts/performance_likwid/start_likwid_benchmark.sh index 4fbce7f5..2e85883f 100644 --- a/scripts/performance_likwid/start_likwid_benchmark.sh +++ b/scripts/performance_likwid/start_likwid_benchmark.sh @@ -36,8 +36,6 @@ paraview=0 # OpenMP settings: # Maximum number of threads OpenMP can use for parallel execution maxOpenMPThreads=$cores # Iterate over 1:2:maxOpenMPThreads -# Factor to reduce the number of threads OpenMP uses (e.g., 1.0 means no reduction) -threadReductionFactor=1.0 # Stencil distribution method: # 0 - CPU "Take": Each node independently applies the stencil @@ -172,8 +170,8 @@ echo "for m in \${core_list[@]}; do" >> run_COMPACT_FLOPS_DP_likwid.sh echo " let mminus1=m-1" >> run_COMPACT_FLOPS_DP_likwid.sh echo ' output_file="data/COMPACT_FLOPS_DP_${m}.txt"' >> run_COMPACT_FLOPS_DP_likwid.sh echo " # for testing that pin works correctly, potentially use likwid-pin beforehand" >> run_COMPACT_FLOPS_DP_likwid.sh -echo " # srun --cpus-per-task=$((cores)) likwid-pin -C E:N:\$m ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_FLOPS_DP_likwid.sh -echo " srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C E:N:\$m -g FLOPS_DP -o \$output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_FLOPS_DP_likwid.sh +echo " # srun --cpus-per-task=$((cores)) likwid-pin -C E:N:\$m ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_FLOPS_DP_likwid.sh +echo " srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C E:N:\$m -g FLOPS_DP -o \$output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_FLOPS_DP_likwid.sh echo "done;" >> run_COMPACT_FLOPS_DP_likwid.sh ### ----------------------------------------- ### @@ -205,8 +203,8 @@ echo "for m in \${core_list[@]}; do" >> run_COMPACT_MEM_DP_likwid.sh echo " let mminus1=m-1" >> run_COMPACT_MEM_DP_likwid.sh echo ' output_file="data/COMPACT_MEM_DP_${m}.txt"' >> run_COMPACT_MEM_DP_likwid.sh echo " # for testing that pin works correctly, potentially use likwid-pin beforehand" >> run_COMPACT_MEM_DP_likwid.sh -echo " # srun --cpus-per-task=$((cores)) likwid-pin -C E:N:\$m ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_MEM_DP_likwid.sh -echo " srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C E:N:\$m -g MEM_DP -o \$output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_MEM_DP_likwid.sh +echo " # srun --cpus-per-task=$((cores)) likwid-pin -C E:N:\$m ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_MEM_DP_likwid.sh +echo " srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C E:N:\$m -g MEM_DP -o \$output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads \$m --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_COMPACT_MEM_DP_likwid.sh echo "done;" >> run_COMPACT_MEM_DP_likwid.sh ### --------------------------------------- ### @@ -250,7 +248,7 @@ for m in ${core_list[@]}; do core_set=$(IFS=,; echo "${list[*]}") output_file="data/SPREAD_FLOPS_DP_${m}.txt" - echo "srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C N:$core_set -g FLOPS_DP -o $output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $m --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_SPREAD_FLOPS_DP_likwid.sh + echo "srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C N:$core_set -g FLOPS_DP -o $output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $m --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_SPREAD_FLOPS_DP_likwid.sh done ### --------------------------------- ### @@ -293,7 +291,7 @@ for m in ${core_list[@]}; do core_set=$(IFS=,; echo "${list[*]}") output_file="data/SPREAD_MEM_DP_${m}.txt" - echo "srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C N:$core_set -g MEM_DP -o $output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $m --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_SPREAD_MEM_DP_likwid.sh + echo "srun --cpus-per-task=$((cores)) likwid-perfctr -f -m -C N:$core_set -g MEM_DP -o $output_file ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $m --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance" >> run_SPREAD_MEM_DP_likwid.sh done ### ------------------------------- ### diff --git a/scripts/tutorial/run.sh b/scripts/tutorial/run.sh index d05b1a70..debb018e 100755 --- a/scripts/tutorial/run.sh +++ b/scripts/tutorial/run.sh @@ -17,8 +17,6 @@ paraview=0 # OpenMP settings: # Maximum number of threads OpenMP can use for parallel execution maxOpenMPThreads=32 -# Factor to reduce the number of threads OpenMP uses (e.g., 1.0 means no reduction) -threadReductionFactor=1.0 # Stencil distribution method: # 0 - CPU "Take": Each node independently applies the stencil @@ -121,7 +119,6 @@ fi --verbose $verbose \ --paraview $paraview \ --maxOpenMPThreads $maxOpenMPThreads \ - --threadReductionFactor $threadReductionFactor \ --stencilDistributionMethod $stencilDistributionMethod \ --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients \ --cacheDomainGeometry $cacheDomainGeometry \ diff --git a/scripts/tutorial/sbatch.sh b/scripts/tutorial/sbatch.sh index a974c0b1..35777df9 100644 --- a/scripts/tutorial/sbatch.sh +++ b/scripts/tutorial/sbatch.sh @@ -22,8 +22,6 @@ paraview=0 # OpenMP settings: # Maximum number of threads OpenMP for parallel execution maxOpenMPThreads=20 -# Factor to reduce the number of threads OpenMP between levels (e.g., 1.0 means no reduction) -threadReductionFactor=1.0 # Implementation strategy: # 0 - CPU "Take": Each node independently applies the stencil @@ -122,4 +120,4 @@ else exit 1 fi -srun ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $maxOpenMPThreads --threadReductionFactor $threadReductionFactor --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance +srun ./../../build/gmgpolar --verbose $verbose --paraview $paraview --maxOpenMPThreads $maxOpenMPThreads --stencilDistributionMethod $stencilDistributionMethod --cacheDensityProfileCoefficients $cacheDensityProfileCoefficients --cacheDomainGeometry $cacheDomainGeometry --R0 $R0 --Rmax $Rmax --nr_exp $nr_exp --ntheta_exp $ntheta_exp --anisotropic_factor $anisotropic_factor --divideBy2 $divideBy2 --DirBC_Interior $DirBC_Interior --geometry $geometry --kappa_eps $kappa_eps --delta_e $delta_e --problem $problem --alpha_coeff $alpha_coeff --alpha_jump $alpha_jump --beta_coeff $beta_coeff --FMG $FMG --FMG_iterations $FMG_iterations --FMG_cycle $FMG_cycle --extrapolation $extrapolation --maxLevels $maxLevels --preSmoothingSteps $preSmoothingSteps --postSmoothingSteps $postSmoothingSteps --multigridCycle $multigridCycle --maxIterations $maxIterations --residualNormType $residualNormType --absoluteTolerance $absoluteTolerance --relativeTolerance $relativeTolerance diff --git a/src/ConfigParser/config_parser.cpp b/src/ConfigParser/config_parser.cpp index 52a5659f..5d0bce6a 100644 --- a/src/ConfigParser/config_parser.cpp +++ b/src/ConfigParser/config_parser.cpp @@ -6,7 +6,6 @@ ConfigParser::ConfigParser() parser_.add("verbose", '\0', "Verbosity level.", OPTIONAL, 1); parser_.add("paraview", '\0', "Generate ParaView output (0/1).", OPTIONAL, 0); parser_.add("maxOpenMPThreads", '\0', "Max OpenMP threads.", OPTIONAL, 1); - parser_.add("threadReductionFactor", '\0', "Thread reduction factor.", OPTIONAL, 1.0); parser_.add("DirBC_Interior", '\0', "Interior BC type (0=Across-origin, 1=Dirichlet).", OPTIONAL, 0, cmdline::oneof(0, 1)); parser_.add("stencilDistributionMethod", '\0', "Stencil distribution (0=CPU_Take,1=CPU_Give)", OPTIONAL, 0, @@ -72,12 +71,11 @@ bool ConfigParser::parse(int argc, char* argv[]) } // Parse general parameters from command-line arguments - verbose_ = parser_.get("verbose"); - paraview_ = parser_.get("paraview") != 0; - max_omp_threads_ = parser_.get("maxOpenMPThreads"); - thread_reduction_factor_ = parser_.get("threadReductionFactor"); - DirBC_Interior_ = parser_.get("DirBC_Interior") != 0; - const int methodValue = parser_.get("stencilDistributionMethod"); + verbose_ = parser_.get("verbose"); + paraview_ = parser_.get("paraview") != 0; + max_omp_threads_ = parser_.get("maxOpenMPThreads"); + DirBC_Interior_ = parser_.get("DirBC_Interior") != 0; + const int methodValue = parser_.get("stencilDistributionMethod"); if (methodValue == static_cast(StencilDistributionMethod::CPU_TAKE) || methodValue == static_cast(StencilDistributionMethod::CPU_GIVE)) { stencil_distribution_method_ = static_cast(methodValue); @@ -295,10 +293,6 @@ int ConfigParser::maxOpenMPThreads() const { return max_omp_threads_; } -double ConfigParser::threadReductionFactor() const -{ - return thread_reduction_factor_; -} bool ConfigParser::DirBC_Interior() const { diff --git a/src/GMGPolar/gmgpolar.cpp b/src/GMGPolar/gmgpolar.cpp index 78ab4b8b..6413ffde 100644 --- a/src/GMGPolar/gmgpolar.cpp +++ b/src/GMGPolar/gmgpolar.cpp @@ -14,7 +14,6 @@ GMGPolar::GMGPolar(const PolarGrid& grid, const DomainGeometry& domain_geometry, , paraview_(false) // Parallelization and threading settings , max_omp_threads_(omp_get_max_threads()) - , thread_reduction_factor_(1.0) // Numerical method setup , DirBC_Interior_(true) , stencil_distribution_method_(StencilDistributionMethod::CPU_GIVE) @@ -74,7 +73,7 @@ void GMGPolar::paraview(bool paraview) } /* ---------------------------------------------------------------------- */ -/* Parallelization & threading */ +/* Parallelization */ /* ---------------------------------------------------------------------- */ int GMGPolar::maxOpenMPThreads() const { @@ -85,15 +84,6 @@ void GMGPolar::maxOpenMPThreads(int max_omp_threads) max_omp_threads_ = max_omp_threads; } -double GMGPolar::threadReductionFactor() const -{ - return thread_reduction_factor_; -} -void GMGPolar::threadReductionFactor(double thread_reduction_factor) -{ - thread_reduction_factor_ = thread_reduction_factor; -} - /* ---------------------------------------------------------------------- */ /* Numerical method options */ /* ---------------------------------------------------------------------- */ diff --git a/src/GMGPolar/setup.cpp b/src/GMGPolar/setup.cpp index f331c14c..77d97f99 100644 --- a/src/GMGPolar/setup.cpp +++ b/src/GMGPolar/setup.cpp @@ -72,17 +72,9 @@ void GMGPolar::setup() if (paraview_) writeToVTK("output_coarsest_grid", levels_.back().grid()); - // ----------------------------------------------------------- // - // Initializing the optimal number of threads for OpenMP tasks // - // ----------------------------------------------------------- // - threads_per_level_.resize(number_of_levels_, max_omp_threads_); - for (int level_depth = 0; level_depth < number_of_levels_; level_depth++) { - threads_per_level_[level_depth] = std::max( - 1, - std::min(max_omp_threads_, - static_cast(std::floor(max_omp_threads_ * std::pow(thread_reduction_factor_, level_depth))))); - } - + // ------------------------------------- // + // Initialize the interpolation operator // + // ------------------------------------- // interpolation_ = std::make_unique(max_omp_threads_, DirBC_Interior_); if (verbose_ > 0) @@ -100,44 +92,44 @@ void GMGPolar::setup() case ExtrapolationType::NONE: full_grid_smoothing_ = true; levels_[level_depth].initializeSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); break; case ExtrapolationType::IMPLICIT_EXTRAPOLATION: full_grid_smoothing_ = false; levels_[level_depth].initializeExtrapolatedSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); break; case ExtrapolationType::IMPLICIT_FULL_GRID_SMOOTHING: full_grid_smoothing_ = true; levels_[level_depth].initializeSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); break; case ExtrapolationType::COMBINED: full_grid_smoothing_ = true; levels_[level_depth].initializeSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); levels_[level_depth].initializeExtrapolatedSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); break; default: full_grid_smoothing_ = false; levels_[level_depth].initializeSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); levels_[level_depth].initializeExtrapolatedSmoothing(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); break; } auto end_setup_smoother = std::chrono::high_resolution_clock::now(); t_setup_smoother_ += std::chrono::duration(end_setup_smoother - start_setup_smoother).count(); levels_[level_depth].initializeResidual(domain_geometry_, density_profile_coefficients_, DirBC_Interior_, - threads_per_level_[level_depth], stencil_distribution_method_); + max_omp_threads_, stencil_distribution_method_); } // -------------------------- // // Level n-1 (coarsest Level) // @@ -145,13 +137,13 @@ void GMGPolar::setup() else if (level_depth == number_of_levels_ - 1) { auto start_setup_directSolver = std::chrono::high_resolution_clock::now(); levels_[level_depth].initializeDirectSolver(domain_geometry_, density_profile_coefficients_, - DirBC_Interior_, threads_per_level_[level_depth], + DirBC_Interior_, max_omp_threads_, stencil_distribution_method_); auto end_setup_directSolver = std::chrono::high_resolution_clock::now(); t_setup_directSolver_ += std::chrono::duration(end_setup_directSolver - start_setup_directSolver).count(); levels_[level_depth].initializeResidual(domain_geometry_, density_profile_coefficients_, DirBC_Interior_, - threads_per_level_[level_depth], stencil_distribution_method_); + max_omp_threads_, stencil_distribution_method_); } // ------------------- // // Intermediate levels // @@ -159,11 +151,11 @@ void GMGPolar::setup() else { auto start_setup_smoother = std::chrono::high_resolution_clock::now(); levels_[level_depth].initializeSmoothing(domain_geometry_, density_profile_coefficients_, DirBC_Interior_, - threads_per_level_[level_depth], stencil_distribution_method_); + max_omp_threads_, stencil_distribution_method_); auto end_setup_smoother = std::chrono::high_resolution_clock::now(); t_setup_smoother_ += std::chrono::duration(end_setup_smoother - start_setup_smoother).count(); levels_[level_depth].initializeResidual(domain_geometry_, density_profile_coefficients_, DirBC_Interior_, - threads_per_level_[level_depth], stencil_distribution_method_); + max_omp_threads_, stencil_distribution_method_); } } diff --git a/src/GMGPolar/solver.cpp b/src/GMGPolar/solver.cpp index 2bcf800a..2af5c999 100644 --- a/src/GMGPolar/solver.cpp +++ b/src/GMGPolar/solver.cpp @@ -322,7 +322,7 @@ void GMGPolar::extrapolatedResidual(const int current_level, Vector resi assert(residual.size() == static_cast(fineGrid.numberOfNodes())); assert(residual_next_level.size() == static_cast(coarseGrid.numberOfNodes())); -#pragma omp parallel num_threads(threads_per_level_[current_level]) +#pragma omp parallel num_threads(max_omp_threads_) { /* Circluar Indexing Section */ /* For loop matches circular access pattern */ @@ -399,7 +399,7 @@ std::pair GMGPolar::computeExactError(Level& level, ConstVector< assert(solution.size() == error.size()); assert(solution.size() == static_cast(grid.numberOfNodes())); -#pragma omp parallel num_threads(threads_per_level_[level.level_depth()]) +#pragma omp parallel num_threads(max_omp_threads_) { #pragma omp for nowait for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { diff --git a/src/convergence_order.cpp b/src/convergence_order.cpp index 4425a1ab..1a9fa1ff 100644 --- a/src/convergence_order.cpp +++ b/src/convergence_order.cpp @@ -15,8 +15,7 @@ int main(int argc, char* argv[]) const int verbose = 0; const bool paraview = false; - const int maxOpenMPThreads = 16; - const double threadReductionFactor = 1.0; + const int maxOpenMPThreads = 16; const StencilDistributionMethod stencilDistributionMethod = StencilDistributionMethod::CPU_TAKE; const bool cacheDensityProfileCoefficients = true; @@ -98,8 +97,6 @@ int main(int argc, char* argv[]) solver.paraview(paraview); solver.maxOpenMPThreads(maxOpenMPThreads); - solver.threadReductionFactor(threadReductionFactor); - omp_set_num_threads(maxOpenMPThreads); // Global OpenMP thread limit solver.DirBC_Interior(DirBC_Interior); diff --git a/src/main.cpp b/src/main.cpp index b375a46d..ef389a82 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -22,8 +22,6 @@ int main(int argc, char* argv[]) // --- Parallelization and threading settings --- // solver.maxOpenMPThreads(parser.maxOpenMPThreads()); // Maximum OpenMP threads to use - solver.threadReductionFactor(parser.threadReductionFactor()); // Reduce threads on coarser grids - omp_set_num_threads(parser.maxOpenMPThreads()); // Global OpenMP thread limit // --- Numerical method setup --- // diff --git a/src/strong_scaling.cpp b/src/strong_scaling.cpp index 351cfcc0..2f70f0ae 100644 --- a/src/strong_scaling.cpp +++ b/src/strong_scaling.cpp @@ -62,8 +62,6 @@ void runTest(int maxOpenMPThreads, int divideBy2, std::ofstream& outfile) const int verbose = 1; const bool paraview = false; - const double threadReductionFactor = 1.0; - const StencilDistributionMethod stencilDistributionMethod = StencilDistributionMethod::CPU_GIVE; const bool cacheDensityProfileCoefficients = true; const bool cacheDomainGeometry = false; @@ -98,8 +96,6 @@ void runTest(int maxOpenMPThreads, int divideBy2, std::ofstream& outfile) solver.paraview(paraview); solver.maxOpenMPThreads(maxOpenMPThreads); - solver.threadReductionFactor(threadReductionFactor); - omp_set_num_threads(maxOpenMPThreads); // Global OpenMP thread limit solver.DirBC_Interior(DirBC_Interior); diff --git a/src/weak_scaling.cpp b/src/weak_scaling.cpp index cccd5f50..8ecdf676 100644 --- a/src/weak_scaling.cpp +++ b/src/weak_scaling.cpp @@ -61,8 +61,6 @@ void runTest(int maxOpenMPThreads, int divideBy2, std::ofstream& outfile) const int verbose = 1; const bool paraview = false; - const double threadReductionFactor = 1.0; - const StencilDistributionMethod stencilDistributionMethod = StencilDistributionMethod::CPU_GIVE; const bool cacheDensityProfileCoefficients = true; const bool cacheDomainGeometry = false; @@ -97,8 +95,6 @@ void runTest(int maxOpenMPThreads, int divideBy2, std::ofstream& outfile) solver.paraview(paraview); solver.maxOpenMPThreads(maxOpenMPThreads); - solver.threadReductionFactor(threadReductionFactor); - omp_set_num_threads(maxOpenMPThreads); // Global OpenMP thread limit solver.DirBC_Interior(DirBC_Interior); diff --git a/tests/ConfigParser/config_parser.cpp b/tests/ConfigParser/config_parser.cpp index 3e1cf89b..e990fa3f 100644 --- a/tests/ConfigParser/config_parser.cpp +++ b/tests/ConfigParser/config_parser.cpp @@ -36,7 +36,6 @@ TEST_P(ConfigParserTest, ParseAllGeometryAndProblemCombinations) const int verbose = (params.case_id > 0 ? 0 : 1); const bool paraview = false; const int maxOpenMPThreads = 4; - const double threadReductionFactor = 1.0; const bool DirBC_Interior = false; const int stencilDistributionMethod = params.case_id % 2; const bool cacheDensityProfileCoefficients = true; @@ -93,8 +92,6 @@ TEST_P(ConfigParserTest, ParseAllGeometryAndProblemCombinations) paraview ? "1" : "0", "--maxOpenMPThreads", std::to_string(maxOpenMPThreads), - "--threadReductionFactor", - double_to_string(threadReductionFactor), "--DirBC_Interior", DirBC_Interior ? "1" : "0", "--stencilDistributionMethod", @@ -171,7 +168,6 @@ TEST_P(ConfigParserTest, ParseAllGeometryAndProblemCombinations) EXPECT_EQ(parser.verbose(), verbose); EXPECT_EQ(parser.paraview(), paraview); EXPECT_EQ(parser.maxOpenMPThreads(), maxOpenMPThreads); - EXPECT_DOUBLE_EQ(parser.threadReductionFactor(), threadReductionFactor); EXPECT_EQ(parser.DirBC_Interior(), DirBC_Interior); EXPECT_EQ(parser.stencilDistributionMethod(), static_cast(stencilDistributionMethod)); EXPECT_EQ(parser.cacheDensityProfileCoefficients(), cacheDensityProfileCoefficients); diff --git a/tests/GMGPolar/convergence_order.cpp b/tests/GMGPolar/convergence_order.cpp index 145d21b0..5026111d 100644 --- a/tests/GMGPolar/convergence_order.cpp +++ b/tests/GMGPolar/convergence_order.cpp @@ -165,9 +165,8 @@ std::tuple get_gmgpolar_error(PolarGrid const& grid, CzarnyGeome gmgpolar.verbose(0); gmgpolar.paraview(false); - // --- Parallelization and threading settings --- // + // --- Parallelization settings --- // gmgpolar.maxOpenMPThreads(1); - gmgpolar.threadReductionFactor(1.0); // --- Discretization and method settings --- // gmgpolar.DirBC_Interior(false); // Use across-origin calculation diff --git a/tests/GMGPolar/solve_tests.cpp b/tests/GMGPolar/solve_tests.cpp index 7a69eb0f..48601382 100644 --- a/tests/GMGPolar/solve_tests.cpp +++ b/tests/GMGPolar/solve_tests.cpp @@ -534,17 +534,15 @@ void run_gmgpolar() GMGPolar solver(grid, domain, profile_coefficients); - bool paraview = false; - double threadReductionFactor = 1.0; - int preSmoothingSteps = 1; - int postSmoothingSteps = 1; + bool paraview = false; + int preSmoothingSteps = 1; + int postSmoothingSteps = 1; // --- General solver output and visualization settings --- // solver.verbose(TestFixture::verbose); solver.paraview(paraview); // --- Parallelization and threading settings --- // solver.maxOpenMPThreads(TestFixture::maxOpenMPThreads); - solver.threadReductionFactor(threadReductionFactor); omp_set_num_threads(TestFixture::maxOpenMPThreads); // --- Numerical method setup --- // solver.DirBC_Interior(TestFixture::DirBC_Interior); @@ -585,7 +583,6 @@ void run_gmgpolar() EXPECT_EQ(solver.verbose(), TestFixture::verbose); EXPECT_EQ(solver.paraview(), paraview); EXPECT_EQ(solver.maxOpenMPThreads(), TestFixture::maxOpenMPThreads); - EXPECT_DOUBLE_EQ(solver.threadReductionFactor(), threadReductionFactor); EXPECT_EQ(solver.DirBC_Interior(), TestFixture::DirBC_Interior); EXPECT_EQ(solver.stencilDistributionMethod(), TestFixture::stencilDistributionMethod); EXPECT_EQ(solver.cacheDensityProfileCoefficients(), TestFixture::cacheDensityProfileCoefficients);