From fbb11122a50a70cc2c40dd927e92bfc0cf02311d Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 17 Jan 2019 11:09:41 -0600 Subject: [PATCH 1/2] common/ompio: fix a floating point division problem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes a problem reported on the mailing list with individual writes larger than 512 MB. The culprit is a floating point division of two large, close values. Changing the datatypes from float to double (which is what is being used in the fcoll components) fixes the problem. See issue #6285 and https://forum.hdfgroup.org/t/cannot-write-more-than-512-mb-in-1d/5118 Thanks for Axel Huebl and René Widera for reporting the issue. Signed-off-by: Edgar Gabriel (cherry picked from commit c0f8ce0fff4684b670135043dd150abc9d83d988) --- ompi/mca/common/ompio/common_ompio_file_read.c | 9 ++++++--- ompi/mca/common/ompio/common_ompio_file_write.c | 7 ++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ompi/mca/common/ompio/common_ompio_file_read.c b/ompi/mca/common/ompio/common_ompio_file_read.c index 284e8101e34..e236f22f3b1 100644 --- a/ompi/mca/common/ompio/common_ompio_file_read.c +++ b/ompi/mca/common/ompio/common_ompio_file_read.c @@ -9,7 +9,9 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. + * Copyright (c) 2008-2019 University of Houston. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +36,7 @@ #include "ompi/mca/io/ompio/io_ompio_request.h" #include "math.h" #include +#include /* Read and write routines are split into two interfaces. ** The @@ -99,8 +102,8 @@ int mca_common_ompio_file_read (mca_io_ompio_file_t *fh, else { bytes_per_cycle = mca_io_ompio_cycle_buffer_size; } - cycles = ceil((float)max_data/bytes_per_cycle); - + cycles = ceil((double)max_data/bytes_per_cycle); + #if 0 printf ("Bytes per Cycle: %d Cycles: %d max_data:%d \n",bytes_per_cycle, cycles, max_data); #endif diff --git a/ompi/mca/common/ompio/common_ompio_file_write.c b/ompi/mca/common/ompio/common_ompio_file_write.c index bdfc5dd66e3..5c652cccf1e 100644 --- a/ompi/mca/common/ompio/common_ompio_file_write.c +++ b/ompi/mca/common/ompio/common_ompio_file_write.c @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2016 University of Houston. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science + * Copyright (c) 2008-2019 University of Houston. All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -34,6 +34,7 @@ #include "ompi/mca/io/ompio/io_ompio_request.h" #include "math.h" #include +#include int mca_common_ompio_file_write (mca_io_ompio_file_t *fh, const void *buf, @@ -76,7 +77,7 @@ int mca_common_ompio_file_write (mca_io_ompio_file_t *fh, else { bytes_per_cycle = mca_io_ompio_cycle_buffer_size; } - cycles = ceil((float)max_data/bytes_per_cycle); + cycles = ceil((double)max_data/bytes_per_cycle); #if 0 printf ("Bytes per Cycle: %d Cycles: %d\n", bytes_per_cycle, cycles); From 880e184735a384b8725cbfbc49525dd9fa97cd80 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Thu, 31 Jan 2019 11:02:59 -0600 Subject: [PATCH 2/2] io/ompio: possible rounding issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to #6286 rounding number of bytes into a single precision floating point value to round up the result of a division is a potential risk due to rounding errors. - remove floating point operations for `round up` - removes floating point conversion for round down (native behavior of integer division) Signed-off-by: René Widera Note: a direct cherry pick of commit a91fab8 is not possible, due to structural differences between the the 3.1.x and the master/v4.0.x branch. This commit is the equivalent of commit a91fab8. Signed-off-by: Edgar Gabriel --- ompi/mca/io/ompio/io_ompio_aggregators.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio_aggregators.c b/ompi/mca/io/ompio/io_ompio_aggregators.c index 818b10c947e..14814770e00 100644 --- a/ompi/mca/io/ompio/io_ompio_aggregators.c +++ b/ompi/mca/io/ompio/io_ompio_aggregators.c @@ -865,11 +865,12 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, int size_smallest_group = 0; int num_groups = 0; int ret = OMPI_SUCCESS; - + OMPI_MPI_OFFSET_TYPE max_cci = 0; OMPI_MPI_OFFSET_TYPE min_cci = 0; - size_new_group = ceil ((float)mca_io_ompio_bytes_per_agg * fh->f_init_procs_per_group/ bytes_per_group); + // integer round up + size_new_group = (int)(mca_io_ompio_bytes_per_agg / bytes_per_group + (mca_io_ompio_bytes_per_agg % bytes_per_group ? 1u : 0u)); size_old_group = fh->f_init_procs_per_group; ret = mca_io_ompio_split_a_group(fh, @@ -917,7 +918,7 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, if((max_cci < OMPIO_CONTG_THRESHOLD) && (size_new_group < size_old_group)){ - size_new_group = floor( (float) (size_new_group + size_old_group ) / 2 ); + size_new_group = (size_new_group + size_old_group ) / 2; ret = mca_io_ompio_split_a_group(fh, start_offsets_lens, end_offsets, @@ -945,7 +946,9 @@ int mca_io_ompio_split_initial_groups(mca_io_ompio_file_t *fh, (size_new_group < size_old_group)){ //can be a better condition //monitor the previous iteration //break if it has not changed. - size_new_group = ceil( (float) (size_new_group + size_old_group ) / 2 ); + size_new_group = size_new_group + size_old_group; + // integer round up + size_new_group = size_new_group / 2 + (size_new_group % 2 ? 1 : 0); ret = mca_io_ompio_split_a_group(fh, start_offsets_lens, end_offsets,