diff --git a/CMakeInstallation.cmake b/CMakeInstallation.cmake index 8d710da3124..e324e5a1444 100644 --- a/CMakeInstallation.cmake +++ b/CMakeInstallation.cmake @@ -251,7 +251,7 @@ if (NOT HDF5_EXTERNALLY_CONFIGURED) if (HDF5_ENABLE_PARALLEL) set (release_files ${release_files} - ${HDF5_SOURCE_DIR}/release_docs/INSTALL_parallel + ${HDF5_SOURCE_DIR}/release_docs/README_HPC.md ) endif () endif () diff --git a/README.md b/README.md index 583ff2ced47..0e8f3f3cc7d 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ Several more files are located within the [release_docs/][u4] directory with spe details for several common platforms and configurations. - INSTALL - Start Here. General instructions for compiling and installing the library or using an installed library - INSTALL_CMAKE - instructions for building with CMake (Kitware.com) -- INSTALL_parallel - instructions for building and configuring Parallel HDF5 +- README_HPC.md - instructions for building and configuring Parallel HDF5 on HPC systems - INSTALL_Windows and INSTALL_Cygwin - MS Windows installations. - USING_HDF5_CMake - Build and Install HDF5 Applications with CMake - USING_CMake_Examples - Build and Test HDF5 Examples with CMake diff --git a/bin/batch/knl_ctestP.sl.in.cmake b/bin/batch/knl_ctestP.sl.in.cmake deleted file mode 100644 index 97bd2ad31aa..00000000000 --- a/bin/batch/knl_ctestP.sl.in.cmake +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -#SBATCH -p knl -C quad,cache -#SBATCH --nodes=1 -#SBATCH -t 00:30:00 -#SBATCH --mail-type=BEGIN,END,FAIL -##SBATCH --mail-user=@sandia.gov -#SBATCH --export=ALL -#SBATCH --job-name=h5_ctestP - -cd @HDF5_BINARY_DIR@ -echo "Run parallel test command. Test output will be in build/ctestP.out" -ctest -S ctest_parallel.cmake >& ctestP.out - -echo "Done running ctest parallel command." 
-touch ctestP.done diff --git a/bin/batch/knl_ctestS.sl.in.cmake b/bin/batch/knl_ctestS.sl.in.cmake deleted file mode 100644 index 87c4a486eea..00000000000 --- a/bin/batch/knl_ctestS.sl.in.cmake +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -#SBATCH -p knl -C quad,cache -#SBATCH --nodes=1 -#SBATCH -t 00:30:00 -#SBATCH --mail-type=BEGIN,END,FAIL -##SBATCH --mail-user=@sandia.gov -#SBATCH --export=ALL -#SBATCH --job-name=h5_ctestS - -cd @HDF5_BINARY_DIR@ -echo "Run command. Test output will be in build/ctestS.out" -ctest -S ctest_serial.cmake >& ctestS.out - -echo "Done running command." -touch ctestS.done diff --git a/config/cmake/scripts/CTestScript.cmake b/config/cmake/scripts/CTestScript.cmake index a795f1b9d32..1187ee28dd0 100644 --- a/config/cmake/scripts/CTestScript.cmake +++ b/config/cmake/scripts/CTestScript.cmake @@ -264,15 +264,6 @@ set (ENV{CI_MODEL} ${MODEL}) file (APPEND ${CTEST_SCRIPT_DIRECTORY}/FailedCTest.txt "Failed Configure: ${res}\n") endif () - # On Cray XC40, configuring fails in the Fortran section when using the craype-mic-knl module. - # When the configure phase is done with the craype-haswell module and the build phase is done - # with the craype-mic-knl module, configure succeeds and tests pass on the knl compute nodes - # for Intel, Cray, GCC and Clang compilers. If the variables aren't set or if not - # cross compiling, the module switch will not occur. 
- if (CMAKE_CROSSCOMPILING AND COMPILENODE_HWCOMPILE_MODULE AND COMPUTENODE_HWCOMPILE_MODULE) - execute_process (COMMAND module switch ${COMPILENODE_HWCOMPILE_MODULE} ${COMPUTENODE_HWCOMPILE_MODULE}) - endif () - ctest_build (BUILD "${CTEST_BINARY_DIRECTORY}" APPEND RETURN_VALUE res NUMBER_ERRORS errval) if (LOCAL_SUBMIT) ctest_submit (PARTS Build) @@ -287,15 +278,11 @@ set (ENV{CI_MODEL} ${MODEL}) ctest_test (BUILD "${CTEST_BINARY_DIRECTORY}" APPEND ${ctest_test_args} RETURN_VALUE res) else () file(STRINGS ${CTEST_BINARY_DIRECTORY}/Testing/TAG TAG_CONTENTS REGEX "^2([0-9]+)[-]([0-9]+)$") - if (LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "raybsub") - execute_process (COMMAND ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_COMMAND} ${LOCAL_BATCH_SCRIPT_ARGS} ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME}) + if (LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "qsub") + execute_process (COMMAND ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME} ctestS.out) else () - if (LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "qsub") - execute_process (COMMAND ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME} ctestS.out) - else () - execute_process (COMMAND ${LOCAL_BATCH_SCRIPT_COMMAND} ${LOCAL_BATCH_SCRIPT_ARGS} ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME}) - endif() - endif () + execute_process (COMMAND ${LOCAL_BATCH_SCRIPT_COMMAND} ${LOCAL_BATCH_SCRIPT_ARGS} ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME}) + endif() message(STATUS "Check for existence of ${CTEST_BINARY_DIRECTORY}/ctestS.done") execute_process(COMMAND ls ${CTEST_BINARY_DIRECTORY}/ctestS.done RESULT_VARIABLE result OUTPUT_QUIET ERROR_QUIET) while(result) @@ -308,14 +295,10 @@ set (ENV{CI_MODEL} ${MODEL}) endif () if (LOCAL_BATCH_SCRIPT_PARALLEL_NAME) unset(result CACHE) - if (LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "raybsub") - execute_process (COMMAND ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_COMMAND} ${LOCAL_BATCH_SCRIPT_ARGS} ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_PARALLEL_NAME}) + if 
(LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "qsub") + execute_process (COMMAND ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME} ctestP.out) else () - if (LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "qsub") - execute_process (COMMAND ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_NAME} ctestP.out) - else () - execute_process (COMMAND ${LOCAL_BATCH_SCRIPT_COMMAND} ${LOCAL_BATCH_SCRIPT_ARGS} ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_PARALLEL_NAME}) - endif () + execute_process (COMMAND ${LOCAL_BATCH_SCRIPT_COMMAND} ${LOCAL_BATCH_SCRIPT_ARGS} ${CTEST_BINARY_DIRECTORY}/${LOCAL_BATCH_SCRIPT_PARALLEL_NAME}) endif () message(STATUS "Check for existence of ${CTEST_BINARY_DIRECTORY}/ctestP.done") execute_process(COMMAND ls ${CTEST_BINARY_DIRECTORY}/ctestP.done RESULT_VARIABLE result OUTPUT_QUIET ERROR_QUIET) diff --git a/config/cmake/scripts/HDF5config.cmake b/config/cmake/scripts/HDF5config.cmake index 154841078d7..9e7f59e0a4b 100644 --- a/config/cmake/scripts/HDF5config.cmake +++ b/config/cmake/scripts/HDF5config.cmake @@ -48,7 +48,7 @@ set (CTEST_SOURCE_VERSEXT "-4") #CTEST_CONFIGURATION_TYPE - Release, Debug, RelWithDebInfo #CTEST_SOURCE_NAME - name of source folder; HDF5-2.0.x #MODEL - CDash group name -#HPC - run alternate configurations for HPC machines; sbatch, bsub, raybsub, qsub +#HPC - run alternate configurations for HPC machines; sbatch, bsub, qsub #MPI - enable MPI if (DEFINED CTEST_SCRIPT_ARG) # transform ctest script arguments of the form diff --git a/config/cmake/scripts/HPC/qsub-HDF5options.cmake b/config/cmake/scripts/HPC/qsub-HDF5options.cmake index 8c716be46a9..5a52507ab7c 100644 --- a/config/cmake/scripts/HPC/qsub-HDF5options.cmake +++ b/config/cmake/scripts/HPC/qsub-HDF5options.cmake @@ -21,11 +21,6 @@ endif() ### options to run test scripts in batch commands set (LOCAL_BATCH_SCRIPT_NAME "ctest.qsub") set (LOCAL_BATCH_SCRIPT_PARALLEL_NAME "ctest.qsub") -if (DEFINED KNL) - ### some additions and alternatives to cross compile on haswell for knl - set 
(COMPUTENODE_HWCOMPILE_MODULE "craype-mic-knl") - set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DCMAKE_TOOLCHAIN_FILE:STRING=config/toolchain/crayle.cmake") -endif () set (LOCAL_BATCH_SCRIPT_COMMAND "qsub") set (LOCAL_BATCH_TEST "TRUE") set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DLOCAL_BATCH_TEST:BOOL=ON") diff --git a/config/cmake/scripts/HPC/raybsub-HDF5options.cmake b/config/cmake/scripts/HPC/raybsub-HDF5options.cmake deleted file mode 100644 index 60010584131..00000000000 --- a/config/cmake/scripts/HPC/raybsub-HDF5options.cmake +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright by The HDF Group. -# All rights reserved. -# -# This file is part of HDF5. The full HDF5 copyright notice, including -# terms governing use, modification, and redistribution, is contained in -# the LICENSE file, which can be found at the root of the source code -# distribution tree, or in https://www.hdfgroup.org/licenses. -# If you do not have access to either file, you may request a copy from -# help@hdfgroup.org. 
-# -############################################################################################# -#### Change default configuration of options in config/cmake/cacheinit.cmake file ### -#### format: set(ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DXXX:YY=ZZZZ") ### -############################################################################################# -if (DEFINED MPI) - # maximum parallel processor count for build and test #### - set (MAX_PROC_COUNT 8) -endif() -############################################################################################# -### options to run test scripts in batch commands -set (LOCAL_BATCH_SCRIPT_COMMAND "raybsub") -set (LOCAL_BATCH_TEST "TRUE") -set (LOCAL_BATCH_SCRIPT_NAME "ray_ctestS.lsf") -set (LOCAL_BATCH_SCRIPT_PARALLEL_NAME "ray_ctestP.lsf") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DLOCAL_BATCH_TEST:BOOL=ON") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DLOCAL_BATCH_SCRIPT_COMMAND:STRING=${LOCAL_BATCH_SCRIPT_COMMAND}") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DLOCAL_BATCH_SCRIPT_NAME:STRING=${LOCAL_BATCH_SCRIPT_NAME}") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DLOCAL_BATCH_SCRIPT_PARALLEL_NAME:STRING=${LOCAL_BATCH_SCRIPT_PARALLEL_NAME}") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DMPIEXEC_EXECUTABLE:STRING=mpirun") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DMPIEXEC_NUMPROC_FLAG:STRING=-np") -set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DMPIEXEC_MAX_NUMPROCS:STRING=6") diff --git a/config/cmake/scripts/HPC/sbatch-HDF5options.cmake b/config/cmake/scripts/HPC/sbatch-HDF5options.cmake index 2c8f89f6c2f..8b1c6f2f86b 100644 --- a/config/cmake/scripts/HPC/sbatch-HDF5options.cmake +++ b/config/cmake/scripts/HPC/sbatch-HDF5options.cmake @@ -19,17 +19,8 @@ if (DEFINED MPI) endif() ############################################################################################# ### options to run test scripts in batch commands -if (DEFINED KNL) - ### some additions and alternatives to cross compile on 
haswell for knl - set (COMPILENODE_HWCOMPILE_MODULE "craype-haswell") - set (COMPUTENODE_HWCOMPILE_MODULE "craype-mic-knl") - set (LOCAL_BATCH_SCRIPT_NAME "knl_ctestS.sl") - set (LOCAL_BATCH_SCRIPT_PARALLEL_NAME "knl_ctestP.sl") - set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DCMAKE_TOOLCHAIN_FILE:STRING=config/toolchain/crayle.cmake") -else () - set (LOCAL_BATCH_SCRIPT_NAME "ctestS.sl") - set (LOCAL_BATCH_SCRIPT_PARALLEL_NAME "ctestP.sl") -endif () +set (LOCAL_BATCH_SCRIPT_NAME "ctestS.sl") +set (LOCAL_BATCH_SCRIPT_PARALLEL_NAME "ctestP.sl") set (LOCAL_BATCH_SCRIPT_COMMAND "sbatch") set (LOCAL_BATCH_TEST "TRUE") set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DLOCAL_BATCH_TEST:BOOL=ON") diff --git a/release_docs/INSTALL_CMake.txt b/release_docs/INSTALL_CMake.txt index 016e630c800..2c991bb5d1c 100644 --- a/release_docs/INSTALL_CMake.txt +++ b/release_docs/INSTALL_CMake.txt @@ -65,9 +65,9 @@ from the HDF5 source code package using the CMake tools. This procedure will use the default settings in the config/cmake/cacheinit.cmake file. The HDF Group recommends using the presets process to build HDF5. -NOTE: if you are using CMake 3.25 or later, you can use the presets process. - The CMakePresets.json file in the source directory will configure, build, test, - and package HDF5 with the same options that are set in the cacheinit.cmake file. +NOTE: When using the presets process, the CMakePresets.json file in the source + directory will configure, build, test, and package HDF5 with the same options + that are set in the cacheinit.cmake file. In addition, it will get the optional files listed below that are needed, from the appropriate repositories. See Section X: Using CMakePresets.json for compiling @@ -299,9 +299,7 @@ IV. Further Considerations ======================================================================== 1. We suggest you obtain the latest CMake from the Kitware - web site. The HDF5 2."X"."Y" product requires a minimum CMake version 3.18. 
- If you are using VS2022, the CMake minimum version is 3.21. CMakePresets - requires CMake 3.25. + web site. The HDF5 2."X"."Y" product requires a minimum CMake version 3.26. 2. If you plan to use Zlib or Szip (aka libaec): A. Download the binary packages and install them in a central location. @@ -766,7 +764,7 @@ These five steps are described in detail below. 12. Parallel versus serial library The HDF5 library can be configured to use MPI and MPI-IO for parallelism on a distributed multi-processor system. Read the - file INSTALL_parallel for detailed information. + file README_HPC.md for detailed information. HDF5_ENABLE_PARALLEL:BOOL=ON The threadsafe, C++ and Java interfaces are not compatible diff --git a/release_docs/INSTALL_parallel b/release_docs/INSTALL_parallel deleted file mode 100644 index e2ac46a471d..00000000000 --- a/release_docs/INSTALL_parallel +++ /dev/null @@ -1,382 +0,0 @@ - Installation instructions for Parallel HDF5 - ------------------------------------------- - -0. Use Build Scripts --------------------- -The HDF Group is accumulating build scripts to handle building parallel HDF5 -on various platforms (Cray, IBM, SGI, etc...). These scripts are being -maintained and updated continuously for current and future systems. The reader -is strongly encouraged to consult the repository at, - -https://github.com/HDFGroup/build_hdf5 - -for building parallel HDF5 on these system. All contributions, additions -and fixes to the repository are welcomed and encouraged. - - -1. Overview ------------ -This file contains instructions for the installation of parallel HDF5 (PHDF5). -It is assumed that you are familiar with the general installation steps as -described in the INSTALL file. Get familiar with that file before trying -the parallel HDF5 installation. - -The remaining of this section explains the requirements to run PHDF5. -Section 2 shows quick instructions for some well know systems. Section 3 -explains the details of the installation steps. 
Section 4 shows some details -of running the parallel test suites. - - -1.1. Requirements ------------------ -PHDF5 requires an MPI compiler with MPI-IO support and a POSIX compliant -(Ref. 1) parallel file system. If you don't know yet, you should first consult -with your system support staff of information how to compile an MPI program, -how to run an MPI application, and how to access the parallel file system. -There are sample MPI-IO C and Fortran programs in the appendix section of -"Sample programs". You can use them to run simple tests of your MPI compilers -and the parallel file system. - - -1.2. Further Help ------------------ - -For help with installing, questions can be posted to the HDF Forum or sent to the HDF Helpdesk: - - HDF Forum: https://forum.hdfgroup.org/ - HDF Helpdesk: https://help.hdfgroup.org/ - -In your mail, please include the output of "uname -a". If you have run the -"configure" command, attach the output of the command and the content of -the file "config.log". - - -2. Quick Instruction for known systems --------------------------------------- -The following shows particular steps to run the parallel HDF5 configure for -a few machines we've tested. If your particular platform is not shown or -somehow the steps do not work for yours, please go to the next section for -more detailed explanations. - - -2.1. Know parallel compilers ----------------------------- -HDF5 knows several parallel compilers: mpicc, hcc, mpcc, mpcc_r. To build -parallel HDF5 with one of the above, just set CC as it and configure. - - $ CC=/usr/local/mpi/bin/mpicc ./configure --enable-parallel --prefix= - $ make # build the library - $ make check # verify the correctness - # Read the Details section about parallel tests. - $ make install - - -2.2. 
Linux 2.4 and greater --------------------------- -Be sure that your installation of MPICH was configured with the following -configuration command-line option: - - -cflags="-D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64" - -This allows for >2GB sized files on Linux systems and is only available with -Linux kernels 2.4 and greater. - - -2.3. Hopper (Cray XE6) (for v1.8 and later) -------------------------- - -The following steps are for building HDF5 for the Hopper compute -nodes. They would probably work for other Cray systems but have -not been verified. - -Obtain the HDF5 source code: - https://support.hdfgroup.org/downloads/index.html - -The entire build process should be done on a MOM node in an interactive allocation and on a file system accessible by all compute nodes. -Request an interactive allocation with qsub: -qsub -I -q debug -l mppwidth=8 - -- create a build directory build-hdf5: - mkdir build-hdf5; cd build-hdf5/ - -- configure HDF5: - RUNSERIAL="aprun -q -n 1" RUNPARALLEL="aprun -q -n 6" FC=ftn CC=cc /path/to/source/configure --enable-fortran --enable-parallel --disable-shared - - RUNSERIAL and RUNPARALLEL tell the library how it should launch programs that are part of the build procedure. - -- Compile HDF5: - gmake - -- Check HDF5 - gmake check - -- Install HDF5 - gmake install - -The build will be in build-hdf5/hdf5/ (or whatever you specify in --prefix). -To compile other HDF5 applications use the wrappers created by the build (build-hdf5/hdf5/bin/h5pcc or h5fc) - - -3. Detail explanation ---------------------- - -3.1. Installation steps (Uni/Multiple processes modes) ------------------------ -During the step of configure, you must be running in the uni-process mode. -If multiple processes are doing the configure simultaneously, they will -incur errors. - -In the build step (make), it depends on your make command whether it can -run correctly in multiple processes mode. 
If you are not sure, you should -try running it in uni-process mode. - -In the test step (make check), if your system can control number of processes -running in the MPI application, you can just use "make check". But if your -system (e.g., IBM SP) has a fixed number of processes for each batch run, -you need to do the serial tests by "make check-s", requesting 1 process and -then do the parallel tests by "make check-p", requesting n processes. - -Lastly, "make install" should be run in the uni-process mode. - - -3.2. Configure details ----------------------- -The HDF5 library can be configured to use MPI and MPI-IO for parallelism on -a distributed multi-processor system. The easiest way to do this is to have -a properly installed parallel compiler (e.g., MPICH's mpicc or IBM's mpcc_r) -and supply the compiler name as the value of the CC environment variable. -For examples, - - $ CC=mpcc_r ./configure --enable-parallel - $ CC=/usr/local/mpi/bin/mpicc ./configure --enable-parallel - -If a parallel library is being built then configure attempts to determine how -to run a parallel application on one processor and on many processors. If the -compiler is `mpicc' and the user hasn't specified values for RUNSERIAL and -RUNPARALLEL then configure chooses `mpiexec' from the same directory as `mpicc': - - RUNSERIAL: mpiexec -n 1 - RUNPARALLEL: mpiexec -n $${NPROCS:=6} - -The `$${NPROCS:=6}' will be substituted with the value of the NPROCS -environment variable at the time `make check' is run (or the value 6). - -Note that some MPI implementations (e.g. OpenMPI 4.0) disallow oversubscribing -nodes by default so you'll have to either set NPROCS equal to the number of -processors available (or fewer) or redefine RUNPARALLEL with appropriate -flag(s) (--oversubscribe in OpenMPI). - -4. Parallel test suite ----------------------- -The testpar/ directory contains tests for Parallel HDF5 and MPI-IO. Here are -some notes about some of the tests. 
- -The t_mpi tests the basic functionalities of some MPI-IO features used by -Parallel HDF5. It usually exits with non-zero code if a required MPI-IO -feature does not succeed as expected. One exception is the testing of -accessing files larger than 2GB. If the underlying filesystem or if the -MPI-IO library fails to handle file sizes larger than 2GB, the test will -print informational messages stating the failure but will not exit with -non-zero code. Failure to support file size greater than 2GB is not a fatal -error for HDF5 because HDF5 can use other file-drivers such as families of -files to bypass the file size limit. - -The t_cache does many small sized I/O requests and may not run well in a -slow file system such as NFS disk. If it takes a long time to run it, try -set the environment variable $HDF5_PARAPREFIX to a file system more suitable -for MPI-IO requests before running t_cache. - -By default, the parallel tests use the current directory as the test directory. -This can be changed by the environment variable $HDF5_PARAPREFIX. For example, -if the tests should use directory /PFS/user/me, do - HDF5_PARAPREFIX=/PFS/user/me - export HDF5_PARAPREFIX - make check - -(In some batch job system, you many need to hardset HDF5_PARAPREFIX in the -shell initial files like .profile, .cshrc, etc.) - - -Reference ---------- -1. POSIX Compliant. A good explanation is by Donald Lewin, - After a write() to a regular file has successfully returned, any - successful read() from each byte position on the file that was modified - by that write() will return the data that was written by the write(). A - subsequent write() to the same byte will overwrite the file data. If a - read() of a file data can be proven by any means [e.g., MPI_Barrier()] - to occur after a write() of that data, it must reflect that write(), - even if the calls are made by a different process. - Lewin, D. (1994). "POSIX Programmer's Guide (pg. 513-4)". O'Reilly - & Associates. - - -Appendix A. 
Sample programs ---------------------------- -Here are sample MPI-IO C and Fortran programs. You may use them to run simple -tests of your MPI compilers and the parallel file system. The MPI commands -used here are mpicc, mpif90 and mpiexec. Replace them with the commands of -your system. - -The programs assume they run in the parallel file system. Thus they create -the test data file in the current directory. If the parallel file system -is somewhere else, you need to run the sample programs there or edit the -programs to use a different file name. - -Example compiling and running: - -% mpicc Sample_mpio.c -o c.out -% mpiexec -np 4 c.out - -% mpif90 Sample_mpio.f90 -o f.out -% mpiexec -np 4 f.out - - -==> Sample_mpio.c <== -/* Simple MPI-IO program testing if a parallel file can be created. - * Default filename can be specified via first program argument. - * Each process writes something, then reads all data back. - */ - -#include -#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */ -# include -#endif - -#define DIMSIZE 10 /* dimension size, avoid powers of 2. 
*/ -#define PRINTID printf("Proc %d: ", mpi_rank) - -main(int ac, char **av) -{ - char hostname[128]; - int mpi_size, mpi_rank; - MPI_File fh; - char *filename = "./mpitest.data"; - char mpi_err_str[MPI_MAX_ERROR_STRING]; - int mpi_err_strlen; - int mpi_err; - char writedata[DIMSIZE], readdata[DIMSIZE]; - char expect_val; - int i, irank; - int nerrors = 0; /* number of errors */ - MPI_Offset mpi_off; - MPI_Status mpi_stat; - - MPI_Init(&ac, &av); - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - - /* get file name if provided */ - if (ac > 1){ - filename = *++av; - } - if (mpi_rank==0){ - printf("Testing simple MPIO program with %d processes accessing file %s\n", - mpi_size, filename); - printf(" (Filename can be specified via program argument)\n"); - } - - /* show the hostname so that we can tell where the processes are running */ - if (gethostname(hostname, 128) < 0){ - PRINTID; - printf("gethostname failed\n"); - return 1; - } - PRINTID; - printf("hostname=%s\n", hostname); - - if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename, - MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, - MPI_INFO_NULL, &fh)) - != MPI_SUCCESS){ - MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); - PRINTID; - printf("MPI_File_open failed (%s)\n", mpi_err_str); - return 1; - } - - /* each process writes some data */ - for (i=0; i < DIMSIZE; i++) - writedata[i] = mpi_rank*DIMSIZE + i; - mpi_off = mpi_rank*DIMSIZE; - if ((mpi_err = MPI_File_write_at(fh, mpi_off, writedata, DIMSIZE, MPI_BYTE, - &mpi_stat)) - != MPI_SUCCESS){ - MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); - PRINTID; - printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n", - (long) mpi_off, (int) DIMSIZE, mpi_err_str); - return 1; - }; - - /* make sure all processes has done writing. */ - MPI_Barrier(MPI_COMM_WORLD); - - /* each process reads all data and verify. 
*/ - for (irank=0; irank < mpi_size; irank++){ - mpi_off = irank*DIMSIZE; - if ((mpi_err = MPI_File_read_at(fh, mpi_off, readdata, DIMSIZE, MPI_BYTE, - &mpi_stat)) - != MPI_SUCCESS){ - MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); - PRINTID; - printf("MPI_File_read_at offset(%ld), bytes (%d), failed (%s)\n", - (long) mpi_off, (int) DIMSIZE, mpi_err_str); - return 1; - }; - for (i=0; i < DIMSIZE; i++){ - expect_val = irank*DIMSIZE + i; - if (readdata[i] != expect_val){ - PRINTID; - printf("read data[%d:%d] got %d, expect %d\n", irank, i, - readdata[i], expect_val); - nerrors++; - } - } - } - if (nerrors) - return 1; - - MPI_File_close(&fh); - - PRINTID; - printf("all tests passed\n"); - - MPI_Finalize(); - return 0; -} - -==> Sample_mpio.f90 <== -! -! The following example demonstrates how to create and close a parallel -! file using MPI-IO calls. -! -! USE MPI is the proper way to bring in MPI definitions but many -! MPI Fortran compiler supports the pseudo standard of INCLUDE. -! So, HDF5 uses the INCLUDE statement instead. -! - - PROGRAM MPIOEXAMPLE - - USE mpi - - IMPLICIT NONE - - CHARACTER(LEN=80), PARAMETER :: filename = "filef.h5" ! File name - INTEGER :: ierror ! Error flag - INTEGER :: fh ! File handle - INTEGER :: amode ! File access mode - - call MPI_INIT(ierror) - amode = MPI_MODE_RDWR + MPI_MODE_CREATE + MPI_MODE_DELETE_ON_CLOSE - call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, fh, ierror) - print *, "Trying to create ", filename - if ( ierror .eq. 
MPI_SUCCESS ) then - print *, "MPI_FILE_OPEN succeeded" - call MPI_FILE_CLOSE(fh, ierror) - else - print *, "MPI_FILE_OPEN failed" - endif - - call MPI_FINALIZE(ierror); - END PROGRAM diff --git a/release_docs/README_HPC b/release_docs/README_HPC deleted file mode 100644 index f8746bcd18f..00000000000 --- a/release_docs/README_HPC +++ /dev/null @@ -1,185 +0,0 @@ -************************************************************************ -* Using CMake to build and test HDF5 source on HPC machines * -************************************************************************ - - Contents - -Section I: Prerequisites -Section II: Obtain HDF5 source -Section III: Using ctest command to build and test -Section IV: Cross compiling -Section V: Manual alternatives - -************************************************************************ - -======================================================================== -I. Prerequisites -======================================================================== - 1. Create a working directory that is accessible from the compute nodes for - running tests; the working directory should be in a scratch space or a - parallel file system space since testing will use this space. Building - from HDF5 source in a 'home' directory typically results in test - failures and should be avoided. - - 2. Load modules for desired compilers, module for cmake version 3.18 or greater, - and set any needed environment variables for compilers (i.e., CC, FC, CXX). - Unload any problematic modules (i.e., craype-hugepages2M). - -======================================================================== -II. 
Obtain HDF5 source -======================================================================== -Obtain HDF5 source code from the HDF5 repository using a git command or -from a release tar file in a working directory: - - git clone https://github.com/HDFGroup/hdf5.git - [-b branch] [source directory] - -If no branch is specified, then the 'develop' version will be checked out. -If no source directory is specified, then the source will be located in the -'hdf5' directory. The CMake scripts expect the source to be in a directory -named hdf5-, where 'version string' uses the format '1.xx.xx'. - -If the version number is not known a priori, the version string -can be obtained by running bin/h5vers in the top level directory of the source clone, and -the source directory renamed 'hdf5-'. - -Release or snapshot tar files may also be extracted and used. - -======================================================================== -III. Using ctest command to build and test -======================================================================== - -The ctest command [1]: - - ctest -S HDF5config.cmake,BUILD_GENERATOR=Unix -C Release -V -O hdf5.log - -will configure, build, test and package HDF5 from the downloaded source -after the setup steps outlined below are followed. - -CMake option variables are available to allow running test programs in batch -scripts on compute nodes and to cross-compile for compute node hardware using -a cross-compiling emulator. The setup steps will make default settings for -parallel or serial only builds available to the CMake command. - - 1. The "hdf5" directory should be renamed hdf5-. For - further explanation see section II. - - 2. 
Three cmake script files need to be copied to the working directory, or - have symbolic links to them, created in the working directory: - - hdf5-/config/cmake/scripts/HDF5config.cmake - hdf5-/config/cmake/scripts/CTestScript.cmake - hdf5-/config/cmake/scripts/HDF5options.cmake - - should be copied to the working directory. - - 3. The resulting contents of the working directory are then: - - CTestScript.cmake - HDF5config.cmake - HDF5options.cmake - hdf5- - - Additionally, when the ctest command runs [1], it will add a build directory - in the working directory. - - 4. The following options (among others) can be added to the ctest - command [1], following '-S HDF5config.cmake,' and separated by ',': - - HPC=sbatch (or 'bsub' or 'raybsub') indicates which type of batch - files to use for running tests. If omitted, test - will run on the local machine or login node. - - KNL=true to cross-compile for KNL compute nodes on CrayXC40 - (see section IV) - - MPI=true enables parallel, disables c++, java, and threadsafe - - LOCAL_BATCH_SCRIPT_ARGS="--account=" to supply user account - information for batch jobs - - The HPC options will add BUILD_GENERATOR=Unix for the three HPC options. - An example ctest command for a parallel build on a system using sbatch is - - ctest -S HDF5config.cmake,HPC=sbatch,MPI=true -C Release -V -O hdf5.log - - Adding the option 'KNL=true' to the above list will compile for KNL nodes, - for example, on 'mutrino' and other CrayXC40 machines. - - Changing -V to -VV will produce more logging information in HDF5.log. - - More detailed CMake information can be found in the HDF5 source in - release_docs/INSTALL_CMake.txt. - -======================================================================== -IV. 
Cross-compiling -======================================================================== -For cross-compiling on Cray, set environment variables CC=cc, FC=ftn -and CXX=CC (for c++) after all compiler modules are loaded since switching -compiler modules may unset or reset these variables. - -CMake provides options for cross-compiling. To cross-compile for KNL hardware -on mutrino and other CrayXC40 machines, add HPC=sbatch,KNL=true to the -ctest command line. This will set the following options from the -config/cmake/scripts/HPC/sbatch-HDF5options.cmake file: - - set (COMPILENODE_HWCOMPILE_MODULE "craype-haswell") - set (COMPUTENODE_HWCOMPILE_MODULE "craype-mic-knl") - set (LOCAL_BATCH_SCRIPT_NAME "knl_ctestS.sl") - set (LOCAL_BATCH_SCRIPT_PARALLEL_NAME "knl_ctestP.sl") - set (ADD_BUILD_OPTIONS "${ADD_BUILD_OPTIONS} -DCMAKE_TOOLCHAIN_FILE:STRING=config/toolchain/crayle.cmake") - -On the Cray XC40 the craype-haswell module is needed for configuring, and the -craype-mic-knl module is needed for building to run on the KNL nodes. CMake -with the above options will swap modules after configuring is complete, -but before compiling programs for KNL. - -The sbatch script arguments for running jobs on KNL nodes may differ on CrayXC40 -machines other than mutrino. The batch scripts knl_ctestS.sl and knl_ctestP.sl -have the correct arguments for mutrino: "#SBATCH -p knl -C quad,cache". For -cori, another CrayXC40, that line is replaced by "#SBATCH -C knl,quad,cache". -For cori (and other machines), the values in LOCAL_BATCH_SCRIPT_NAME and -LOCAL_BATCH_SCRIPT_PARALLEL_NAME in the config/cmake/scripts/HPC/sbatch-HDF5options.cmake -file can be replaced by cori_knl_ctestS.sl and cori_knl_ctestS.sl, or the lines -can be edited in the batch files in hdf5-/bin/batch (see section II -for version string explanation). - -======================================================================== -V. 
Manual alternatives -======================================================================== -If using ctest is undesirable, one can create a build directory and run the cmake -configure command, for example - -"/projects/Mutrino/hpcsoft/cle6.0/common/cmake//bin/cmake" --C "/hdf5-/config/cmake/cacheinit.cmake" --DCMAKE_BUILD_TYPE:STRING=Release -DHDF5_BUILD_FORTRAN:BOOL=ON --DHDF5_BUILD_JAVA:BOOL=OFF --DCMAKE_INSTALL_PREFIX:PATH=/HDF_Group/HDF5/ --DHDF5_ENABLE_ZLIB_SUPPORT:BOOL=OFF -DHDF5_ENABLE_SZIP_SUPPORT:BOOL=OFF --DHDF5_ENABLE_PARALLEL:BOOL=ON -DHDF5_BUILD_CPP_LIB:BOOL=OFF --DHDF5_BUILD_JAVA:BOOL=OFF -DHDF5_ENABLE_THREADSAFE:BOOL=OFF --DHDF5_PACKAGE_EXTLIBS:BOOL=ON -DLOCAL_BATCH_TEST:BOOL=ON --DMPIEXEC_EXECUTABLE:STRING=srun -DMPIEXEC_NUMPROC_FLAG:STRING=-n --DMPIEXEC_MAX_NUMPROCS:STRING=6 --DCMAKE_TOOLCHAIN_FILE:STRING=config/toolchain/crayle.cmake --DLOCAL_BATCH_SCRIPT_NAME:STRING=knl_ctestS.sl --DLOCAL_BATCH_SCRIPT_PARALLEL_NAME:STRING=knl_ctestP.sl -DSITE:STRING=mutrino --DBUILDNAME:STRING=par-knl_GCC493-SHARED-Linux-4.4.156-94.61.1.16335.0.PTF.1107299-default-x86_64 -"-GUnix Makefiles" "" "/hdf5-" - -followed by make and batch jobs to run tests. - -To cross-compile on CrayXC40, run the configure command with the craype-haswell -module loaded, then switch to the craype-mic-knl module for the build process. - -Tests on machines using slurm can be run with - -"sbatch -p knl -C quad,cache ctestS.sl" - -or - -"sbatch -p knl -C quad,cache ctestP.sl" - -for parallel builds. - -Tests on machines using LSF will typically use "bsub ctestS.lsf", etc. diff --git a/release_docs/README_HPC.md b/release_docs/README_HPC.md new file mode 100644 index 00000000000..10de30201be --- /dev/null +++ b/release_docs/README_HPC.md @@ -0,0 +1,601 @@ +# Installation Instructions for Parallel HDF5 + +## 1. Overview + +This file contains instructions for installing parallel HDF5 (PHDF5) using +CMake. 
The document covers: + +- **Section 1:** Requirements and prerequisites +- **Section 2:** Obtaining HDF5 source +- **Section 3:** Quick start instructions +- **Section 4:** Automated builds with ctest (HPC systems) +- **Section 5:** Manual CMake configuration +- **Section 6:** Cross-compiling for HPC hardware +- **Section 7:** Running parallel tests +- **Section 8:** Known platform notes +- **Appendix A:** Sample MPI-IO programs + +### 1.1. Requirements + +PHDF5 requires: + +- CMake version 3.26 or greater +- An MPI compiler with MPI-IO support +- A POSIX compliant parallel file system (see References) + +You should first consult with your system support staff for information on: + +- How to compile an MPI program +- How to run an MPI application +- How to access the parallel file system + +Sample MPI-IO C and Fortran programs are provided in Appendix A. Use them to +test your MPI compiler and parallel file system before building HDF5. + +### 1.2. Prerequisites for HPC Systems + +When building on HPC systems: + +1. **Create a working directory** accessible from compute nodes for running tests. + Use a scratch space or parallel file system - **NOT** your home directory, as + this typically causes test failures. + +2. **Load required modules:** + - Desired compiler modules (and set CC, FC, CXX if needed) + - CMake version 3.26 or greater + - MPI implementation module + +3. **For Cray and other systems with recommended compiler wrappers,** set compiler environment variables AFTER loading modules: + + ```bash + export CC=cc + export FC=ftn + export CXX=CC + ``` + +### 1.3. Further Help + +For installation help, post questions to the HDF Forum or HDF Support: + +- **HDF Forum:** <https://forum.hdfgroup.org/> +- **HDF Support:** <https://help.hdfgroup.org/> + +Include the output of `uname -a` and the contents of `CMakeCache.txt` and +`CMakeError.log` from your build directory, and the loaded modules if applicable. + +--- + +## 2. 
Obtaining HDF5 Source + +Obtain HDF5 source code from the HDF5 repository or from a release tar file: + +```bash +git clone https://github.com/HDFGroup/hdf5.git [-b branch] [directory] +``` + +If no branch is specified, the `develop` branch will be checked out. +If no directory is specified, source will be in the `hdf5` directory. + +For release or snapshot tar files, extract them to your working directory. + +> **Note:** When using the ctest automated build method (Section 4), the source +> directory should be named `hdf5-<version>`, where `<version>` uses the format `1.xx.xx` +> or `2.xx.xx`. Use `bin/h5vers` to determine the version string if needed. + +--- + +## 3. Quick Start Instructions + +### 3.1. Using CMake Presets (Recommended for General Builds) + +For building with CMake 3.26 or greater using presets: + +```bash +cd hdf5 +cmake --workflow --preset ci-StdShar-GNUC --fresh +``` + +> **Note:** Standard presets do not enable parallel by default. To enable parallel +> support, you need to create a custom preset in `CMakeUserPresets.json` that sets +> the `HDF5_ENABLE_PARALLEL` cache variable to `ON`, or use the standard CMake build approach below. + +### 3.2. Standard CMake Build (Recommended for Parallel) + +For a basic parallel build: + +```bash +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Release \ + -DHDF5_ENABLE_PARALLEL=ON \ + -DBUILD_SHARED_LIBS=ON \ + -DBUILD_TESTING=ON \ + .. +cmake --build . --config Release +ctest . -C Release +cmake --install . --prefix /path/to/install +``` + +### 3.3. Specifying MPI Compiler + +CMake can usually find MPI automatically. To specify the MPI compiler wrappers explicitly on the command line: + +```bash +cmake -DCMAKE_C_COMPILER=mpicc \ + -DCMAKE_Fortran_COMPILER=mpif90 \ + -DHDF5_ENABLE_PARALLEL=ON \ + .. +``` + +Or select the MPI compiler wrappers through environment variables: + +```bash +export CC=mpicc +export FC=mpif90 +cmake -DHDF5_ENABLE_PARALLEL=ON .. +``` + +### 3.4. 
Important Configuration Options + +| Option | Description | +|--------|-------------| +| `-DHDF5_ENABLE_PARALLEL=ON` | Enable parallel HDF5 (required) | +| `-DBUILD_SHARED_LIBS=ON` | Build shared libraries | +| `-DBUILD_STATIC_LIBS=ON` | Build static libraries | +| `-DHDF5_BUILD_FORTRAN=ON` | Build Fortran interface | +| `-DHDF5_BUILD_CPP_LIB=OFF` | C++ disabled in parallel builds | +| `-DHDF5_ENABLE_THREADSAFE=OFF` | Thread safety disabled in parallel builds | +| `-DHDF5_ENABLE_SUBFILING_VFD=ON` | Enable subfiling VFD (parallel I/O optimization) | +| `-DMPIEXEC_EXECUTABLE=mpiexec` | MPI launcher executable | +| `-DMPIEXEC_NUMPROC_FLAG=-n` | MPI flag for number of processes | +| `-DMPIEXEC_MAX_NUMPROCS=6` | Number of processes for tests | + +> **Note:** Some MPI implementations (e.g., OpenMPI 4.0+) disallow oversubscribing +> by default. Set `MPIEXEC_MAX_NUMPROCS` to the number of available processors or fewer, or add +> appropriate flags (e.g., `--oversubscribe` for OpenMPI). + +--- + +## 4. Automated Builds with ctest (HPC Systems) + +The ctest command provides an automated configure, build, test, and package +workflow for HPC systems with batch schedulers. + +### 4.1. Setup Steps + +1. Rename the source directory to `hdf5-<version>` (e.g., `hdf5-2.0.0`) + +2. Copy or link these CMake scripts to your working directory: + - `hdf5-<version>/config/cmake/scripts/HDF5config.cmake` + - `hdf5-<version>/config/cmake/scripts/CTestScript.cmake` + - `hdf5-<version>/config/cmake/scripts/HDF5options.cmake` + +3. Your working directory should contain: + ``` + CTestScript.cmake + HDF5config.cmake + HDF5options.cmake + hdf5-<version>/ + ``` + +### 4.2. 
Running ctest + +Basic ctest command: + +```bash +ctest -S HDF5config.cmake,BUILD_GENERATOR=Unix -C Release -V -O hdf5.log +``` + +For parallel builds on HPC systems with batch schedulers: + +```bash +ctest -S HDF5config.cmake,HPC=sbatch,MPI=true -C Release -V -O hdf5.log +``` + +#### Available HPC Options + +Append these options after `HDF5config.cmake`, separated by commas: + +| Option | Description | +|--------|-------------| +| `HPC=sbatch` | Use SLURM batch system | +| `HPC=bsub` | Use LSF batch system | +| `MPI=true` | Enable parallel (disables C++, Java, threadsafe) | +| `LOCAL_BATCH_SCRIPT_ARGS="--account=<account>"` | Supply batch job account information | + +#### Examples + +**SLURM system with parallel:** + +```bash +ctest -S HDF5config.cmake,HPC=sbatch,MPI=true \ + -C Release -V -O hdf5.log +``` + +Use `-VV` instead of `-V` for more detailed logging. + +--- + +## 5. Manual CMake Configuration + +If the automated ctest approach is not suitable, you can manually configure +and build HDF5. + +### 5.1. Create Build Directory + +```bash +mkdir build && cd build +``` + +### 5.2. Run CMake Configure + +Example for a parallel build with Fortran on an HPC system: + +```bash +cmake \ + -C ../hdf5-<version>/config/cmake/cacheinit.cmake \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DCMAKE_INSTALL_PREFIX:PATH=/install/path \ + -DHDF5_ENABLE_PARALLEL:BOOL=ON \ + -DHDF5_BUILD_FORTRAN:BOOL=ON \ + -DHDF5_BUILD_CPP_LIB:BOOL=OFF \ + -DHDF5_BUILD_JAVA:BOOL=OFF \ + -DHDF5_ENABLE_THREADSAFE:BOOL=OFF \ + -DHDF5_ENABLE_ZLIB_SUPPORT:BOOL=OFF \ + -DHDF5_ENABLE_SZIP_SUPPORT:BOOL=OFF \ + -DMPIEXEC_EXECUTABLE:STRING=srun \ + -DMPIEXEC_NUMPROC_FLAG:STRING=-n \ + -DMPIEXEC_MAX_NUMPROCS:STRING=6 \ + -G"Unix Makefiles" \ + ../hdf5-<version> +``` + +### 5.3. Build + +```bash +cmake --build . --config Release -j 8 +``` + +### 5.4. Test + +For systems where you can run MPI directly: + +```bash +ctest . -C Release +``` + +For batch systems, create and submit batch scripts (see Section 7.3). + +### 5.5. Install + +```bash +cmake --install . 
--prefix /install/path +``` + +--- + +## 6. Cross-Compiling for HPC Hardware + +### 6.1. Overview + +Cross-compiling is the process of building software on one system architecture (like a login node) to be run on a different architecture (like a compute node). This section provides a historical overview of how this was done on systems that are no longer in service. + +### 6.2. Historical Example: Knights Landing (KNL) on Cray XC40 + +A common historical use case was compiling for Intel Knights Landing (KNL) nodes on Cray XC40 systems, such as the retired Mutrino and Cori machines. These supercomputers had login nodes with a standard CPU architecture (e.g., Haswell) but used the different KNL architecture for their compute nodes. + +To build software for KNL, a "module swapping" technique was required. The build process involved: +1. Loading the compiler module for the login node architecture (e.g., `craype-haswell`) to configure the project. + +2. Switching to the compiler module for the compute node architecture (e.g., `craype-mic-knl`) before starting the actual compilation. + +This process was managed by special CMake toolchain files and custom batch scripts, which were often automated within the `ctest` framework. + +### 6.3. Cross-Compilation on Current Systems + +The specific hardware (Cray XC40, KNL) and the build procedures described above are historical and no longer in use. + +While cross-compilation is less common on many modern, homogeneous HPC clusters, it is still a necessary technique for advanced architectures, such as systems with different processor types or accelerators (e.g., GPUs). + +**If you need assistance with cross-compiling for a current HPC system, please contact the facility administrators or The HDF Group (Section 1.3).** + +--- + +## 7. Running Parallel Tests + +### 7.1. Test Directory Location + +The parallel test suite (`testpar/`) contains tests for Parallel HDF5 and MPI-IO. 
+ +By default, tests use the current directory for test files. To specify a +different location (e.g., parallel file system): + +```bash +export HDF5_PARAPREFIX=/scratch/username/hdf5test +ctest . -C Release +``` + +> **Important:** This is critical for performance - avoid using NFS or home +> directories for parallel testing. + +### 7.2. Important Test Notes + +#### t_mpi + +Tests basic MPI-IO features used by Parallel HDF5. Returns non-zero +exit code if required features fail. + +**Exception:** Testing files >2GB will print informational messages but not fail, +as HDF5 can use alternative file drivers (e.g., family driver) to handle size limits. + +#### t_cache + +Performs many small I/O requests. May run slowly on NFS or other +non-parallel file systems. Set `HDF5_PARAPREFIX` to a parallel file system if +this test is slow. + +#### Test Express Level + +Controls test thoroughness: + +```bash +export HDF5_TEST_EXPRESS=3 # Quick tests (default) +export HDF5_TEST_EXPRESS=0 # Exhaustive tests +``` + +#### Test Timeout + +Default is 1200 seconds (20 minutes). Modify in CMake with: + +```bash +-DDART_TESTING_TIMEOUT=3600 +``` + +### 7.3. Running Tests on Batch Systems + +**For SLURM systems:** + +```bash +sbatch -C quad,cache build/bin/batch/ctestS.sl # Serial tests +sbatch -C quad,cache build/bin/batch/ctestP.sl # Parallel tests +``` + +**For LSF systems:** + +```bash +bsub build/bin/batch/ctestS.lsf # Serial tests +bsub build/bin/batch/ctestP.lsf # Parallel tests +``` + +Batch scripts are generated during CMake configuration in the build directory. + +### 7.4. Running Specific Test Categories + +To run specific test suites: + +```bash +ctest -R "H5TEST" # Core library tests +ctest -R "MPI_TEST" # Parallel/MPI tests +ctest -R "CPP|FORTRAN" # C++ and Fortran tests +ctest -E "MPI_TEST" # Exclude parallel tests +ctest --parallel 4 # Run 4 tests in parallel +``` + +--- + +## 8. Known Platform Notes + +### 8.1. 
Linux Systems + +For MPICH on Linux, ensure >2GB file support by configuring MPICH with: + +```bash +-cflags="-D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64" +``` + +This is available on Linux kernels 2.4 and greater. + +### 8.2. Cray Systems + +- Use `CC=cc`, `FC=ftn`, `CXX=CC` after loading compiler modules +- Unload `craype-hugepages2M` if loaded (**Note**: This is situational advice and is not a universal rule, but it may be a valid troubleshooting step if you encounter memory-related performance issues or allocation errors.) +- Disable shared libraries if encountering linking issues: + ```bash + -DBUILD_SHARED_LIBS=OFF + ``` + +### 8.3. OpenMPI + +OpenMPI 4.0+ disallows oversubscribing by default. Either: + +- Set `MPIEXEC_MAX_NUMPROCS` to actual processor count, or +- Add `--oversubscribe` flag: `-DMPIEXEC_PREFLAGS=--oversubscribe` + +--- + +## References + +**POSIX Compliant:** After a write() to a regular file has successfully +returned, any successful read() from each byte position modified by that +write() will return the data that was written. A subsequent write() to the +same byte will overwrite the file data. If a read() can be proven by any +means [e.g., MPI_Barrier()] to occur after a write() of that data, it must +reflect that write(), even if calls are made by different processes. + +> Lewin, D. (1994). "POSIX Programmer's Guide (pg. 513-4)". O'Reilly & +> Associates. + +--- + +## Appendix A. Sample MPI-IO Programs + +Here are sample MPI-IO C and Fortran programs to test your MPI compiler and +parallel file system before building HDF5. The programs assume they run in +a parallel file system (create test files in current directory). 
For more +examples, please refer to the following directories: +HDF5Examples/C/H5PAR and HDF5Examples/FORTRAN/H5PAR + +### Example Compiling and Running + +```bash +mpicc Sample_mpio.c -o c.out +mpiexec -np 4 c.out + +mpif90 Sample_mpio.f90 -o f.out +mpiexec -np 4 f.out +``` + +### Sample_mpio.c + +```c +/* Simple MPI-IO program testing if a parallel file can be created. + * Default filename can be specified via first program argument. + * Each process writes something, then reads all data back. + */ + +#include <stdio.h> +#include <unistd.h> +#include <mpi.h> +#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */ +# include <mpio.h> +#endif + +#define DIMSIZE 10 /* dimension size, avoid powers of 2. */ +#define PRINTID printf("Proc %d: ", mpi_rank) + +int main(int ac, char **av) +{ + char hostname[128]; + int mpi_size, mpi_rank; + MPI_File fh; + char *filename = "./mpitest.data"; + char mpi_err_str[MPI_MAX_ERROR_STRING]; + int mpi_err_strlen; + int mpi_err; + char writedata[DIMSIZE], readdata[DIMSIZE]; + char expect_val; + int i, irank; + int nerrors = 0; /* number of errors */ + MPI_Offset mpi_off; + MPI_Status mpi_stat; + + MPI_Init(&ac, &av); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + + /* get file name if provided */ + if (ac > 1){ + filename = *++av; + } + if (mpi_rank==0){ + printf("Testing simple MPIO program with %d processes accessing file %s\n", + mpi_size, filename); + printf(" (Filename can be specified via program argument)\n"); + } + + /* show the hostname so that we can tell where the processes are running */ + if (gethostname(hostname, 128) < 0){ + PRINTID; + printf("gethostname failed\n"); + return 1; + } + PRINTID; + printf("hostname=%s\n", hostname); + + if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename, + MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, + MPI_INFO_NULL, &fh)) + != MPI_SUCCESS){ + MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); + PRINTID; + printf("MPI_File_open failed (%s)\n", 
mpi_err_str); + return 1; + } + + /* each process writes some data */ + for (i=0; i < DIMSIZE; i++) + writedata[i] = mpi_rank*DIMSIZE + i; + mpi_off = mpi_rank*DIMSIZE; + if ((mpi_err = MPI_File_write_at(fh, mpi_off, writedata, DIMSIZE, MPI_BYTE, + &mpi_stat)) + != MPI_SUCCESS){ + MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); + PRINTID; + printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n", + (long) mpi_off, (int) DIMSIZE, mpi_err_str); + return 1; + }; + + /* make sure all processes have finished writing. */ + MPI_Barrier(MPI_COMM_WORLD); + + /* each process reads all data and verifies it. */ + for (irank=0; irank < mpi_size; irank++){ + mpi_off = irank*DIMSIZE; + if ((mpi_err = MPI_File_read_at(fh, mpi_off, readdata, DIMSIZE, MPI_BYTE, + &mpi_stat)) + != MPI_SUCCESS){ + MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen); + PRINTID; + printf("MPI_File_read_at offset(%ld), bytes (%d), failed (%s)\n", + (long) mpi_off, (int) DIMSIZE, mpi_err_str); + return 1; + }; + for (i=0; i < DIMSIZE; i++){ + expect_val = irank*DIMSIZE + i; + if (readdata[i] != expect_val){ + PRINTID; + printf("read data[%d:%d] got %d, expect %d\n", irank, i, + readdata[i], expect_val); + nerrors++; + } + } + } + if (nerrors) + return 1; + + MPI_File_close(&fh); + + PRINTID; + printf("all tests passed\n"); + + MPI_Finalize(); + return 0; +} +``` + +### Sample_mpio.f90 + +```fortran +! +! The following example demonstrates how to create and close a parallel +! file using MPI-IO calls. +! +! USE mpi is the proper way to bring in MPI definitions, so this example +! uses it. With an older MPI Fortran compiler that only supports the +! pseudo-standard INCLUDE 'mpif.h', use that INCLUDE statement instead. +! + + PROGRAM MPIOEXAMPLE + + USE mpi + + IMPLICIT NONE + + CHARACTER(LEN=80), PARAMETER :: filename = "filef.h5" ! File name + INTEGER :: ierror ! Error flag + INTEGER :: fh ! File handle + INTEGER :: amode ! 
File access mode + + call MPI_INIT(ierror) + amode = MPI_MODE_RDWR + MPI_MODE_CREATE + MPI_MODE_DELETE_ON_CLOSE + call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, fh, ierror) + print *, "Trying to create ", filename + if ( ierror .eq. MPI_SUCCESS ) then + print *, "MPI_FILE_OPEN succeeded" + call MPI_FILE_CLOSE(fh, ierror) + else + print *, "MPI_FILE_OPEN failed" + endif + + call MPI_FINALIZE(ierror); + END PROGRAM +``` diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index afc392cbeda..bbdf92ab7f6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1226,12 +1226,6 @@ if (LOCAL_BATCH_TEST) ${HDF5_SOURCE_DIR}/bin/batch/ctest_parallel.cmake.in ${HDF5_BINARY_DIR}/ctest_parallel.cmake ESCAPE_QUOTES @ONLY ) - if (LOCAL_BATCH_SCRIPT_COMMAND STREQUAL "raybsub") - configure_file ( - ${HDF5_SOURCE_DIR}/bin/batch/${LOCAL_BATCH_SCRIPT_COMMAND} - ${HDF5_BINARY_DIR}/${LOCAL_BATCH_SCRIPT_COMMAND} ESCAPE_QUOTES @ONLY - ) - endif () if (LOCAL_BATCH_SCRIPT_NAME) configure_file ( ${HDF5_SOURCE_DIR}/bin/batch/${LOCAL_BATCH_SCRIPT_NAME}.in.cmake