diff --git a/Compiling.md b/Compiling.md index 648fea548..74932a1f4 100644 --- a/Compiling.md +++ b/Compiling.md @@ -151,7 +151,3 @@ As also mentioned in the instructions below but repeated here for visibility, if * Pre-trained neural nets are available at [the main training website](https://katagotraining.org/). * You will probably want to edit `configs/gtp_example.cfg` (see "Tuning for Performance" above). * If using OpenCL, you will want to verify that KataGo is picking up the correct device when you run it (e.g. some systems may have both an Intel CPU OpenCL and GPU OpenCL, if KataGo appears to pick the wrong one, you can correct this by specifying `openclGpuToUse` in `configs/gtp_example.cfg`). - * If you want to run `synchronous_loop.sh` on macOS, do the following steps: - * Install GNU coreutils `brew install coreutils` to support a `head` tool that can take negative numbers (`head -n -5` in `train.sh`) - * Install GNU findutils `brew install findutils` to support a `find` tool that supports `-printf` option, that's used by `export_model_for_selfplay.sh`. After that, fix `find` with `gfind` in the script. - Note: you can try to avoid fixing `export_model_for_selfplay.sh` by adjusting `PATH` with the installed findutils: `export PATH="/opt/homebrew/opt/findutils/libexec/gnubin:$PATH"` or by using the alias `alias find="gfind"`. However, it works not always. \ No newline at end of file diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 254d23233..8db79ca73 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -108,6 +108,11 @@ elseif(USE_BACKEND STREQUAL "METAL") message(FATAL_ERROR "Project requires building with AppleClang. Have ${CMAKE_CXX_COMPILER_ID}") endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/external/macos/cmake/modules") + + if (NOT CMAKE_OSX_SYSROOT) + execute_process(COMMAND xcrun --show-sdk-path OUTPUT_VARIABLE CMAKE_OSX_SYSROOT OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() + include(InitializeSwift) include(AddSwift) set(CMAKE_OSX_DEPLOYMENT_TARGET 13.0) @@ -341,7 +346,6 @@ elseif(USE_BACKEND STREQUAL "TENSORRT") if((NOT TENSORRT_INCLUDE_DIR)) message(FATAL_ERROR "${ColorBoldRed} NvInfer.h was NOT found, specify TENSORRT_INCLUDE_DIR to indicate where it is. ${ColorReset}") endif() - find_library(TENSORRT_LIBRARY nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib) # Hackily extract out the version from the TensorRT header # In each case, try the old format and on failure try the new format. @@ -388,6 +392,8 @@ elseif(USE_BACKEND STREQUAL "TENSORRT") message(FATAL_ERROR "Could not determine TensorRT version from header file") endif() + find_library(TENSORRT_LIBRARY NAMES nvinfer nvinfer_${TENSORRT_VERSION_MAJOR} HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib) + # Version 8 is required for serializing the builder timing cache. # Version 8.2 is required for eliminating the global logger for Builder and Runtime. # Version 8.5 is required for eliminating many deprecated APIs and adopting new features. diff --git a/python/selfplay/distributed/download_and_upload_and_shuffle_and_export_loop.sh b/python/selfplay/distributed/download_and_upload_and_shuffle_and_export_loop.sh index 76e531e66..4ca932c4d 100755 --- a/python/selfplay/distributed/download_and_upload_and_shuffle_and_export_loop.sh +++ b/python/selfplay/distributed/download_and_upload_and_shuffle_and_export_loop.sh @@ -39,6 +39,12 @@ shift #and using gating disables the export script from making extraneous selfplay data dirs. USEGATING=1 +if command -v python3 >/dev/null 2>&1; then + PYTHON=python3 +else + PYTHON=python +fi + GITROOTDIR="$(git rev-parse --show-toplevel)" basedir="$(realpath "$BASEDIRRAW")" @@ -75,7 +81,7 @@ cp -r "$GITROOTDIR"/python/selfplay "$DATED_ARCHIVE" while true do echo "BEGINNING SUMMARIZE------------------------------" - time python3 ./summarize_old_selfplay_files.py "$basedir"/selfplay/ \ + time $PYTHON ./summarize_old_selfplay_files.py "$basedir"/selfplay/ \ -old-summary-file-to-assume-correct "$basedir"/selfplay.summary.json \ -new-summary-file "$basedir"/selfplay.summary.json.tmp mv "$basedir"/selfplay.summary.json.tmp "$basedir"/selfplay.summary.json diff --git a/python/selfplay/distributed/upload_model_for_selfplay.sh b/python/selfplay/distributed/upload_model_for_selfplay.sh index c833fb2d6..5df383387 100755 --- a/python/selfplay/distributed/upload_model_for_selfplay.sh +++ b/python/selfplay/distributed/upload_model_for_selfplay.sh @@ -25,6 +25,12 @@ shift #------------------------------------------------------------------------------ +if command -v python3 >/dev/null 2>&1; then + PYTHON=python3 +else + PYTHON=python +fi + mkdir -p "$BASEDIR"/modelstobetested mkdir -p "$BASEDIR"/modelsuploaded @@ -33,9 +39,15 @@ function uploadStuff() { TODIR="$2" #Sort by timestamp so that we process in order of oldest to newest if there are multiple - for FILEPATH in $(find "$BASEDIR"/"$FROMDIR"/ -mindepth 1 -maxdepth 1 -printf "%T@ %p\n" | sort -n | cut -d ' ' -f 2) + # Use python here to avoid 'find -printf' which is not portable to macOS + # Use sys.argv to safely pass directory name with spaces/quotes + $PYTHON -c "import os, sys; d=sys.argv[1]; print('\n'.join(sorted([os.path.join(d, f) for f in os.listdir(d)], key=lambda x: os.path.getmtime(x))))" "$BASEDIR/$FROMDIR" 2>/dev/null | while read -r FILEPATH do - if [ ${FILEPATH: -10} == ".uploading" ] + if [ -z "$FILEPATH" ] + then + continue + fi + if [ "${FILEPATH: -10}" == ".uploading" ] then echo "Skipping upload tmp file:" "$FILEPATH" else @@ -49,23 +61,23 @@ function uploadStuff() { if [ -d "$BASEDIR"/modelsuploaded/"$NAME" ] then - echo "Model with same name aleady exists, so skipping:" "$SRC" + echo "Model with same name already exists, so skipping:" "$SRC" else rm -rf "$TMPDST" mkdir "$TMPDST" - TOBEZIPPED="$TMPDST"/"$RUNNAME"-"$NAME" + TOBEZIPPED="$TMPDST/$RUNNAME-$NAME" mkdir "$TOBEZIPPED" # Build zip containing the ckpt - cp "$SRC"/model.ckpt "$TOBEZIPPED"/model.ckpt - (cd "$TMPDST"; zip -r "$RUNNAME"-"$NAME".zip "$RUNNAME"-"$NAME"/) + cp "$SRC/model.ckpt" "$TOBEZIPPED/model.ckpt" + (cd "$TMPDST"; zip -r "$RUNNAME-$NAME.zip" "$RUNNAME-$NAME/") rm "$TOBEZIPPED"/* rmdir "$TOBEZIPPED" - cp "$SRC"/model.bin.gz "$TMPDST"/"$RUNNAME"-"$NAME".bin.gz - cp "$SRC"/metadata.json "$TMPDST"/metadata.json - cp "$SRC"/log.txt "$TMPDST"/log.txt + cp "$SRC/model.bin.gz" "$TMPDST/$RUNNAME-$NAME.bin.gz" + cp "$SRC/metadata.json" "$TMPDST/metadata.json" + cp "$SRC/log.txt" "$TMPDST/log.txt" #Sleep a little to allow some tolerance on the filesystem sleep 3 @@ -76,13 +88,13 @@ function uploadStuff() { do set +e set -x - python3 ./upload_model.py \ + $PYTHON ./upload_model.py \ -run-name "$RUNNAME" \ - -model-name "$RUNNAME"-"$NAME" \ - -model-file "$TMPDST"/"$RUNNAME"-"$NAME".bin.gz \ - -model-zip "$TMPDST"/"$RUNNAME"-"$NAME".zip \ - -upload-log-file "$TMPDST"/upload_log.txt \ - -metadata-file "$TMPDST"/metadata.json \ + -model-name "$RUNNAME-$NAME" \ + -model-file "$TMPDST/$RUNNAME-$NAME.bin.gz" \ + -model-zip "$TMPDST/$RUNNAME-$NAME.zip" \ + -upload-log-file "$TMPDST/upload_log.txt" \ + -metadata-file "$TMPDST/metadata.json" \ -parents-dir "$TARGETDIR" \ -connection-config "$CONNECTION_CONFIG" \ -rating-only "$RATING_ONLY" diff --git a/python/selfplay/export_model_for_selfplay.sh b/python/selfplay/export_model_for_selfplay.sh index eac659be7..303c48ad8 100755 --- a/python/selfplay/export_model_for_selfplay.sh +++ b/python/selfplay/export_model_for_selfplay.sh @@ -23,6 +23,12 @@ shift #------------------------------------------------------------------------------ +if command -v python3 >/dev/null 2>&1; then + PYTHON=python3 +else + PYTHON=python +fi + mkdir -p "$BASEDIR"/torchmodels_toexport mkdir -p "$BASEDIR"/torchmodels_toexport_extra mkdir -p "$BASEDIR"/modelstobetested @@ -33,24 +39,30 @@ function exportStuff() { FROMDIR="$1" TODIR="$2" - #Sort by timestamp so that we process in order of oldest to newest if there are multiple - for FILEPATH in $(find "$BASEDIR"/"$FROMDIR"/ -mindepth 1 -maxdepth 1 -printf "%T@ %p\n" | sort -n | cut -d ' ' -f 2) + # Sort by timestamp so that we process in order of oldest to newest if there are multiple + # Use python here to avoid 'find -printf' which is not portable to macOS + # Use sys.argv to safely pass directory name with spaces/quotes + $PYTHON -c "import os, sys; d=sys.argv[1]; print('\n'.join(sorted([os.path.join(d, f) for f in os.listdir(d)], key=lambda x: os.path.getmtime(x))))" "$BASEDIR/$FROMDIR" 2>/dev/null | while read -r FILEPATH do #Make sure to skip tmp directories that are transiently there by the training, #they are probably in the process of being written - if [ ${FILEPATH: -4} == ".tmp" ] + if [ -z "$FILEPATH" ] + then + continue + fi + if [ "${FILEPATH: -4}" == ".tmp" ] then echo "Skipping tmp file:" "$FILEPATH" - elif [ ${FILEPATH: -9} == ".exported" ] + elif [ "${FILEPATH: -9}" == ".exported" ] then echo "Skipping self tmp file:" "$FILEPATH" else echo "Found model to export:" "$FILEPATH" NAME="$(basename "$FILEPATH")" - SRC="$BASEDIR"/"$FROMDIR"/"$NAME" - TMPDST="$BASEDIR"/"$FROMDIR"/"$NAME".exported - TARGET="$BASEDIR"/"$TODIR"/"$NAME" + SRC="$BASEDIR/$FROMDIR/$NAME" + TMPDST="$BASEDIR/$FROMDIR/$NAME.exported" + TARGET="$BASEDIR/$TODIR/$NAME" if [ -d "$BASEDIR"/modelstobetested/"$NAME" ] || \ [ -d "$BASEDIR"/rejectedmodels/"$NAME" ] || \ @@ -58,22 +70,22 @@ function exportStuff() { [ -d "$BASEDIR"/models_extra/"$NAME" ] || \ [ -d "$BASEDIR"/modelsuploaded/"$NAME" ] then - echo "Model with same name aleady exists, so skipping:" "$SRC" + echo "Model with same name already exists, so skipping:" "$SRC" else rm -rf "$TMPDST" mkdir "$TMPDST" set -x - python3 ./export_model_pytorch.py \ - -checkpoint "$SRC"/model.ckpt \ + $PYTHON ./export_model_pytorch.py \ + -checkpoint "$SRC/model.ckpt" \ -export-dir "$TMPDST" \ - -model-name "$NAMEPREFIX""-""$NAME" \ + -model-name "$NAMEPREFIX-$NAME" \ -filename-prefix model \ -use-swa - python3 ./clean_checkpoint.py \ - -checkpoint "$SRC"/model.ckpt \ - -output "$TMPDST"/model.ckpt + $PYTHON ./clean_checkpoint.py \ + -checkpoint "$SRC/model.ckpt" \ + -output "$TMPDST/model.ckpt" set +x rm -r "$SRC" @@ -87,9 +99,8 @@ function exportStuff() { then if [ "$TODIR" != "models_extra" ] then - mkdir -p "$BASEDIR"/selfplay/"$NAME" - mkdir -p "$BASEDIR"/selfplay/"$NAME"/sgfs - mkdir -p "$BASEDIR"/selfplay/"$NAME"/tdata + mkdir -p "$BASEDIR/selfplay/$NAME/sgfs" + mkdir -p "$BASEDIR/selfplay/$NAME/tdata" fi fi diff --git a/python/selfplay/shuffle.sh b/python/selfplay/shuffle.sh index 467425380..b7864d487 100755 --- a/python/selfplay/shuffle.sh +++ b/python/selfplay/shuffle.sh @@ -25,6 +25,12 @@ shift #------------------------------------------------------------------------------ +if command -v python3 >/dev/null 2>&1; then + PYTHON=python3 +else + PYTHON=python +fi + OUTDIR=$(date "+%Y%m%d-%H%M%S") mkdir -p "$BASEDIR"/shuffleddata/"$OUTDIR".tmp @@ -36,7 +42,7 @@ echo "Beginning shuffle at" $(date "+%Y-%m-%d %H:%M:%S") if [[ -n "${SKIP_VALIDATE:-}" ]] then ( - time python3 ./shuffle.py \ + time $PYTHON ./shuffle.py \ "$BASEDIR"/selfplay/ \ -expand-window-per-row 0.4 \ -taper-window-exponent 0.65 \ @@ -56,7 +62,7 @@ then else # Randomly peels off 5% of files generated by selfplay as validation data ( - time python3 ./shuffle.py \ + time $PYTHON ./shuffle.py \ "$BASEDIR"/selfplay/ \ -expand-window-per-row 0.4 \ -taper-window-exponent 0.65 \ @@ -74,7 +80,7 @@ else wait ) ( - time python3 ./shuffle.py \ + time $PYTHON ./shuffle.py \ "$BASEDIR"/selfplay/ \ -expand-window-per-row 0.4 \ -taper-window-exponent 0.65 \ @@ -110,7 +116,7 @@ mv "$BASEDIR"/shuffleddata/"$OUTDIR".tmp "$BASEDIR"/shuffleddata/"$OUTDIR" #This should be VERY conservative and allow plenty of time for the training to switch #to newer ones as they get generated. echo "Cleaning up any old dirs" -find "$BASEDIR"/shuffleddata/ -mindepth 1 -maxdepth 1 -type d -mmin +120 | sort | head -n -5 | xargs --no-run-if-empty rm -r +find "$BASEDIR"/shuffleddata/ -mindepth 1 -maxdepth 1 -type d -mmin +120 -print0 | sort -z | head -z -n -5 | xargs -0 --no-run-if-empty rm -r echo "Finished shuffle at" $(date "+%Y-%m-%d %H:%M:%S") #Make a little space between shuffles diff --git a/python/selfplay/shuffle_loop.sh b/python/selfplay/shuffle_loop.sh index fd52e8455..18a705b22 100755 --- a/python/selfplay/shuffle_loop.sh +++ b/python/selfplay/shuffle_loop.sh @@ -20,6 +20,12 @@ shift BATCHSIZE="$1" shift +if command -v python3 >/dev/null 2>&1; then + PYTHON=python3 +else + PYTHON=python +fi + GITROOTDIR="$(git rev-parse --show-toplevel)" basedir="$(realpath "$BASEDIRRAW")" @@ -44,7 +50,7 @@ cp -r "$GITROOTDIR"/python/selfplay "$DATED_ARCHIVE" while true do rm -f "$basedir"/selfplay.summary.json.tmp - time python3 ./summarize_old_selfplay_files.py "$basedir"/selfplay/ \ + time $PYTHON ./summarize_old_selfplay_files.py "$basedir"/selfplay/ \ -old-summary-file-to-assume-correct "$basedir"/selfplay.summary.json \ -new-summary-file "$basedir"/selfplay.summary.json.tmp mv "$basedir"/selfplay.summary.json.tmp "$basedir"/selfplay.summary.json diff --git a/python/selfplay/train.sh b/python/selfplay/train.sh index 40bf72279..0fa3c6a17 100755 --- a/python/selfplay/train.sh +++ b/python/selfplay/train.sh @@ -29,6 +29,13 @@ EXPORTMODE="$1" shift #------------------------------------------------------------------------------ + +if command -v python3 >/dev/null 2>&1; then + PYTHON=python3 +else + PYTHON=python +fi + set -x mkdir -p "$BASEDIR"/train/"$TRAININGNAME" @@ -72,7 +79,7 @@ else exit 1 fi -time python3 ./train.py \ +time $PYTHON ./train.py \ -traindir "$BASEDIR"/train/"$TRAININGNAME" \ -latestdatadir "$BASEDIR"/shuffleddata/ \ -exportdir "$BASEDIR"/"$EXPORT_SUBDIR" \