diff --git a/commands/run.sh b/commands/run.sh
index a0aae523..4edd9cbe 100755
--- a/commands/run.sh
+++ b/commands/run.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 set -ueo pipefail
 
+. "$DIR/../lib/log.bash"
+
 # Run takes a service name, pulls down any pre-built image for that name
 # and then runs docker-compose run a generated project name
 
@@ -423,35 +425,30 @@ elif [[ ${#command[@]} -gt 0 ]] ; then
 fi
 
 ensure_stopped() {
-  echo '+++ :warning: Signal received, stopping container'
-  docker stop "${container_name}" || true
+  # Reporting is best-effort: it must never abort the handler before cleanup
+  log 143 || true
+  echo '+++ :warning: Signal received, stopping container gracefully'
+  compose_cleanup "${run_service}"
   echo '~~~ Last log lines that may be missing above (if container was not already removed)'
   docker logs "${container_name}" || true
-  exitcode='TRAP'
+  exit 143
 }
 
-trap ensure_stopped SIGINT SIGTERM SIGQUIT
+trap 'ensure_stopped "$?"' SIGINT SIGTERM SIGQUIT
 
 if [[ "${BUILDKITE_PLUGIN_DOCKER_COMPOSE_COLLAPSE_RUN_LOG_GROUP:-false}" = "true" ]]; then
   group_type="---"
 else
   group_type="+++"
 fi
-# Disable -e to prevent cancelling step if the command fails for whatever reason
-set +e
-( # subshell is necessary to trap signals (compose v2 fails to stop otherwise)
+
+exitcode=0
+(
   echo "${group_type} :docker: Running ${display_command[*]:-} in service $run_service" >&2
   run_docker_compose "${run_params[@]}"
-)
-exitcode=$?
-
-# Restore -e as an option.
-set -e
+) || exitcode=$?
 
-if [[ $exitcode = "TRAP" ]]; then
-  # command failed due to cancellation signal, make sure there is an error but no further output
-  exitcode=-1
-elif [[ $exitcode -ne 0 ]] ; then
+if [[ $exitcode -ne 0 ]] ; then
   echo "^^^ +++"
   echo "+++ :warning: Failed to run command, exited with $exitcode, run params:"
   echo "${run_params[@]}"
diff --git a/hooks/pre-exit b/hooks/pre-exit
index f3bd61d4..8f1b4d82 100755
--- a/hooks/pre-exit
+++ b/hooks/pre-exit
@@ -15,5 +15,5 @@ if [[ -n "$(plugin_read_list RUN)" ]] && [[ "$(plugin_read_config CLEANUP "true"
   . "$DIR/../lib/run.bash"
 
   echo "~~~ :docker: Cleaning up after docker-compose" >&2
-  compose_cleanup
+  compose_cleanup ""
 fi
diff --git a/lib/log.bash b/lib/log.bash
new file mode 100644
index 00000000..d66e4980
--- /dev/null
+++ b/lib/log.bash
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# Reports a received termination signal: prints a message, stores the code as
+# Buildkite meta-data and sends an event to Datadog. Every step is best-effort
+# so a reporting failure cannot abort a caller running under "set -euo pipefail".
+# Arguments: $1 - exit code to report
+log() {
+  local code="${1:-}"
+  local step_key="${BUILDKITE_STEP_KEY:-}"
+  local msg="SIG ${code} received, process exiting"
+
+  echo "${msg}"
+  buildkite-agent meta-data set "dd_tags.job-signal-${step_key}" "${code}" || true
+  buildkite-agent meta-data set "dd_tags.step-error-code-${step_key}" "${code}" || true
+  buildkite-agent meta-data set "dd_tags.job-error-code-${BUILDKITE_JOB_ID:-}" "${code}" || true
+
+  echo "$(pidof buildkite-agent) is the pid of the buildkite agent" || true
+
+  send_job_signaled_to_dd "${msg}" "${code}" || true
+}
+
+# Builds the Datadog event payload for a signaled job and sends it.
+# Arguments: $1 - event text, $2 - exit status
+send_job_signaled_to_dd() {
+  local step_key="${BUILDKITE_STEP_KEY:-}"
+
+  send_event_to_dd '{ "title": "Job '"${step_key}"' received signal", "text": "'"${1:-}"'", "alert_type": "error", "tags": [ "ci:job_signal", "exit_status:'"${2:-}"'", "job_name:'"${step_key}"'", "build_id:'"${BUILDKITE_BUILD_ID:-}"'", "branch:'"${BUILDKITE_BRANCH:-}"'", "hs_source:docker_compose_plugin", "env:ci" ] }'
+}
+
+# Posts a JSON event to the Datadog events API using curl or wget.
+# Arguments: $1 - JSON payload
+# Returns: non-zero when DD_API_KEY is unset, no HTTP client exists or the request fails
+send_event_to_dd() {
+  local url="https://api.datadoghq.com/api/v1/events"
+
+  if [[ -z "${DD_API_KEY:-}" ]]; then
+    echo "DD_API_KEY is not set, skipping Datadog event"
+    return 1
+  fi
+
+  if command -v curl >/dev/null 2>&1; then
+    echo "Using curl to send event to Datadog"
+    # Bounded by a timeout so a slow endpoint cannot stall signal handling
+    curl -X POST "${url}" \
+      --max-time 10 \
+      -H "Accept: application/json" \
+      -H "Content-Type: application/json" \
+      -H "DD-API-KEY: ${DD_API_KEY}" \
+      -d "$1"
+  elif command -v wget >/dev/null 2>&1; then
+    echo "Using wget to send event to Datadog"
+    wget \
+      --timeout=10 \
+      --tries=1 \
+      --header="Accept: application/json" \
+      --header="Content-Type: application/json" \
+      --header="DD-API-KEY: ${DD_API_KEY}" \
+      --post-data="$1" \
+      --output-document - \
+      "${url}"
+  else
+    echo "No suitable network tool found to send event to Datadog"
+    # return, not exit: this file is sourced and exit would kill the caller
+    return 1
+  fi
+}
diff --git a/lib/run.bash b/lib/run.bash
index f531292a..522ef069 100644
--- a/lib/run.bash
+++ b/lib/run.bash
@@ -1,14 +1,20 @@
 #!/bin/bash
 
-compose_cleanup() {
-  if [[ "$(plugin_read_config GRACEFUL_SHUTDOWN 'false')" == "false" ]]; then
-    # Send all containers a SIGKILL
-    run_docker_compose kill || true
-  else
-    # Send all containers a friendly SIGTERM, followed by a SIGKILL after exceeding the stop_grace_period
-    run_docker_compose stop || true
+# Force-removes the compose project's containers. With GRACEFUL_SHUTDOWN
+# enabled it first waits for the given service to exit and prints its status.
+# Arguments: $1 - service to wait for (optional)
+kill_or_wait_for_stop() {
+  local service="${1:-}"
+  local container_exit_code=0
+
+  if [[ "$(plugin_read_config GRACEFUL_SHUTDOWN 'false')" == "true" ]]; then
+    # This will block until the container exits. The status is captured with
+    # "||" so a failing wait cannot skip the removal below under "set -e"
+    run_docker_compose wait "${service}" || container_exit_code=$?
+    echo "exit code was $container_exit_code"
   fi
 
+  # This will kill the container if it hasn't exited yet
   # `compose down` doesn't support force removing images
   if [[ "$(plugin_read_config LEAVE_VOLUMES 'false')" == "false" ]]; then
     run_docker_compose rm --force -v || true
@@ -24,6 +30,29 @@ compose_cleanup() {
   fi
 }
 
+# Stops and removes everything docker-compose started for this job.
+# Arguments: $1 - service to stop (optional, all services when empty)
+compose_cleanup() {
+  local service="${1:-}"
+  local cleanup_pid
+
+  kill_or_wait_for_stop "${service}" &
+  cleanup_pid=$!
+  sleep 1
+
+  # No need to call kill directly for GRACEFUL_SHUTDOWN == false since rm --force will send the same kill signal
+  if [[ "$(plugin_read_config GRACEFUL_SHUTDOWN 'false')" == "true" ]]; then
+    echo "graceful shutdown was true, stopping ${service}"
+    # Send all containers a friendly SIGTERM, followed by a SIGKILL after exceeding the stop_grace_period
+    # An empty service name is left out, "stop" would reject it
+    run_docker_compose stop ${service:+"${service}"} || true
+  fi
+
+  # Wait for the background removal so the caller cannot exit before
+  # the containers are actually gone
+  wait "${cleanup_pid}" || true
+}
+
 # Checks for failed containers and writes logs for them the the provided dir
 check_linked_containers_and_save_logs() {
   local service="$1"
diff --git a/tests/cleanup.bats b/tests/cleanup.bats
old mode 100644
new mode 100755
index adb69f18..1bec6d49
--- a/tests/cleanup.bats
+++ b/tests/cleanup.bats
@@ -17,13 +17,13 @@ load '../lib/run'
   export BUILDKITE_PLUGIN_DOCKER_COMPOSE_CLEANUP=true
 
   stub docker-compose \
-    "-f docker-compose.yml -p buildkite1111 kill : echo killing containers" \
-    "-f docker-compose.yml -p buildkite1111 rm --force -v : echo removing stopped containers" \
+    "-f docker-compose.yml -p buildkite1111 rm --force -v : echo killing and removing stopped containers" \
     "-f docker-compose.yml -p buildkite1111 down --remove-orphans --volumes : echo removing everything"
 
   run "$PWD"/hooks/pre-exit
 
   assert_success
   assert_output --partial "Cleaning up after docker-compose"
+
   unstub docker-compose
 }
diff --git a/tests/docker-compose-cleanup.bats b/tests/docker-compose-cleanup.bats
index a8843434..7cd3ada6 100644
--- a/tests/docker-compose-cleanup.bats
+++ b/tests/docker-compose-cleanup.bats
@@ -15,19 +15,19 @@ setup () {
 
   run compose_cleanup
   assert_success
-  assert_equal "${lines[0]}" "kill"
-  assert_equal "${lines[1]}" "rm --force -v"
-  assert_equal "${lines[2]}" "down --remove-orphans --volumes"
+  assert_equal "${lines[0]}" "rm --force -v"
+  assert_equal "${lines[1]}" "down --remove-orphans --volumes"
 }
 
 @test "Possible to gracefully shutdown containers in docker-compose cleanup" {
-  export BUILDKITE_PLUGIN_DOCKER_COMPOSE_GRACEFUL_SHUTDOWN=1
+  export BUILDKITE_PLUGIN_DOCKER_COMPOSE_GRACEFUL_SHUTDOWN="true"
 
   run compose_cleanup
   assert_success
-  assert_equal "${lines[0]}" "stop"
-  assert_equal "${lines[1]}" "rm --force -v"
-  assert_equal "${lines[2]}" "down --remove-orphans --volumes"
+  assert_output --partial "wait"
+  assert_equal "${lines[1]}" "exit code was 0"
+  assert_equal "${lines[2]}" "rm --force -v"
+  assert_equal "${lines[3]}" "down --remove-orphans --volumes"
 }
 
 @test "Possible to skip volume destruction in docker-compose cleanup" {
@@ -35,7 +35,6 @@ setup () {
 
   run compose_cleanup
   assert_success
-  assert_equal "${lines[0]}" "kill"
-  assert_equal "${lines[1]}" "rm --force"
-  assert_equal "${lines[2]}" "down --remove-orphans"
+  assert_equal "${lines[0]}" "rm --force"
+  assert_equal "${lines[1]}" "down --remove-orphans"
 }