From 6b9d92915e187de3fb14dad7055aea080e8e03f8 Mon Sep 17 00:00:00 2001 From: Lachlan Donald Date: Mon, 26 Aug 2019 17:47:04 +1000 Subject: [PATCH 1/4] Allow minimum disk before cleanup to be customized --- packer/linux/conf/bin/bk-check-disk-space.sh | 64 ++++++++++++++----- .../conf/bin/bk-install-elastic-stack.sh | 6 ++ .../conf/buildkite-agent/hooks/environment | 9 ++- .../linux/conf/docker/cron.hourly/docker-gc | 12 +++- .../docker/cron.hourly/docker-low-disk-gc | 18 ++++-- templates/aws-stack.yml | 12 ++++ 6 files changed, 94 insertions(+), 27 deletions(-) diff --git a/packer/linux/conf/bin/bk-check-disk-space.sh b/packer/linux/conf/bin/bk-check-disk-space.sh index 3c25f11ef..52a98129f 100755 --- a/packer/linux/conf/bin/bk-check-disk-space.sh +++ b/packer/linux/conf/bin/bk-check-disk-space.sh @@ -1,25 +1,57 @@ #!/bin/bash set -euo pipefail -DISK_MIN_AVAILABLE=${DISK_MIN_AVAILABLE:-5242880} # 5GB -DISK_MIN_INODES=${DISK_MIN_INODES:-250000} # docker needs lots - -DOCKER_DIR="/var/lib/docker/" - -disk_avail=$(df -k --output=avail "$DOCKER_DIR" | tail -n1) - -echo "Disk space free: $(df -k -h --output=avail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')" - -if [[ $disk_avail -lt $DISK_MIN_AVAILABLE ]]; then - echo "Not enough disk space free, cutoff is ${DISK_MIN_AVAILABLE} 🚨" >&2 - exit 1 +# Usage: +# bk-check-disk-space.sh (min disk required) (min inodes required) +# min disk required can be either an amount of bytes, a pattern like 10G +# or 500M, or a percentage like 5% +# min inodes must be a number, default to 250,000 + +# Converts human-readable units like 1.43K and 120.3M to bytes +dehumanize() { + awk '/[0-9][bB]?$/ {printf "%u\n", $1*1024} + /[tT][bB]?$/ {printf "%u\n", $1*(1024*1024*1024)} + /[gG][bB]?$/ {printf "%u\n", $1*(1024*1024)} + /[mM][bB]?$/ {printf "%u\n", $1*(1024)} + /[kK][bB]?$/ {printf "%u\n", $1*1}' <<< "$1" +} + +min_available=${1:-5G} +docker_dir="/var/lib/docker/" + +# First check the disk available + +disk_avail=$(df -k 
--output=avail "$docker_dir" | tail -n1) +disk_avail_human=$(df -k -h --output=avail "$docker_dir" | tail -n1 | tr -d '[:space:]') +disk_used_pct=$(df -k --output=pcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%') +disk_free_pct=$((100-disk_used_pct)) + +printf "Disk space free: %s (%s%%)\\n" "$disk_avail_human" "$disk_free_pct" + +# Check if the min_available is a percentage +if [[ $min_available =~ \%$ ]] ; then + if [[ $(echo "${disk_free_pct}<${min_available}" | sed 's/%//g' | bc) -gt 0 ]] ; then + echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2 + exit 1 + fi +else + if [[ $disk_avail -lt $(dehumanize "$min_available") ]]; then + echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2 + exit 1 + fi fi -inodes_avail=$(df -k --output=iavail "$DOCKER_DIR" | tail -n1) +# Next check inodes, these can be exhausted by docker build operations + +inodes_min_available=${2:-250000} +inodes_avail=$(df -k --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]') +inodes_avail_human=$(df -k -h --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]') +inodes_used_pct=$(df -k --output=ipcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%') +inodes_free_pct=$((100-inodes_used_pct)) -echo "Inodes free: $(df -k -h --output=iavail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')" +printf "Inodes free: %s (%s%%)\\n" "$inodes_avail_human" "$inodes_free_pct" -if [[ $inodes_avail -lt $DISK_MIN_INODES ]]; then - echo "Not enough inodes free, cutoff is ${DISK_MIN_INODES} 🚨" >&2 +if [[ $inodes_avail -lt $inodes_min_available ]]; then + echo "Not enough inodes free, cutoff is ${inodes_min_available} 🚨" >&2 exit 1 fi diff --git a/packer/linux/conf/bin/bk-install-elastic-stack.sh b/packer/linux/conf/bin/bk-install-elastic-stack.sh index 4c55c0cbf..982f11151 100755 --- a/packer/linux/conf/bin/bk-install-elastic-stack.sh +++ b/packer/linux/conf/bin/bk-install-elastic-stack.sh @@ -62,6 +62,12 @@ export 
PLUGINS_ENABLED="${PLUGINS_ENABLED[*]-}" export BUILDKITE_ECR_POLICY=${BUILDKITE_ECR_POLICY:-none} EOF +# cron-env is sourced by crontab entries and low disk scripts +cat << EOF > /var/lib/buildkite-agent/cron-env +export DISK_MIN_AVAILABLE=$DISK_MIN_AVAILABLE +export DOCKER_PRUNE_UNTIL=$DOCKER_PRUNE_UNTIL +EOF + if [[ "${BUILDKITE_AGENT_RELEASE}" == "edge" ]] ; then echo "Downloading buildkite-agent edge..." curl -Lsf -o /usr/bin/buildkite-agent-edge \ diff --git a/packer/linux/conf/buildkite-agent/hooks/environment b/packer/linux/conf/buildkite-agent/hooks/environment index d5995e13f..48712e02e 100755 --- a/packer/linux/conf/buildkite-agent/hooks/environment +++ b/packer/linux/conf/buildkite-agent/hooks/environment @@ -7,6 +7,11 @@ source ~/cfn-env echo "~~~ :llama: Setting up elastic stack environment ($BUILDKITE_STACK_VERSION)" cat ~/cfn-env +if [[ -f ~/cron-env ]] ; then + # shellcheck source=/dev/null + source ~/cron-env +fi + echo "Checking docker" if ! docker ps ; then echo "^^^ +++" @@ -17,13 +22,13 @@ if ! docker ps ; then fi echo "Checking disk space" -if ! /usr/local/bin/bk-check-disk-space.sh ; then +if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-5G}" ; then echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL:-4h}" docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}" echo "Checking disk space again" - if ! /usr/local/bin/bk-check-disk-space.sh ; then + if ! 
/usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-5G}"; then echo "Disk health checks failed" >&2 exit 1 fi diff --git a/packer/linux/conf/docker/cron.hourly/docker-gc b/packer/linux/conf/docker/cron.hourly/docker-gc index 1ab07e68f..71f85ae0a 100755 --- a/packer/linux/conf/docker/cron.hourly/docker-gc +++ b/packer/linux/conf/docker/cron.hourly/docker-gc @@ -5,10 +5,16 @@ if [[ $EUID -eq 0 ]]; then exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log fi -DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-4h} +# Load config from file if it exists +if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then + # shellcheck source=/dev/null + source /var/lib/buildkite-agent/cron-env +else + DOCKER_PRUNE_UNTIL=4h +fi ## ------------------------------------------ ## Prune stuff that doesn't affect cache hits -docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL}" -docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL}" +docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}" +docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}" diff --git a/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc b/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc index ff68b64dc..44d772974 100644 --- a/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc +++ b/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc @@ -5,8 +5,6 @@ if [[ $EUID -eq 0 ]]; then exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log fi -DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-1h} - mark_instance_unhealthy() { # cancel any running buildkite builds killall -QUIT buildkite-agent || true @@ -19,14 +17,22 @@ mark_instance_unhealthy() { trap mark_instance_unhealthy ERR +# Load config from file if it exists +if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then + # shellcheck source=/dev/null + source /var/lib/buildkite-agent/cron-env +else + DISK_MIN_AVAILABLE=5G +fi + ## 
----------------------------------------------------------------- ## Check disk, we only want to prune images/containers if we have to -if ! /usr/local/bin/bk-check-disk-space.sh ; then - echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL}" - docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL}" +if ! /usr/local/bin/bk-check-disk-space.sh "$DISK_MIN_AVAILABLE" ; then + echo "Cleaning up docker resources older than 1h" + docker image prune --all --force --filter "until=1h" - if ! /usr/local/bin/bk-check-disk-space.sh ; then + if ! /usr/local/bin/bk-check-disk-space.sh "$DISK_MIN_AVAILABLE" ; then echo "Disk health checks failed" >&2 exit 1 fi diff --git a/templates/aws-stack.yml b/templates/aws-stack.yml index f6855d9fb..ed9d7c776 100644 --- a/templates/aws-stack.yml +++ b/templates/aws-stack.yml @@ -375,6 +375,16 @@ Parameters: - "false" Default: "false" + MinimumDiskAvailableBeforeCleanup: + Type: String + Description: Either a percentage (%) or absolute unit (B, MB, GB) of disk below which disk cleanup is run + Default: "2GB" + + DockerPruneUntil: + Type: String + Description: How far back to prune docker networks images and containers on hourly cleanup + Default: "4h" + Outputs: VpcId: Value: @@ -857,6 +867,8 @@ Resources: BUILDKITE_ECR_POLICY=${ECRAccessPolicy} \ BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB=${BuildkiteTerminateInstanceAfterJob} \ BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS=${BuildkiteAdditionalSudoPermissions} \ + DISK_MIN_AVAILABLE="${MinimumDiskAvailableBeforeCleanup}" \ + DOCKER_PRUNE_UNTIL="${DockerPruneUntil} \ AWS_DEFAULT_REGION=${AWS::Region} \ SECRETS_PLUGIN_ENABLED=${EnableSecretsPlugin} \ ECR_PLUGIN_ENABLED=${EnableECRPlugin} \ From bd5fbab21f415059b77413a7a36cfb96fd3053f7 Mon Sep 17 00:00:00 2001 From: Lachlan Donald Date: Tue, 27 Aug 2019 21:57:44 +1000 Subject: [PATCH 2/4] Fix mis-matched quotes --- templates/aws-stack.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/templates/aws-stack.yml b/templates/aws-stack.yml index ed9d7c776..12de96435 100644 --- a/templates/aws-stack.yml +++ b/templates/aws-stack.yml @@ -868,7 +868,7 @@ Resources: BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB=${BuildkiteTerminateInstanceAfterJob} \ BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS=${BuildkiteAdditionalSudoPermissions} \ DISK_MIN_AVAILABLE="${MinimumDiskAvailableBeforeCleanup}" \ - DOCKER_PRUNE_UNTIL="${DockerPruneUntil} \ + DOCKER_PRUNE_UNTIL="${DockerPruneUntil}" \ AWS_DEFAULT_REGION=${AWS::Region} \ SECRETS_PLUGIN_ENABLED=${EnableSecretsPlugin} \ ECR_PLUGIN_ENABLED=${EnableECRPlugin} \ From 0ae78f347f0de326c8d1247749e3a8e6cb3e0e7c Mon Sep 17 00:00:00 2001 From: Arturo Pie Date: Wed, 2 Oct 2019 22:07:19 -0400 Subject: [PATCH 3/4] Fix and add tests for the dehumanize function --- packer/linux/conf/bin/bk-check-disk-space.sh | 12 ++---- packer/linux/conf/bin/dehumanize-test.sh | 39 ++++++++++++++++++++ packer/linux/conf/bin/dehumanize.sh | 10 +++++ 3 files changed, 52 insertions(+), 9 deletions(-) create mode 100755 packer/linux/conf/bin/dehumanize-test.sh create mode 100644 packer/linux/conf/bin/dehumanize.sh diff --git a/packer/linux/conf/bin/bk-check-disk-space.sh b/packer/linux/conf/bin/bk-check-disk-space.sh index 52a98129f..43d072324 100755 --- a/packer/linux/conf/bin/bk-check-disk-space.sh +++ b/packer/linux/conf/bin/bk-check-disk-space.sh @@ -7,21 +7,15 @@ set -euo pipefail # or 500M, or a percentage like 5% # min inodes must be a number, default to 250,000 -# Converts human-readable units like 1.43K and 120.3M to bytes -dehumanize() { - awk '/[0-9][bB]?$/ {printf "%u\n", $1*1024} - /[tT][bB]?$/ {printf "%u\n", $1*(1024*1024*1024)} - /[gG][bB]?$/ {printf "%u\n", $1*(1024*1024)} - /[mM][bB]?$/ {printf "%u\n", $1*(1024)} - /[kK][bB]?$/ {printf "%u\n", $1*1}' <<< "$1" -} +. 
"$(dirname "$0")"/dehumanize.sh min_available=${1:-5G} docker_dir="/var/lib/docker/" # First check the disk available -disk_avail=$(df -k --output=avail "$docker_dir" | tail -n1) +# dehumanize reports bytes, so ask df for bytes as well (-k would yield KiB) +disk_avail=$(df -B1 --output=avail "$docker_dir" | tail -n1) disk_avail_human=$(df -k -h --output=avail "$docker_dir" | tail -n1 | tr -d '[:space:]') disk_used_pct=$(df -k --output=pcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%') disk_free_pct=$((100-disk_used_pct)) diff --git a/packer/linux/conf/bin/dehumanize-test.sh b/packer/linux/conf/bin/dehumanize-test.sh new file mode 100755 index 000000000..1e3d4c393 --- /dev/null +++ b/packer/linux/conf/bin/dehumanize-test.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -o pipefail + +. "$(dirname "$0")"/dehumanize.sh + +test_without_unit(){ + assertEquals 45 "$(dehumanize 45)" +} + +test_bytes(){ + assertEquals 45 "$(dehumanize 45b)" + assertEquals 45 "$(dehumanize 45B)" +} + +test_kilobytes(){ + assertEquals 46080 "$(dehumanize 45kb)" + assertEquals 46080 "$(dehumanize 45KB)" +} + +test_megabytes(){ + assertEquals 47185920 "$(dehumanize 45mb)" + assertEquals 47185920 "$(dehumanize 45MB)" +} + +test_gigabytes(){ + assertEquals 48318382080 "$(dehumanize 45gb)" + assertEquals 48318382080 "$(dehumanize 45GB)" +} + +test_terabytes(){ + assertEquals 49478023249920 "$(dehumanize 45tb)" + assertEquals 49478023249920 "$(dehumanize 45TB)" +} + +test_using_decimals(){ + assertEquals 1610612736 "$(dehumanize 1.5gb)" +} + +. shunit2 diff --git a/packer/linux/conf/bin/dehumanize.sh b/packer/linux/conf/bin/dehumanize.sh new file mode 100644 index 000000000..ca90e8897 --- /dev/null +++ b/packer/linux/conf/bin/dehumanize.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# Converts human-readable units like 1.43K and 120.3M to bytes +dehumanize() { + awk '/[0-9][bB]?$/ {printf "%u\n", $1*1} + /[tT][bB]?$/ {printf "%u\n", $1*(1024*1024*1024*1024)} + /[gG][bB]?$/ {printf "%u\n", $1*(1024*1024*1024)} + /[mM][bB]?$/ {printf "%u\n", $1*(1024*1024)} + /[kK][bB]?$/ {printf "%u\n", $1*1024}' <<< "$1" +} From dfa2eb3eb8830ccc86c0fd4b0a396a215f03d3f3 Mon Sep 17 00:00:00 2001 From: Arturo Pie Date: Sun, 6 Oct 2019 11:03:06 -0400 Subject: [PATCH 4/4] removes default disk min duplication. 
bk-check-disk-space will have the default --- packer/linux/conf/buildkite-agent/hooks/environment | 4 ++-- packer/linux/conf/docker/cron.hourly/docker-low-disk-gc | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/packer/linux/conf/buildkite-agent/hooks/environment b/packer/linux/conf/buildkite-agent/hooks/environment index 48712e02e..b853dba19 100755 --- a/packer/linux/conf/buildkite-agent/hooks/environment +++ b/packer/linux/conf/buildkite-agent/hooks/environment @@ -22,13 +22,13 @@ if ! docker ps ; then fi echo "Checking disk space" -if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-5G}" ; then +if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL:-4h}" docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}" echo "Checking disk space again" - if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-5G}"; then + if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}"; then echo "Disk health checks failed" >&2 exit 1 fi diff --git a/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc b/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc index 44d772974..c0697c9c3 100644 --- a/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc +++ b/packer/linux/conf/docker/cron.hourly/docker-low-disk-gc @@ -21,18 +21,16 @@ trap mark_instance_unhealthy ERR if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then # shellcheck source=/dev/null source /var/lib/buildkite-agent/cron-env -else - DISK_MIN_AVAILABLE=5G fi ## ----------------------------------------------------------------- ## Check disk, we only want to prune images/containers if we have to -if ! /usr/local/bin/bk-check-disk-space.sh "$DISK_MIN_AVAILABLE" ; then +if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then echo "Cleaning up docker resources older than 1h" docker image prune --all --force --filter "until=1h" - if ! 
/usr/local/bin/bk-check-disk-space.sh "$DISK_MIN_AVAILABLE" ; then + if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-}" ; then echo "Disk health checks failed" >&2 exit 1 fi