From ecf891ff02790b3a21df7c753ccb211cda5408ea Mon Sep 17 00:00:00 2001
From: Josh Knapp
Date: Wed, 1 Apr 2026 15:17:15 -0700
Subject: [PATCH] Don't abort cert renewal when a single domain fails

The renewal script was exiting immediately when certbot returned a
non-zero exit code, which happens when ANY cert fails to renew. A single
dead domain (e.g., DNS no longer pointed here) would block ALL other
certificates from being processed and combined for HAProxy.

Now logs the failures but continues to copy/combine successfully renewed
certificates and reload HAProxy.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Review note: this patch was reconstructed from a whitespace-collapsed
copy. Line breaks and the 4-space indentation are assumed; confirm them
against scripts/renew-certificates.sh before applying. The post-image
blob id on the index line predates the review edits to the added lines.

 scripts/renew-certificates.sh | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/scripts/renew-certificates.sh b/scripts/renew-certificates.sh
index c35d233..3be1eaa 100644
--- a/scripts/renew-certificates.sh
+++ b/scripts/renew-certificates.sh
@@ -20,12 +20,26 @@ log_error() {
 
 log_info "Starting certificate renewal process"
 
-# Run certbot renewal
-if certbot renew --quiet --no-random-sleep-on-renew; then
-    log_info "Certbot renewal completed"
+# Run certbot renewal — don't exit on failure, some certs may have
+# renewed successfully even if others failed (e.g., domain no longer
+# pointed here). Continue to copy/combine whatever succeeded.
+# The status is captured with `|| CERTBOT_EXIT=$?` so that a non-zero
+# exit cannot abort the script if errexit (set -e) is enabled.
+CERTBOT_EXIT=0
+CERTBOT_OUTPUT=$(certbot renew --no-random-sleep-on-renew 2>&1) || CERTBOT_EXIT=$?
+
+if [ "$CERTBOT_EXIT" -eq 0 ]; then
+    log_info "Certbot renewal completed successfully"
 else
-    log_error "Certbot renewal failed with exit code $?"
-    exit 1
+    log_error "Certbot renewal had failures (exit code $CERTBOT_EXIT):"
+    # Log the specific failures; if no line matches the pattern, log
+    # the full output so the cause of the failure is never lost.
+    CERTBOT_FAILURES=$(printf '%s\n' "$CERTBOT_OUTPUT" | grep -E "Failed to renew|failure") \
+        || CERTBOT_FAILURES=$CERTBOT_OUTPUT
+    printf '%s\n' "$CERTBOT_FAILURES" | while read -r line; do
+        log_error "  $line"
+    done
+    log_info "Continuing to process successfully renewed certificates..."
 fi
 
 # Copy all certificates to HAProxy format