Addresses the local code-review on the OLS-tier images: - [HIGH] ols-htaccess-watcher.sh: the debounce drain read ALL inotify events unfiltered, so on a busy multi-tenant server it never timed out and the restart was STARVED (rewrite changes silently never applied). Now coalesces with a hard DEBOUNCE-bounded window. Verified under continuous noise. - [HIGH] render-shared-ols-config.sh: built httpd_config.conf in-place across several appends, so a concurrent OLS restart (watcher) or parallel render could read a half-written config and 503 the whole tier. Now flock-serialized, built in a temp file and atomically moved into place; refuses to publish empty. - [MED] render + entrypoint: replaced recursive chown of the whole conf tree (O(N-sites) on every single-site change / boot) with a targeted chown of just the file written. - [MED] render: parse site.meta with sed instead of sourcing it (do not execute panel-written data as shell). - [cleanup] removed the unused configs/shared-ols/vhconf.tpl (the panel copy is the single source; the image never read it). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
127 lines
4.5 KiB
Bash
127 lines
4.5 KiB
Bash
#!/usr/bin/env bash
|
|
## entrypoint-shared-ols.sh — PID 1 for the shared-ols tier.
##
## One OpenLiteSpeed container fronting MANY tenants' detached cac-lsphp
## sidecars (the OLS analogue of the shared-httpd container). Webserver ONLY —
## it runs NO PHP locally (render-shared-ols-config.sh strips the stock local
## lsphp; every site's PHP goes to its own sidecar over LSAPI). HAProxy stays
## the TLS/WAF/SNI edge and routes OLS-type hostnames here on :443.
##
## Reuses cac-litespeed's hard-won DAEMON-MODE supervision (NOT `openlitespeed
## -n` + wait): OLS self-restarts on QUIC.cloud IP refresh would otherwise exit
## PID 1 cleanly and tear the container down. See entrypoint-litespeed.sh and
## feedback_ols_quiccloud_restart_kills_container.
## Strict mode: stop on unhandled command failures, on unset variables, and on
## a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

## Fall back to PROD when the caller leaves `environment` unset or empty.
environment="${environment:-PROD}"

CONTAINER_ROLE="shared_ols"
export CONTAINER_ROLE

## Fixed locations inside the OLS install tree.
LSWS_CONF=/usr/local/lsws/conf
CERT_DIR="${LSWS_CONF}/cert"
HEALTH_DIR=/usr/local/lsws/shared-ols-health

## SITES_ROOT and LSCACHE_ROOT honour a value already present in the
## environment; the cert/key paths are always derived from CERT_DIR. All four
## are exported, so every child process started from here inherits them.
: "${SITES_ROOT:=${LSWS_CONF}/shared-sites}"
: "${LSCACHE_ROOT:=/var/lscache}"
CERT_FILE="${CERT_DIR}/shared-ols.crt"
KEY_FILE="${CERT_DIR}/shared-ols.key"
export SITES_ROOT LSCACHE_ROOT CERT_FILE KEY_FILE

## Make sure every directory the rest of the script writes into exists.
mkdir -p "$SITES_ROOT" "$LSCACHE_ROOT" "$CERT_DIR" "${HEALTH_DIR}/html"
## ---- self-signed cert for the :443 listener (HAProxy verifies none) ----
## Generate the pair when EITHER half is missing or empty. Checking the cert
## alone would skip generation when only the key is absent or truncated,
## leaving the listener with an unusable pair. Regenerating replaces both
## files; that is harmless because the peer does not verify the cert.
if [ ! -s "$CERT_FILE" ] || [ ! -s "$KEY_FILE" ]; then
  # openssl's stderr is captured rather than discarded: it stays out of the
  # log on success, but is reported if generation fails (with it thrown away,
  # `set -e` would end PID 1 without any explanation).
  if ! openssl_err=$(openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \
    -keyout "$KEY_FILE" -out "$CERT_FILE" -subj "/CN=shared-ols" 2>&1 >/dev/null); then
    # Drop any partial output so the next start retries from scratch.
    rm -f -- "$CERT_FILE" "$KEY_FILE"
    printf 'entrypoint-shared-ols: self-signed cert generation failed: %s\n' "$openssl_err" >&2
    exit 1
  fi
fi
## ---- health vhost (catch-all) ----
## Gives the server a valid vhost even when there are zero customer sites, and
## lets HAProxy health checks that arrive by IP / unknown Host get a 200.
## The vhost serves static files from $VH_ROOT/html with scripting disabled.
# shellcheck disable=SC2016 -- $VH_ROOT / $DOC_ROOT must reach the file literally
printf '%s\n' \
  'docRoot $VH_ROOT/html' \
  'enableScript 0' \
  'context / {' \
  'allowBrowse 1' \
  'location $DOC_ROOT/' \
  '}' \
  > "$HEALTH_DIR/vhconf.conf"

## Static payloads served by that vhost.
echo 'ok' > "$HEALTH_DIR/html/healthz"
echo 'shared-ols' > "$HEALTH_DIR/html/index.html"
## ---- ownership ----
## OLS reads conf/ as lsadm. Only the base conf dir, the health dir and the
## files just written into it are handed over — deliberately NOT a recursive
## chown: the per-site files under conf/shared-sites are written by the panel
## and are world-readable, and walking them would cost O(N-sites) on every
## container (re)start, delaying first-listen after a crash. The render script
## chowns the httpd_config.conf it produces.
lsadm_paths=(
  "$LSWS_CONF"
  "$HEALTH_DIR"
  "$HEALTH_DIR/html"
  "$HEALTH_DIR/vhconf.conf"
  "$HEALTH_DIR/html/healthz"
  "$HEALTH_DIR/html/index.html"
)
# Best effort: errors are silenced and never abort startup.
chown lsadm:nogroup "${lsadm_paths[@]}" 2>/dev/null || true
## ---- assemble httpd_config.conf from the panel's per-site files ----
## Runs in the foreground; under `set -e` a failing render aborts startup.
/scripts/render-shared-ols-config.sh

## ---- stream OLS logs to PID-1 stdout ----
## The files are created up front so tail has something to open; `tail -F`
## keeps following them by name across restarts.
ols_log_dir=/usr/local/lsws/logs
mkdir -p "$ols_log_dir"
touch "$ols_log_dir/error.log" "$ols_log_dir/access.log"
tail -F "$ols_log_dir/error.log" "$ols_log_dir/access.log" 2>/dev/null &

## ---- .htaccess watcher (required; spec 5.3) ----
## Started in the background and not supervised here: the panel monitors that
## it stays alive (its death silently stops rewrite changes applying). The PID
## is kept so the TERM/INT handler can stop it.
/scripts/ols-htaccess-watcher.sh &
WATCHER_PID=$!
## ---- supervise OLS in DAEMON mode (verbatim model from entrypoint-litespeed.sh) ----
|
|
STOP_REQUESTED=0
|
|
term_handler() {
|
|
STOP_REQUESTED=1
|
|
kill "$WATCHER_PID" 2>/dev/null || true
|
|
/usr/local/lsws/bin/lswsctrl stop >/dev/null 2>&1 || true
|
|
}
|
|
trap term_handler TERM INT
|
|
|
|
## Succeeds (status 0) when `lswsctrl status` prints text containing
## "running with pid", matched case-insensitively; fails otherwise. Anything
## lswsctrl writes to stderr is discarded.
ols_running() {
  /usr/local/lsws/bin/lswsctrl status 2>/dev/null \
    | grep -i -q -e 'running with pid'
}
## Crash-loop guard used by the supervision loop:
##   MAX_STARTS  relaunch attempts inside one window at which the loop gives up
##   WINDOW      length of that window, in seconds
##   starts      space-separated epoch timestamps of recent relaunch attempts
MAX_STARTS=5
WINDOW=60
starts=
## Ask lswsctrl to start OLS, then poll until it reports running.
## The start command's own exit status is ignored; only the status poll counts.
## Polls up to 20 times, 0.5s apart (about 10s in total).
## Returns: 0 as soon as ols_running succeeds, 1 if it never does.
start_ols() {
  local attempt

  /usr/local/lsws/bin/lswsctrl start >/dev/null 2>&1 || true

  for ((attempt = 0; attempt < 20; attempt++)); do
    if ols_running; then
      return 0
    fi
    sleep 0.5
  done

  return 1
}
## First launch: if OLS never reports running there is nothing to supervise,
## so give up immediately with a non-zero status.
start_ols || {
  echo "entrypoint-shared-ols: OLS failed to start (not running after 10s)." >&2
  exit 1
}

## Log the status text lswsctrl reports (empty if the status call fails).
ols_status=$(/usr/local/lsws/bin/lswsctrl status 2>/dev/null || true)
echo "entrypoint-shared-ols: OLS started in daemon mode — ${ols_status}"
## ---- supervision loop ----
## Polls OLS every 3s while it is up. When it is seen down, waits 2s and looks
## again before acting, so a brief outage that has already healed is ignored.
## If OLS is still down:
##   - a stop was requested (STOP_REQUESTED=1)  -> exit 0
##   - MAX_STARTS attempts within WINDOW seconds -> exit 1 (crash loop)
##   - otherwise                                 -> relaunch and keep polling

## Sleep for $1 seconds, but return early when a trapped TERM/INT arrives.
## Bash postpones a trap handler until the current foreground command
## finishes, so a plain `sleep 3` would hold term_handler back for up to 3s of
## the container's stop grace period. The `wait` builtin instead returns as
## soon as a trapped signal is received, and the handler runs right away.
## `|| true` keeps the >128 status of an interrupted wait from tripping `set -e`.
interruptible_sleep() {
  sleep "$1" &
  wait "$!" || true
}

while true; do
  if ols_running; then
    interruptible_sleep 3
    continue
  fi

  # Seen down: allow a moment for it to come back by itself before reacting.
  interruptible_sleep 2
  if [ "$STOP_REQUESTED" -eq 0 ] && ols_running; then
    continue
  fi
  if [ "$STOP_REQUESTED" -eq 1 ]; then
    echo "entrypoint-shared-ols: SIGTERM received, OLS stopped — exiting."
    exit 0
  fi

  # Record this attempt and keep only the attempts younger than WINDOW
  # seconds, counting them as we go.
  now=$(date +%s)
  recent=""
  n=0
  # shellcheck disable=SC2086 -- $starts is a space-separated list; splitting is intended
  for t in $starts "$now"; do
    if [ $((now - t)) -lt "$WINDOW" ]; then
      recent="$recent $t"
      n=$((n + 1))
    fi
  done
  starts="$recent"

  echo "entrypoint-shared-ols: OLS not running — relaunching (attempt $n/$MAX_STARTS within ${WINDOW}s)." >&2
  if [ "$n" -ge "$MAX_STARTS" ]; then
    echo "entrypoint-shared-ols: OLS crash-looping — bailing for Docker restart policy / monitor." >&2
    exit 1
  fi

  # A failed relaunch is not fatal here: the next pass sees OLS down again and
  # the attempt counter above decides when to give up.
  start_ols || true
done