Files
cloud-apache-container/scripts/entrypoint-shared-ols.sh
jknapp 6bb494c72f fix(shared-ols): review fixes — watcher starvation, atomic render, O(N) chown, safe meta parse
Addresses the local code-review on the OLS-tier images:
- [HIGH] ols-htaccess-watcher.sh: the debounce drain read ALL inotify events
  unfiltered, so on a busy multi-tenant server it never timed out and the
  restart was STARVED (rewrite changes silently never applied). Now coalesces
  with a hard DEBOUNCE-bounded window. Verified under continuous noise.
- [HIGH] render-shared-ols-config.sh: built httpd_config.conf in-place across
  several appends, so a concurrent OLS restart (watcher) or parallel render
  could read a half-written config and 503 the whole tier. Now flock-serialized,
  built in a temp file and atomically moved into place; refuses to publish empty.
- [MED] render + entrypoint: replaced recursive chown of the whole conf tree
  (O(N-sites) on every single-site change / boot) with a targeted chown of just
  the file written.
- [MED] render: parse site.meta with sed instead of sourcing it (do not execute
  panel-written data as shell).
- [cleanup] removed the unused configs/shared-ols/vhconf.tpl (the panel copy is
  the single source; the image never read it).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-10 08:35:26 -07:00

127 lines
4.5 KiB
Bash

#!/usr/bin/env bash
## entrypoint-shared-ols.sh — PID 1 for the shared-ols tier.
##
## One OpenLiteSpeed container fronting MANY tenants' detached cac-lsphp
## sidecars (the OLS analogue of the shared-httpd container). Webserver ONLY —
## it runs NO PHP locally (render-shared-ols-config.sh strips the stock local
## lsphp; every site's PHP goes to its own sidecar over LSAPI). HAProxy stays
## the TLS/WAF/SNI edge and routes OLS-type hostnames here on :443.
##
## Reuses cac-litespeed's hard-won DAEMON-MODE supervision (NOT `openlitespeed
## -n` + wait): OLS self-restarts on QUIC.cloud IP refresh would otherwise exit
## PID 1 cleanly and tear the container down. See entrypoint-litespeed.sh and
## feedback_ols_quiccloud_restart_kills_container.
set -euo pipefail

## Environment label: keep whatever the caller provided, else fall back to PROD.
environment="${environment:-PROD}"

## Fixed locations inside the OLS install tree.
LSWS_CONF=/usr/local/lsws/conf
CERT_DIR="$LSWS_CONF/cert"
HEALTH_DIR=/usr/local/lsws/shared-ols-health

## Values exported to every child process started by this entrypoint.
## SITES_ROOT and LSCACHE_ROOT may be overridden from the container environment.
CONTAINER_ROLE="shared_ols"
SITES_ROOT="${SITES_ROOT:-$LSWS_CONF/shared-sites}"
LSCACHE_ROOT="${LSCACHE_ROOT:-/var/lscache}"
CERT_FILE="$CERT_DIR/shared-ols.crt"
KEY_FILE="$CERT_DIR/shared-ols.key"
export CONTAINER_ROLE SITES_ROOT LSCACHE_ROOT CERT_FILE KEY_FILE

## Create every directory the rest of the script writes into.
mkdir -p "$SITES_ROOT" "$LSCACHE_ROOT" "$CERT_DIR" "$HEALTH_DIR/html"
## ---- self-signed cert for the :443 listener (HAProxy verifies none) ----
## (Re)generate the pair when EITHER half is missing or empty: checking only the
## cert would leave a listener without a usable key after an interrupted first
## boot. openssl's own stderr stays suppressed, so report the failure ourselves
## instead of letting `set -e` end the script without any message.
if [ ! -f "$CERT_FILE" ] || [ ! -s "$CERT_FILE" ] ||
  [ ! -f "$KEY_FILE" ] || [ ! -s "$KEY_FILE" ]; then
  if ! openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \
    -keyout "$KEY_FILE" -out "$CERT_FILE" -subj "/CN=shared-ols" 2>/dev/null; then
    ## Drop any partial output so the next boot starts from a clean slate.
    rm -f -- "$CERT_FILE" "$KEY_FILE"
    echo "entrypoint-shared-ols: failed to generate self-signed cert ($CERT_FILE / $KEY_FILE)." >&2
    exit 1
  fi
fi
## ---- health vhost (catch-all) ----
## Gives the server a valid vhost even with zero customer sites, and answers
## HAProxy health checks that arrive by IP / unknown Host with a 200.
## $VH_ROOT and $DOC_ROOT are single-quoted on purpose: they must reach the file
## literally, not be expanded by this shell.
# shellcheck disable=SC2016
printf '%s\n' \
  'docRoot $VH_ROOT/html' \
  'enableScript 0' \
  'context / {' \
  'allowBrowse 1' \
  'location $DOC_ROOT/' \
  '}' > "$HEALTH_DIR/vhconf.conf"

## Static bodies served by the health vhost.
echo 'ok' > "$HEALTH_DIR/html/healthz"
echo 'shared-ols' > "$HEALTH_DIR/html/index.html"
## ---- ownership ----
## OLS reads conf/ as lsadm. Only the base conf dir, the health dir and the
## files this script just wrote are chowned — deliberately NOT recursive: the
## per-site files under conf/shared-sites are written by the panel and are
## world-readable, and a recursive chown would be O(N-sites) on every container
## (re)start, delaying first-listen after a crash. The render script chowns the
## httpd_config.conf it produces.
## Best-effort: errors are silenced and never abort the boot.
lsadm_owned=(
  "$LSWS_CONF"
  "$HEALTH_DIR"
  "$HEALTH_DIR/html"
  "$HEALTH_DIR/vhconf.conf"
  "$HEALTH_DIR/html/healthz"
  "$HEALTH_DIR/html/index.html"
)
chown lsadm:nogroup "${lsadm_owned[@]}" 2>/dev/null || true
## ---- assemble httpd_config.conf from the panel's per-site files ----
## Runs in the foreground; under `set -e` a failing render aborts the boot.
/scripts/render-shared-ols-config.sh

## ---- stream OLS logs to PID-1 stdout (follows across restarts) ----
## The files are created up front so tail has something to open immediately;
## -F keeps following by name if a file is replaced.
ols_log_dir=/usr/local/lsws/logs
ols_log_files=("$ols_log_dir/error.log" "$ols_log_dir/access.log")
mkdir -p "$ols_log_dir"
touch "${ols_log_files[@]}"
tail -F "${ols_log_files[@]}" 2>/dev/null &

## ---- .htaccess watcher (required; spec 5.3) ----
## Started in the background. The panel monitors that it stays alive (its death
## silently stops rewrite changes applying). The PID is kept so the signal
## handler can stop it on shutdown.
/scripts/ols-htaccess-watcher.sh &
WATCHER_PID=$!
## ---- supervise OLS in DAEMON mode (verbatim model from entrypoint-litespeed.sh) ----
## Shutdown flag: 0 while running normally, 1 once TERM/INT has been received.
STOP_REQUESTED=0

## term_handler — trap target for TERM and INT.
##   Globals: STOP_REQUESTED (written), WATCHER_PID (read)
##   Records that a stop was requested, signals the watcher process, then asks
##   OLS to stop. Both actions are best-effort; failures are ignored.
term_handler() {
  STOP_REQUESTED=1
  kill "$WATCHER_PID" 2>/dev/null || :
  /usr/local/lsws/bin/lswsctrl stop >/dev/null 2>&1 || :
}

trap 'term_handler' TERM INT
## ols_running — succeeds only when `lswsctrl status` reports a running server.
##   Returns: 0 when the status output contains "running with pid"
##            (case-insensitive); non-zero otherwise, including when lswsctrl
##            itself cannot be run.
ols_running() {
  /usr/local/lsws/bin/lswsctrl status 2>/dev/null \
    | grep -i -q 'running with pid'
}
## Crash-loop budget used by the supervision loop: the relaunch counter is
## compared against MAX_STARTS over a sliding WINDOW (seconds).
MAX_STARTS=5
WINDOW=60
## Space-separated epoch timestamps of recent relaunch attempts.
starts=""

## start_ols — ask lswsctrl to start OLS, then poll until it reports running.
##   Polls up to 20 times, sleeping 0.5s after each failed poll (~10s in total).
##   The exit status of `lswsctrl start` itself is ignored; only the status
##   poll decides the outcome.
##   Returns: 0 as soon as ols_running succeeds, 1 if it never does.
start_ols() {
  local polls_left=20
  /usr/local/lsws/bin/lswsctrl start >/dev/null 2>&1 || true
  while [ "$polls_left" -gt 0 ]; do
    if ols_running; then
      return 0
    fi
    sleep 0.5
    polls_left=$((polls_left - 1))
  done
  return 1
}
## First launch. If OLS never reports running there is nothing to supervise:
## fail the container right away.
start_ols || {
  echo "entrypoint-shared-ols: OLS failed to start (not running after 10s)." >&2
  exit 1
}

## Log the status line lswsctrl reports (empty if the status call fails).
initial_status="$(/usr/local/lsws/bin/lswsctrl status 2>/dev/null || true)"
echo "entrypoint-shared-ols: OLS started in daemon mode — $initial_status"
## interruptible_sleep SECONDS — sleep that a trapped signal can cut short.
##   Bash does not run a trap until the current foreground command finishes, so
##   a plain `sleep 3` would delay term_handler by up to 3s and eat into the
##   container's stop grace period. Sleeping in the background and blocking in
##   `wait` lets the trap fire immediately.
##   `|| true`: an interrupted wait returns >128, which must not trip `set -e`.
interruptible_sleep() {
  sleep "$1" &
  wait "$!" || true
}

## Supervision loop. Runs until either a stop was requested and OLS is down
## (exit 0), or OLS had to be relaunched MAX_STARTS times within WINDOW seconds
## (exit 1).
while true; do
  ## Healthy: poll again in 3s.
  if ols_running; then
    interruptible_sleep 3
    continue
  fi

  ## OLS is reported as not running. Unless we are shutting down, wait 2s and
  ## re-check before treating it as a crash (presumably to ride out an OLS
  ## self-restart). During shutdown there is nothing to wait for.
  if [ "$STOP_REQUESTED" -eq 0 ]; then
    interruptible_sleep 2
  fi
  if [ "$STOP_REQUESTED" -eq 1 ]; then
    echo "entrypoint-shared-ols: SIGTERM received, OLS stopped — exiting."
    exit 0
  fi
  if ols_running; then
    continue
  fi

  ## Record this relaunch and keep only the attempts still inside the window,
  ## counting them as we go. $starts is a space-separated list, so the unquoted
  ## expansion (word splitting) is intentional.
  now=$(date +%s)
  recent=""
  n=0
  # shellcheck disable=SC2086
  for t in $starts "$now"; do
    if [ $((now - t)) -lt "$WINDOW" ]; then
      recent="$recent $t"
      n=$((n + 1))
    fi
  done
  starts="$recent"

  echo "entrypoint-shared-ols: OLS not running — relaunching (attempt $n/$MAX_STARTS within ${WINDOW}s)." >&2
  if [ "$n" -ge "$MAX_STARTS" ]; then
    echo "entrypoint-shared-ols: OLS crash-looping — bailing for Docker restart policy / monitor." >&2
    exit 1
  fi

  ## A failed relaunch is not fatal here: the next iteration notices OLS is
  ## still down and counts another attempt.
  start_ols || true
done