fix(shared-ols): review fixes — watcher starvation, atomic render, O(N) chown, safe meta parse
Addresses the local code-review on the OLS-tier images:

- [HIGH] ols-htaccess-watcher.sh: the debounce drain read ALL inotify events unfiltered, so on a busy multi-tenant server it never timed out and the restart was STARVED (rewrite changes silently never applied). Now coalesces with a hard DEBOUNCE-bounded window. Verified under continuous noise.
- [HIGH] render-shared-ols-config.sh: built httpd_config.conf in-place across several appends, so a concurrent OLS restart (watcher) or parallel render could read a half-written config and 503 the whole tier. Now flock-serialized, built in a temp file and atomically moved into place; refuses to publish empty.
- [MED] render + entrypoint: replaced recursive chown of the whole conf tree (O(N-sites) on every single-site change / boot) with a targeted chown of just the file written.
- [MED] render: parse site.meta with sed instead of sourcing it (do not execute panel-written data as shell).
- [cleanup] removed the unused configs/shared-ols/vhconf.tpl (the panel copy is the single source; the image never read it).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -45,11 +45,17 @@ EOF
|
||||
printf 'ok\n' > "$HEALTH_DIR/html/healthz"
|
||||
printf 'shared-ols\n' > "$HEALTH_DIR/html/index.html"
|
||||
|
||||
## ---- ownership: OLS reads conf/ as lsadm. chown the base conf dir + health dir
|
||||
## NON-recursively (the per-site files under conf/shared-sites are written by the
|
||||
## panel and are world-readable; a recursive chown here would be O(N-sites) on
|
||||
## every container (re)start, delaying first-listen after a crash). The render
|
||||
## script chowns the httpd_config.conf it produces. ----
|
||||
chown lsadm:nogroup "$LSWS_CONF" "$HEALTH_DIR" "$HEALTH_DIR/html" 2>/dev/null || true
|
||||
chown lsadm:nogroup "$HEALTH_DIR/vhconf.conf" "$HEALTH_DIR/html/healthz" "$HEALTH_DIR/html/index.html" 2>/dev/null || true
|
||||
|
||||
## ---- assemble httpd_config.conf from the panel's per-site files ----
|
||||
/scripts/render-shared-ols-config.sh
|
||||
|
||||
chown -R lsadm:nogroup "$LSWS_CONF" "$HEALTH_DIR" 2>/dev/null || true
|
||||
|
||||
## ---- stream OLS logs to PID-1 stdout (follows across restarts) ----
|
||||
mkdir -p /usr/local/lsws/logs
|
||||
touch /usr/local/lsws/logs/error.log /usr/local/lsws/logs/access.log
|
||||
|
||||
@@ -52,7 +52,22 @@ while read -r fname; do
|
||||
.htaccess) ;;
|
||||
*) continue ;;
|
||||
esac
|
||||
## Drain further events for DEBOUNCE seconds (coalesce the burst), then act.
|
||||
while read -r -t "$DEBOUNCE" _; do :; done
|
||||
## A tenant .htaccess changed. Coalesce the save-burst, then restart ONCE.
|
||||
##
|
||||
## The coalesce is HARD-BOUNDED to DEBOUNCE seconds: a previous version blocked
|
||||
## on `read -t DEBOUNCE` which, on a busy multi-tenant server, never timed out
|
||||
## (unrelated file writes under $WATCH_ROOT kept resetting it) — so the restart
|
||||
## was starved and rewrite changes silently never applied. Here we read further
|
||||
## events only until the deadline OR ~2s of total quiet, whichever comes first,
|
||||
## so continuous activity can delay us by at most DEBOUNCE. do_restart's FLOOR
|
||||
## then rate-limits across consecutive bursts.
|
||||
deadline=$(( $(date +%s) + DEBOUNCE ))
|
||||
while [ "$(date +%s)" -lt "$deadline" ]; do
|
||||
if read -r -t 2 _; then
|
||||
continue # more activity — keep coalescing toward the deadline
|
||||
else
|
||||
break # ~2s of total quiet — the burst has settled
|
||||
fi
|
||||
done
|
||||
do_restart
|
||||
done
|
||||
|
||||
@@ -11,8 +11,10 @@
|
||||
## (Empirically established 2026-06-10 — see the OLS-tier PoC.)
|
||||
##
|
||||
## Per-site contract — the panel writes, for each site, a directory:
|
||||
## $SITES_ROOT/<vhname>/vhconf.conf (rendered from configs/shared-ols/vhconf.tpl)
|
||||
## $SITES_ROOT/<vhname>/site.meta (shell: VHNAME, VHROOT, DOMAINS="a.com,www.a.com")
|
||||
## $SITES_ROOT/<vhname>/vhconf.conf (rendered by the WHP panel from its own
|
||||
## web-files/configs/shared-ols-vhconf-template.tpl
|
||||
## — the single source of truth for vhost detail)
|
||||
## $SITES_ROOT/<vhname>/site.meta (VHNAME=, VHROOT=, DOMAINS=a.com,www.a.com)
|
||||
## This script turns each into a `virtualhost {configFile}` stanza + a listener
|
||||
## `map` line. A site dir missing either file is skipped (logged).
|
||||
##
|
||||
@@ -28,10 +30,23 @@ KEY_FILE=${KEY_FILE:-$LSWS_CONF/cert/shared-ols.key}
|
||||
export LSCACHE_ROOT
|
||||
|
||||
OUT="$LSWS_CONF/httpd_config.conf"
|
||||
TMP="$LSWS_CONF/.httpd_config.conf.tmp.$$"
|
||||
STOCK="/usr/local/lsws/.conf/httpd_config.conf"
|
||||
|
||||
mkdir -p "$SITES_ROOT" "$LSCACHE_ROOT"
|
||||
|
||||
## --- SERIALIZE concurrent renders + write ATOMICALLY ---
|
||||
## The panel can fire two renders at once (parallel provisioning), and the
|
||||
## in-container .htaccess watcher issues `lswsctrl restart` independently. If OLS
|
||||
## (re)reads httpd_config.conf while it's half-written, it fails to parse and the
|
||||
## whole tier 503s. So: (1) flock so only one render runs at a time; (2) build
|
||||
## into $TMP and atomically `mv` into place at the end, so any concurrent OLS
|
||||
## restart always sees a COMPLETE config (the old one until the instant of mv).
|
||||
exec 9>"$LSWS_CONF/.render.lock"
|
||||
flock 9 || { echo "render-shared-ols: could not acquire render lock" >&2; exit 1; }
|
||||
trap 'rm -f "$TMP"' EXIT
|
||||
## From here on, build into $TMP (not $OUT).
|
||||
|
||||
## --- 1. start from a pristine stock config (idempotent) ---
|
||||
if [ ! -f "$STOCK" ]; then
|
||||
## Some image builds keep the only copy at conf/; snapshot it once so future
|
||||
@@ -52,13 +67,13 @@ awk '
|
||||
/^scriptHandler ?\{/ { skip=1; next }
|
||||
skip && /^\}/ { skip=0; next }
|
||||
!skip { print }
|
||||
' "$STOCK" > "$OUT"
|
||||
' "$STOCK" > "$TMP"
|
||||
|
||||
## --- 3. append our server-level base (real-IP, cache module, no local PHP) ---
|
||||
{
|
||||
echo ""
|
||||
envsubst '${LSCACHE_ROOT}' < "$TPL_DIR/httpd_config_base.tpl"
|
||||
} >> "$OUT"
|
||||
} >> "$TMP"
|
||||
|
||||
## --- 4. emit per-site vhost stanzas + collect listener map lines ---
|
||||
maps=""
|
||||
@@ -66,9 +81,13 @@ site_count=0
|
||||
for meta in "$SITES_ROOT"/*/site.meta; do
|
||||
[ -e "$meta" ] || continue
|
||||
sdir=$(dirname "$meta")
|
||||
VHNAME=""; VHROOT=""; DOMAINS=""
|
||||
# shellcheck source=/dev/null
|
||||
. "$meta"
|
||||
## PARSE site.meta with sed — do NOT `source` it. The panel writes these values
|
||||
## (derived from DB domains), so they should be safe, but sourcing panel data as
|
||||
## shell would execute any metacharacters as root in this container if a value
|
||||
## ever slipped validation. sed extraction treats them as plain data.
|
||||
VHNAME=$(sed -n 's/^VHNAME=//p' "$meta" | head -1)
|
||||
VHROOT=$(sed -n 's/^VHROOT=//p' "$meta" | head -1)
|
||||
DOMAINS=$(sed -n 's/^DOMAINS=//p' "$meta" | head -1)
|
||||
if [ -z "$VHNAME" ] || [ -z "$VHROOT" ] || [ -z "$DOMAINS" ] || [ ! -f "$sdir/vhconf.conf" ]; then
|
||||
echo "render-shared-ols: skipping $sdir (incomplete: VHNAME/VHROOT/DOMAINS/vhconf.conf)" >&2
|
||||
continue
|
||||
@@ -82,7 +101,7 @@ for meta in "$SITES_ROOT"/*/site.meta; do
|
||||
echo " enableScript 1"
|
||||
echo " restrained 1"
|
||||
echo "}"
|
||||
} >> "$OUT"
|
||||
} >> "$TMP"
|
||||
maps="${maps} map ${VHNAME} ${DOMAINS}"$'\n'
|
||||
site_count=$((site_count + 1))
|
||||
done
|
||||
@@ -98,7 +117,7 @@ done
|
||||
echo " allowSymbolLink 1"
|
||||
echo " enableScript 0"
|
||||
echo "}"
|
||||
} >> "$OUT"
|
||||
} >> "$TMP"
|
||||
maps="${maps} map _health *"$'\n'
|
||||
|
||||
## --- 6. listeners (HTTP :80 + HTTPS :443 self-signed) carrying ALL maps.
|
||||
@@ -119,7 +138,17 @@ maps="${maps} map _health *"$'\n'
|
||||
echo " certFile ${CERT_FILE}"
|
||||
printf '%s' "$maps"
|
||||
echo "}"
|
||||
} >> "$OUT"
|
||||
} >> "$TMP"
|
||||
|
||||
chown -R lsadm:nogroup "$LSWS_CONF" 2>/dev/null || true
|
||||
## --- 7. publish atomically. Check that the temp file is non-empty, then mv into
|
||||
## place (rename is atomic on the same filesystem) so a concurrent OLS restart
|
||||
## never sees a half-written config. chown only the file we wrote — NOT a
|
||||
## recursive chown of the whole conf tree (that was O(N-sites) on every single
|
||||
## change; the per-site files are world-readable and owned correctly already). ---
|
||||
if [ ! -s "$TMP" ]; then
|
||||
echo "render-shared-ols: refusing to publish empty config" >&2
|
||||
exit 1
|
||||
fi
|
||||
chown lsadm:nogroup "$TMP" 2>/dev/null || true
|
||||
mv -f "$TMP" "$OUT"
|
||||
echo "render-shared-ols: wrote $OUT ($site_count customer vhost(s) + health)"
|
||||
|
||||
Reference in New Issue
Block a user