diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index 42d98f8..d78564c 100755
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -85,9 +85,29 @@ SANITIZED_DIR="/host/sanitized/$IMPORT_ID"
 mkdir -p "$QUARANTINE_DIR" "$SANITIZED_DIR" \
     || die "cannot create quarantine/sanitized output dirs (are the bind mounts RW?)"
 
-# Container-internal scratch space (mounted as tmpfs by the panel).
-EXTRACT_DIR="/tmp/extract"
-WORK_DIR="/tmp/sanitized"
-mkdir -p "$EXTRACT_DIR" "$WORK_DIR/mysql"
+# Working scratch lives inside the disk-backed bind mount, NOT under /tmp.
+# /tmp is mounted as tmpfs (RAM-backed) by the panel for fast small-file
+# scratch (per-stage reports, exclude lists). Putting the multi-GB cpmove
+# extract there blew the container's --memory 2g cgroup ceiling (tmpfs
+# writes count against cgroup RSS), surfaced as rc=137 OOM kills mid-tar.
+#
+# Layout:
+#   EXTRACT_DIR  $SANITIZED_DIR/extract-work — tar untars here. After
+#                scan-files quarantines bad files, this is the cleaned
+#                tree. Renamed to $SANITIZED_DIR/extracted at the end of
+#                the run so the panel can find it at the expected path.
+#   WORK_DIR     $SANITIZED_DIR/work — scan-dbs writes cleaned
+#                SQL dumps here; folded into $SANITIZED_DIR/mysql at the
+#                end of the run.
+EXTRACT_DIR="$SANITIZED_DIR/extract-work"
+WORK_DIR="$SANITIZED_DIR/work"
+# Unlike the old tmpfs scratch, these directories sit on the bind mount and
+# can outlive the container. A retried import with the same IMPORT_ID would
+# otherwise extract on top of a previous attempt's leftovers, so always
+# start from empty scratch.
+rm -rf -- "$EXTRACT_DIR" "$WORK_DIR" \
+    || die "cannot clear stale scratch dirs under $SANITIZED_DIR"
+mkdir -p "$EXTRACT_DIR" "$WORK_DIR/mysql" \
+    || die "cannot create scratch dirs under $SANITIZED_DIR"
 
 # --- refresh ClamAV signatures --------------------------------------------
@@ -95,9 +115,15 @@ mkdir -p "$EXTRACT_DIR" "$WORK_DIR/mysql"
 STAGE="freshclam"
 if [[ "$CLAMAV_REFRESH" == "true" ]]; then
     log "refreshing ClamAV signatures (freshclam)"
+    # freshclam writes freshclam.dat to its CWD; the container's WORKDIR
+    # is /opt/whp which lives on the read-only rootfs, so freshclam errors
+    # with "Can't create freshclam.dat in /opt/whp" before it ever reaches
+    # the database directory. Subshell + cd to the tmpfs at /var/lib/clamav
+    # (the DatabaseDirectory configured in /etc/freshclam.conf) keeps the
+    # entrypoint's CWD intact for later stages.
     # freshclam is allowed to fail (e.g., container has no outbound net);
     # we proceed with the baseline rules from build time + log a warning.
-    if ! freshclam --no-warnings >/tmp/freshclam.log 2>&1; then
+    if ! ( cd /var/lib/clamav && freshclam --no-warnings >/tmp/freshclam.log 2>&1 ); then
         log "WARN: freshclam failed; proceeding with build-time signature DB"
         tail -20 /tmp/freshclam.log || true
     fi
@@ -109,7 +135,11 @@ fi
 
 STAGE="extract"
 log "stage: extract"
-if ! /scripts/extract.sh "$IMPORT_BACKUP_FILE" "$EXTRACT_DIR" "$IMPORT_USERNAME"; then
+# 4th arg pins the stripped-symlinks actions sidecar to /tmp (not inside
+# $EXTRACT_DIR) so finalize_layout's mv doesn't carry an importer dotfile
+# into the cleaned tree and so write_report can read it after the rename.
+STRIPPED_SYMLINKS_FILE="/tmp/stripped-symlinks.json"
+if ! /scripts/extract.sh "$IMPORT_BACKUP_FILE" "$EXTRACT_DIR" "$IMPORT_USERNAME" "$STRIPPED_SYMLINKS_FILE"; then
     die "extract.sh failed; see stderr above"
 fi
 
@@ -137,29 +167,39 @@ php /scripts/scan-dbs.php \
     --username "$IMPORT_USERNAME" \
     || die "scan-dbs.php failed; see stderr above"
 
-# --- rsync cleaned tree to /host/sanitized --------------------------------
+# --- finalize cleaned tree into /host/sanitized/$IMPORT_ID/ ---------------
 
-STAGE="rsync_out"
-log "stage: rsync_out"
-# Copy the (now-cleaned) extracted tree to the sanitized output. We exclude
-# files that scan-files.php quarantined — they are NOT present in the
-# extract dir anymore (the scanner moved them), so this is the cleaned
-# tree by construction.
-rsync -a --no-owner --no-group --no-perms --chmod=Du=rwx,Dg=rx,Do=,Fu=rw,Fg=r,Fo= \
-    "$EXTRACT_DIR"/ "$SANITIZED_DIR/extracted/" \
-    || die "rsync to sanitized dir failed"
-
-# Then drop the cleaned .sql files in place too.
-rsync -a --no-owner --no-group --no-perms --chmod=Du=rwx,Dg=rx,Do=,Fu=rw,Fg=r,Fo= \
-    "$WORK_DIR/mysql"/ "$SANITIZED_DIR/mysql/" \
-    || die "rsync of cleaned .sql files failed"
+STAGE="finalize_layout"
+log "stage: finalize_layout"
+# Both EXTRACT_DIR and WORK_DIR already live INSIDE $SANITIZED_DIR (the
+# bind-mounted disk-backed output root), so we don't need to cross-filesystem
+# rsync 10GB+ of cleaned files. A same-filesystem `mv` is constant-time
+# (just a rename) — turns what used to be a multi-minute rsync into a
+# fraction of a second.
+#
+# Cleanup posture: a previous run may have left `extracted/` or `mysql/`
+# behind. `mv` onto an existing directory does not fail — it moves the
+# source INSIDE it (extracted/extract-work) — so the stale outputs must be
+# gone first, and a failed wipe has to stop the run rather than be ignored.
+rm -rf -- "$SANITIZED_DIR/extracted" "$SANITIZED_DIR/mysql" \
+    || die "finalize: cannot remove stale extracted/ and mysql/ outputs"
+mv -- "$EXTRACT_DIR" "$SANITIZED_DIR/extracted" || die "finalize: rename extract-work failed"
+mv -- "$WORK_DIR/mysql" "$SANITIZED_DIR/mysql" || die "finalize: rename work/mysql failed"
+# Tidy up the now-empty WORK_DIR shell.
+rmdir "$WORK_DIR" 2>/dev/null || true
+# The rsync this stage replaced also forced modes on everything it copied
+# (--chmod: directories u=rwx,g=rx,o= and files u=rw,g=r,o=). A rename keeps
+# whatever modes the archive carried, so re-apply them here. This is a
+# metadata-only walk (symlinks skipped), far cheaper than the old copy.
+find "$SANITIZED_DIR/extracted" "$SANITIZED_DIR/mysql" ! -type l \
+    \( -type d -exec chmod 0750 {} + -o -exec chmod 0640 {} + \) \
+    || die "finalize: cannot normalise permissions on cleaned tree"
 
 # --- merge per-stage reports into the final report.json -------------------
 
 STAGE="write_report"
 log "stage: write_report"
 DURATION=$(( $(date -u +%s) - START_TS ))
-STRIPPED_SYMLINKS_FILE="$EXTRACT_DIR/.cpanel-importer-stripped-symlinks.json"
 php -r '
     $importId = $argv[1];
     $duration = (int) $argv[2];