#!/usr/bin/env bash
#
# entrypoint.sh — main controller for the cpanel-importer sandbox.
#
# Inputs (env, set by the panel's docker run):
#   IMPORT_ID            unique id for this run; used in quarantine + report paths
#   IMPORT_USERNAME      cPanel/WHP username the cpmove belongs to
#   IMPORT_BACKUP_FILE   absolute path inside the container, typically
#                        /host/backup/cpmove-<username>.tar.gz
#   CLAMAV_REFRESH       "true" to run freshclam at start (default: true)
#
# Flow (spec §0):
#   1. validate env
#   2. (optional) refresh ClamAV signatures
#   3. extract       → /host/sanitized/<IMPORT_ID>/extract-work/
#   4. file scan     → /tmp/scan-files-report.json
#   5. DB sanitize   → /host/sanitized/<IMPORT_ID>/work/mysql/,
#                      /tmp/scan-dbs-report.json
#   6. rename the work dirs to /host/sanitized/<IMPORT_ID>/{extracted,mysql}/
#   7. write /host/sanitized/<IMPORT_ID>/report.json (merged)
#
# On failure at any stage we still write a partial report.json with
# status="failed" + the stage that broke, then exit non-zero.

set -euo pipefail

# --- logging ---------------------------------------------------------------

# ts: print the current UTC time as an ISO-8601 timestamp.
ts() { date -u +'%Y-%m-%dT%H:%M:%SZ'; }

# log MESSAGE...: print a timestamped line on stdout.
log() { printf '[%s] %s\n' "$(ts)" "$*"; }

# die MESSAGE...: log the message, persist a failure report for the current
# $STAGE, and exit 1.
die() {
  log "FATAL: $*"
  write_failure_report "$STAGE" "$*"
  exit 1
}

# Buffered partial state. The final report.json is written by the merge
# step (the write_report stage); if we crash before then,
# write_failure_report emits whatever partial pieces exist.
STAGE="init"
START_TS="$(date -u +%s)"

# json_string VALUE: print VALUE as a JSON string literal (quotes included).
# Falls back to the literal "(unencodable)" when php is unavailable or the
# value cannot be encoded, so the caller always receives valid JSON.
json_string() {
  php -r '
    $flags = defined("JSON_INVALID_UTF8_SUBSTITUTE") ? JSON_INVALID_UTF8_SUBSTITUTE : 0;
    $encoded = json_encode($argv[1], $flags);
    if (!is_string($encoded)) { exit(1); }
    echo $encoded;
  ' -- "$1" 2>/dev/null || printf '%s' '"(unencodable)"'
}

# write_failure_report STAGE MESSAGE
# Best-effort: writes /host/sanitized/<IMPORT_ID>/report.json (or .../unknown/
# when IMPORT_ID is unset) describing the failure. Always returns 0.
#
# NOTE(review): the key names "stage" and "error" below were reconstructed;
# confirm them against whatever the panel parses before relying on them.
write_failure_report() {
  local stage="$1"
  local msg="$2"
  local out_dir="/host/sanitized/${IMPORT_ID:-unknown}"

  # mkdir AND the report write can both fail (mount RO, missing
  # /host/sanitized, etc.); we log every failure and never let the
  # report-writer abort the script.
  if ! mkdir -p "$out_dir" 2>/dev/null; then
    log "WARN: failure-report mkdir failed for $out_dir; report will not be persisted"
    return 0
  fi

  # Every interpolated string goes through json_string so quotes, backslashes
  # or newlines in the id / stage / message cannot produce invalid JSON.
  if ! cat > "$out_dir/report.json" 2>/dev/null <<JSON
{
  "import_id": $(json_string "${IMPORT_ID:-unknown}"),
  "status": "failed",
  "stage": $(json_string "$stage"),
  "error": $(json_string "$msg"),
  "scan_duration_seconds": $(( $(date -u +%s) - START_TS )),
  "files": null,
  "databases": null
}
JSON
  then
    log "WARN: failure-report write failed for $out_dir/report.json"
  fi
}

# --- env validation --------------------------------------------------------

STAGE="validate_env"
log "cpanel-importer starting (container UID=$(id -u) GID=$(id -g))"

# Checked explicitly rather than with ${VAR:?...}: that expansion makes the
# shell exit on the spot, which would skip die and leave no failure report.
for required_var in IMPORT_ID IMPORT_USERNAME IMPORT_BACKUP_FILE; do
  if [[ -z "${!required_var:-}" ]]; then
    die "$required_var env var is required"
  fi
done
CLAMAV_REFRESH="${CLAMAV_REFRESH:-true}"

log "import_id=$IMPORT_ID username=$IMPORT_USERNAME backup=$IMPORT_BACKUP_FILE"

if [[ ! -f "$IMPORT_BACKUP_FILE" ]]; then
  die "backup file does not exist or is not a regular file: $IMPORT_BACKUP_FILE"
fi

# Make sure the output dirs exist (they're bind mounts, so we trust the
# host to have created them, but mkdir -p is harmless).
QUARANTINE_DIR="/host/quarantine/$IMPORT_ID"
SANITIZED_DIR="/host/sanitized/$IMPORT_ID"
mkdir -p "$QUARANTINE_DIR" "$SANITIZED_DIR" \
  || die "cannot create quarantine/sanitized output dirs (are the bind mounts RW?)"

# Working scratch lives inside the disk-backed bind mount, NOT under /tmp.
# /tmp is mounted as tmpfs (RAM-backed) by the panel for fast small-file
# scratch (per-stage reports, exclude lists). Putting the multi-GB cpmove
# extract there blew the container's --memory 2g cgroup ceiling (tmpfs
# writes count against cgroup RSS), surfaced as rc=137 OOM kills mid-tar.
#
# Layout:
#   EXTRACT_DIR  $SANITIZED_DIR/extract-work — tar untars here. After
#                scan-files quarantines bad files, this is the cleaned
#                tree. Renamed to $SANITIZED_DIR/extracted at the end of
#                the run so the panel can find it at the expected path.
#   WORK_DIR     $SANITIZED_DIR/work — scan-dbs writes cleaned
#                SQL dumps here; folded into $SANITIZED_DIR/mysql at the
#                end of the run.
EXTRACT_DIR="$SANITIZED_DIR/extract-work"
WORK_DIR="$SANITIZED_DIR/work"
# Guarded explicitly: under `set -e` a bare mkdir failure would terminate the
# script before any failure report is written.
mkdir -p "$EXTRACT_DIR" "$WORK_DIR/mysql" \
  || die "cannot create scratch dirs under $SANITIZED_DIR (is the bind mount RW?)"

# --- refresh ClamAV signatures --------------------------------------------

STAGE="freshclam"
if [[ "$CLAMAV_REFRESH" == "true" ]]; then
  log "refreshing ClamAV signatures (freshclam)"
  # freshclam writes freshclam.dat to its CWD; the container's WORKDIR
  # is /opt/whp which lives on the read-only rootfs, so freshclam errors
  # with "Can't create freshclam.dat in /opt/whp" before it ever reaches
  # the database directory. Subshell + cd to the tmpfs at /var/lib/clamav
  # (the DatabaseDirectory configured in /etc/freshclam.conf) keeps the
  # entrypoint's CWD intact for later stages.
  # freshclam is allowed to fail (e.g., container has no outbound net);
  # we proceed with the baseline rules from build time + log a warning.
  if ! ( cd /var/lib/clamav && freshclam --no-warnings >/tmp/freshclam.log 2>&1 ); then
    log "WARN: freshclam failed; proceeding with build-time signature DB"
    # Best-effort context only; a missing log must not fail the run.
    tail -n 20 /tmp/freshclam.log || true
  fi
else
  log "CLAMAV_REFRESH=false; skipping freshclam"
fi

# --- extract the cpmove ----------------------------------------------------

STAGE="extract"
log "stage: extract"
# 4th arg pins the stripped-symlinks actions sidecar to /tmp (not inside
# $EXTRACT_DIR) so finalize_layout's mv doesn't carry an importer dotfile
# into the cleaned tree and so write_report can read it after the rename.
STRIPPED_SYMLINKS_FILE="/tmp/stripped-symlinks.json"
if ! /scripts/extract.sh "$IMPORT_BACKUP_FILE" "$EXTRACT_DIR" "$IMPORT_USERNAME" "$STRIPPED_SYMLINKS_FILE"; then
  die "extract.sh failed; see stderr above"
fi

# --- ClamAV scan + auto-clean/quarantine ----------------------------------

STAGE="scan_files"
log "stage: scan_files"
php /scripts/scan-files.php \
  --extract "$EXTRACT_DIR" \
  --quarantine "$QUARANTINE_DIR" \
  --report /tmp/scan-files-report.json \
  --import-id "$IMPORT_ID" \
  || die "scan-files.php failed; see stderr above"

# --- DB engine swap + WP content scan -------------------------------------

STAGE="scan_dbs"
log "stage: scan_dbs"
php /scripts/scan-dbs.php \
  --extract "$EXTRACT_DIR" \
  --out "$WORK_DIR/mysql" \
  --final-prefix "$SANITIZED_DIR/mysql" \
  --report /tmp/scan-dbs-report.json \
  --import-id "$IMPORT_ID" \
  --username "$IMPORT_USERNAME" \
  || die "scan-dbs.php failed; see stderr above"

# --- finalize cleaned tree into $SANITIZED_DIR ----------------------------

STAGE="finalize_layout"
log "stage: finalize_layout"
# Both EXTRACT_DIR and WORK_DIR already live INSIDE $SANITIZED_DIR (the
# bind-mounted disk-backed output root), so we don't need to cross-filesystem
# rsync 10GB+ of cleaned files. A same-filesystem `mv` is constant-time
# (just a rename) — turns what used to be a multi-minute rsync into a
# fraction of a second.
#
# Cleanup posture: if a previous run partially populated `extracted/` or
# `mysql/`, we wipe them first so the rename can't fail with EEXIST. The
# container's --read-only rootfs makes accidentally removing the wrong
# path impossible — these are under the per-import bind mount only.
# ${SANITIZED_DIR:?} additionally refuses to expand to a bare "/extracted"
# should the variable ever be empty, and the `|| die` keeps a failed wipe
# from exiting via `set -e` without a failure report.
rm -rf -- "${SANITIZED_DIR:?}/extracted" "${SANITIZED_DIR:?}/mysql" \
  || die "finalize: could not remove stale extracted/ or mysql/ dirs"
mv -- "$EXTRACT_DIR" "$SANITIZED_DIR/extracted" || die "finalize: rename extract-work failed"
mv -- "$WORK_DIR/mysql" "$SANITIZED_DIR/mysql" || die "finalize: rename work/mysql failed"
# Tidy up the now-empty WORK_DIR shell.
rmdir "$WORK_DIR" 2>/dev/null || true  # best-effort; a non-empty dir is left in place

# --- merge per-stage reports into the final report.json -------------------

STAGE="write_report"
log "stage: write_report"
DURATION=$(( $(date -u +%s) - START_TS ))

# The PHP below is passed inside a single-quoted shell string, so it must not
# contain any single-quote characters (comments included).
# Arguments: import id, duration (s), scan-files report, scan-dbs report,
# stripped-symlinks sidecar, output path. Exits non-zero on any corrupt input
# or failed write so the `|| die` below fires.
php -r '
  $importId     = $argv[1];
  $duration     = (int) $argv[2];
  $filesPath    = $argv[3];
  $dbsPath      = $argv[4];
  $strippedPath = $argv[5];
  $outPath      = $argv[6];

  // A stage report may be absent (treated as null). One that exists but does
  // not decode to a JSON array/object is corrupt: merging it as "nothing
  // found" would claim a clean scan that never happened, so fail instead.
  $load = function (string $path) {
      if (!is_file($path)) {
          return null;
      }
      $raw     = file_get_contents($path);
      $decoded = ($raw === false) ? null : json_decode($raw, true);
      if (!is_array($decoded)) {
          fprintf(STDERR, "report merge: %s is unreadable or not valid JSON\n", $path);
          exit(1);
      }
      return $decoded;
  };

  // Fetch a list-valued key, defaulting to [] when missing and failing when
  // present with a non-list value.
  $listOf = function ($report, string $key, string $path) {
      $value = $report[$key] ?? [];
      if (!is_array($value)) {
          fprintf(STDERR, "report merge: \"%s\" in %s is not a list\n", $key, $path);
          exit(1);
      }
      return $value;
  };

  $files    = $load($filesPath);
  $dbs      = $load($dbsPath);
  $stripped = $load($strippedPath);

  $filesScanned    = $files["files_scanned"] ?? 0;
  $filesClean      = $files["files_clean"] ?? 0;
  $filesCleaned    = $files["files_cleaned"] ?? 0;
  $scanQuarantined = $files["files_quarantined"] ?? 0;
  $actions         = $listOf($files, "actions", $filesPath);
  $databases       = $listOf($dbs, "databases", $dbsPath);

  // Prepend the stripped-symlinks actions from extract.sh so the operator
  // sees them at the top of the actions[] table on the results page. They
  // count towards files_quarantined because the strip-action is morally
  // equivalent to a quarantine - the entry was not extracted, the symlink
  // file is "in the archive but absent from the cleaned tree".
  $strippedActions  = $listOf($stripped, "actions", $strippedPath);
  $strippedCount    = count($strippedActions);
  $actions          = array_merge($strippedActions, $actions);
  $filesQuarantined = $scanQuarantined + $strippedCount;

  // A database counts as refused only when the flag is explicitly false.
  $dbRefused = 0;
  foreach ($databases as $db) {
      if (($db["imported_into_new_server"] ?? true) === false) {
          $dbRefused++;
      }
  }

  $severity = "info";
  $alert    = false;
  $msg      = "Sanitization clean: no malware signatures detected.";
  // files_cleaned is part of the trigger: a file cleaned in place means a
  // signature DID match, so the "no malware signatures detected" message
  // would be false for a cleaned-only run.
  if ($filesQuarantined > 0 || $dbRefused > 0 || $filesCleaned > 0) {
      $alert = true;
      if ($filesQuarantined > 50 || $dbRefused > 0 || $strippedCount > 0) {
          $severity = "warning";
      }
      $parts = [];
      if ($strippedCount > 0) {
          $parts[] = sprintf("%d dangerous symlink(s) stripped during extract", $strippedCount);
      }
      if ($scanQuarantined > 0) {
          $parts[] = sprintf("%d files quarantined", $scanQuarantined);
      }
      if ($filesCleaned > 0) {
          $parts[] = sprintf("%d cleaned in place", $filesCleaned);
      }
      if ($dbRefused > 0) {
          $parts[] = sprintf("%d database(s) refused as compromised", $dbRefused);
      }
      $msg = implode("; ", $parts) . ". Customer site may have been compromised at the source — recommend review.";
  }

  $report = [
      "import_id"             => $importId,
      "status"                => "completed",
      "scan_duration_seconds" => $duration,
      "files_scanned"         => $filesScanned,
      "files_clean"           => $filesClean,
      "files_cleaned"         => $filesCleaned,
      "files_quarantined"     => $filesQuarantined,
      "actions"               => $actions,
      "databases"             => $databases,
      "summary_for_panel"     => [
          "show_alert"     => $alert,
          "alert_severity" => $severity,
          "alert_message"  => $msg,
      ],
  ];

  // Both steps return false on failure; without these checks the script
  // would exit 0 having written an empty report or none at all.
  $json = json_encode($report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
  if ($json === false) {
      fprintf(STDERR, "report merge: json_encode failed: %s\n", json_last_error_msg());
      exit(1);
  }
  if (file_put_contents($outPath, $json . "\n") === false) {
      fprintf(STDERR, "report merge: cannot write %s\n", $outPath);
      exit(1);
  }
  fprintf(STDERR, "report written: %s\n", $outPath);
' "$IMPORT_ID" "$DURATION" \
  /tmp/scan-files-report.json /tmp/scan-dbs-report.json \
  "$STRIPPED_SYMLINKS_FILE" "$SANITIZED_DIR/report.json" \
  || die "report merge failed"

log "done — exited cleanly after ${DURATION}s"
exit 0