Files
cpanel-importer/scripts/entrypoint.sh

220 lines
8.0 KiB
Bash
Raw Normal View History

Initial bootstrap: cpanel-importer sanitization sandbox Skeleton for the cpanel-importer Docker container — a one-shot sandbox the WHP panel invokes BEFORE extracting a customer cpmove tarball. See cpanel-import-container-spec.md (in /workspace/) for the full design. What this ships in v1.0: - Dockerfile: almalinux:10-minimal + PHP 8.4 (Remi) + ClamAV 1.4 + SaneSecurity Foxhole.PHP rules + tar/mariadb-client/rsync. Runs as UID 999 (whp-import) via the panel-side --user 999:999 flag. - scripts/entrypoint.sh: validates env, runs (optional) freshclam, drives extract -> scan-files -> scan-dbs -> rsync -> report.json. - scripts/extract.sh + scripts/lib/scan-symlinks.php: pre-extract symlink scan ported standalone from web-files/libs/CpanelBackupImporter.php (the existing 2026-05-29 whp02 destruction-vector fix). Aborts with exit 3 before tar runs if any DANGEROUS symlink is found. - scripts/scan-files.php: ClamAV walk + classify-and-action. v1.0 ships with an empty cleaner registry — every hit is QUARANTINE_ONLY. Cleaner hooks are stubbed for v1.1. - scripts/scan-dbs.php: regex MyISAM -> InnoDB rewrite (always applied), WordPress identification, and ONE WP content scan check (siteurl_external_domain). v1.1 will grow the check set. - scripts/lib/safety-net.php: container-narrow open_basedir allow-list, much tighter than the panel-side one. - .gitea/workflows/build-push.yaml: builds + smoke-tests + PHP-syntax-checks + bash-syntax-checks before pushing to repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer. - tests/build-fixtures.sh: builds cpmove-clean.tar.gz (benign WP dump) and cpmove-alfa.tar.gz (the ALFA-shell symlink-to-/etc vector) for local end-to-end testing. - README.md / CONTRIBUTING.md: docker-run invocation, bind-mount catalog, report.json schema, how to add a cleaner pattern or a WP scan signature. 
Local acceptance test results: - clean fixture -> status=completed, 3 MyISAM->InnoDB, no flags, 0 - ALFA fixture -> exit 1, status=failed, failed_stage=extract, "tarball contains dangerous symlinks; aborting" on stderr - compromised-siteurl fixture -> imported_into_new_server=false, .flagged file written, summary_for_panel.show_alert=true Image size: 197 MB compressed (gzipped docker save), ~397 MB unique layers extracted. Well under the spec's 600 MB compressed / 1.2 GB extracted budget. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-30 19:56:57 -07:00
#!/usr/bin/env bash
#
# entrypoint.sh — top-level driver for the cpanel-importer sandbox.
#
# Environment (provided by the panel's `docker run`):
#   IMPORT_ID           unique id of this run; names the quarantine and
#                       report paths
#   IMPORT_USERNAME     cPanel/WHP username the cpmove belongs to
#   IMPORT_BACKUP_FILE  absolute path of the tarball inside the container,
#                       typically /host/backup/cpmove-<user>.tar.gz
#   CLAMAV_REFRESH      "true" runs freshclam before scanning (default: true)
#
# Pipeline (spec §0):
#   1. validate env
#   2. (optional) refresh ClamAV signatures
#   3. extract      → /tmp/extract/
#   4. file scan    → /tmp/scan-files-report.json
#   5. DB sanitize  → /tmp/sanitized/mysql/, /tmp/scan-dbs-report.json
#   6. rsync /tmp/sanitized/ → /host/sanitized/<importid>/
#   7. write /host/sanitized/<importid>/report.json (merged)
#
# When a stage fails, the script attempts to write a partial report.json
# carrying status="failed" plus the name of the failing stage, and then
# exits non-zero.

# Strict mode: abort on unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail
# --- logging ---------------------------------------------------------------

# Print the current UTC time as an ISO-8601 timestamp with a trailing Z.
ts() {
  date -u +'%Y-%m-%dT%H:%M:%SZ'
}

# Write one "[<timestamp>] <message>" line to stdout. All arguments are
# joined into a single message.
log() {
  local message="$*"
  printf '[%s] %s\n' "$(ts)" "$message"
}

# Log a FATAL line, hand the message to write_failure_report together with
# the stage recorded in $STAGE, then terminate the script with status 1.
die() {
  local reason="$*"
  log "FATAL: $reason"
  write_failure_report "$STAGE" "$reason"
  exit 1
}
# Run-wide state shared with the failure path.
#   STAGE     name of the pipeline stage currently running; die() passes it
#             to write_failure_report so the report names the broken stage.
#   START_TS  start of the run in epoch seconds; used to compute
#             scan_duration_seconds.
# The final report.json is produced by the merge step at the end of this
# script; if a stage dies before that, write_failure_report emits a minimal
# status="failed" report instead.
STAGE=init
START_TS=$(date -u '+%s')
#######################################
# Encode one string as a JSON string literal (quotes included) using PHP.
# Arguments:
#   $1 - raw text to encode
#   $2 - complete JSON literal to print instead when PHP cannot encode $1
# Outputs:
#   exactly one JSON string literal on stdout
# Returns:
#   0
#######################################
_json_string_or() {
  local raw="$1"
  local fallback="$2"
  local encoded

  # json_encode() returns false for malformed UTF-8, and `echo false` prints
  # nothing while PHP still exits 0. So: substitute invalid bytes, turn a
  # false result into a non-zero exit, and treat empty output as a failure.
  if encoded="$(printf '%s' "$raw" | php -r '
      $json = json_encode(stream_get_contents(STDIN), JSON_INVALID_UTF8_SUBSTITUTE);
      if ($json === false) {
          exit(1);
      }
      echo $json;
    ' 2>/dev/null)" && [[ -n "$encoded" ]]; then
    printf '%s' "$encoded"
  else
    printf '%s' "$fallback"
  fi
}

#######################################
# Persist a minimal status="failed" report.json for the current import.
# Best-effort by design: every failure is logged as a WARN and the function
# still returns 0, so the report writer can never abort the script itself.
# Globals:
#   IMPORT_ID (read; "unknown" when unset or empty)
#   START_TS  (read; epoch seconds at script start)
# Arguments:
#   $1 - name of the stage that failed
#   $2 - human-readable error message
#   $3 - optional output root (default: /host/sanitized); the report lands
#        in <root>/<import id>/report.json
# Outputs:
#   WARN lines via log() when the directory or the file cannot be written
# Returns:
#   0 always
#######################################
write_failure_report() {
  local stage="$1"
  local msg="$2"
  local out_root="${3:-/host/sanitized}"
  local import_id="${IMPORT_ID:-unknown}"
  local out_dir="${out_root}/${import_id}"
  local json_import_id json_stage json_error elapsed

  # mkdir can fail (read-only mount, missing bind mount, ...); without the
  # directory there is nowhere to put the report, so warn and give up.
  if ! mkdir -p "$out_dir" 2>/dev/null; then
    log "WARN: failure-report mkdir failed for $out_dir; report will not be persisted"
    return 0
  fi

  # Every interpolated string goes through the JSON encoder so a quote,
  # backslash or control character cannot corrupt the document. If PHP is
  # unusable, import_id and stage fall back to their raw quoted form and the
  # free-form message falls back to a fixed placeholder.
  json_import_id="$(_json_string_or "$import_id" "\"${import_id}\"")" \
    || json_import_id="\"${import_id}\""
  json_stage="$(_json_string_or "$stage" "\"${stage}\"")" \
    || json_stage="\"${stage}\""
  json_error="$(_json_string_or "$msg" '"(unencodable)"')" \
    || json_error='"(unencodable)"'
  elapsed=$(( $(date -u +%s) - START_TS ))

  if ! cat > "$out_dir/report.json" 2>/dev/null <<JSON
{
  "import_id": ${json_import_id},
  "status": "failed",
  "failed_stage": ${json_stage},
  "error": ${json_error},
  "scan_duration_seconds": ${elapsed},
  "files": null,
  "databases": null
}
JSON
  then
    log "WARN: failure-report write failed for $out_dir/report.json"
  fi
}
# --- env validation --------------------------------------------------------
STAGE="validate_env"
log "cpanel-importer starting (container UID=$(id -u) GID=$(id -g))"

# Required inputs are checked explicitly instead of with ${VAR:?...}: that
# expansion makes a non-interactive shell exit on the spot, which would skip
# die() and therefore skip the failure report. An unset and an empty value
# are both rejected.
for required_var in IMPORT_ID IMPORT_USERNAME IMPORT_BACKUP_FILE; do
  if [[ -z "${!required_var:-}" ]]; then
    die "$required_var env var is required"
  fi
done
unset required_var

CLAMAV_REFRESH="${CLAMAV_REFRESH:-true}"
log "import_id=$IMPORT_ID username=$IMPORT_USERNAME backup=$IMPORT_BACKUP_FILE"

if [[ ! -f "$IMPORT_BACKUP_FILE" ]]; then
  die "backup file does not exist or is not a regular file: $IMPORT_BACKUP_FILE"
fi

# Make sure the output dirs exist (they're bind mounts, so we trust the
# host to have created them, but mkdir -p is harmless).
QUARANTINE_DIR="/host/quarantine/$IMPORT_ID"
SANITIZED_DIR="/host/sanitized/$IMPORT_ID"
mkdir -p "$QUARANTINE_DIR" "$SANITIZED_DIR" \
  || die "cannot create quarantine/sanitized output dirs (are the bind mounts RW?)"

# Container-internal scratch space (mounted as tmpfs by the panel). Routed
# through die() as well, so a missing or read-only /tmp yields a failure
# report instead of a bare `set -e` abort.
EXTRACT_DIR="/tmp/extract"
WORK_DIR="/tmp/sanitized"
mkdir -p "$EXTRACT_DIR" "$WORK_DIR/mysql" \
  || die "cannot create scratch dirs $EXTRACT_DIR and $WORK_DIR/mysql (is /tmp writable?)"
# --- refresh ClamAV signatures --------------------------------------------
STAGE="freshclam"
case "$CLAMAV_REFRESH" in
  true)
    log "refreshing ClamAV signatures (freshclam)"
    # A freshclam failure is tolerated (for example, the container has no
    # outbound network): warn, show the tail of its log, and carry on with
    # the signature DB baked in at build time.
    freshclam --no-warnings >/tmp/freshclam.log 2>&1 || {
      log "WARN: freshclam failed; proceeding with build-time signature DB"
      tail -20 /tmp/freshclam.log || true
    }
    ;;
  *)
    # Any value other than the exact string "true" disables the refresh.
    log "CLAMAV_REFRESH=false; skipping freshclam"
    ;;
esac
# --- extract the cpmove ----------------------------------------------------
STAGE="extract"
log "stage: extract"
# extract.sh receives the tarball path, the destination directory and the
# account username; any non-zero exit from it aborts the whole run.
/scripts/extract.sh "$IMPORT_BACKUP_FILE" "$EXTRACT_DIR" "$IMPORT_USERNAME" \
  || die "extract.sh failed; see stderr above"
# --- ClamAV scan + auto-clean/quarantine ----------------------------------
STAGE="scan_files"
log "stage: scan_files"
# Arguments for the file scanner: the tree to scan, where quarantined files
# go, where the per-stage report is written, and the id of this run.
scan_files_args=(
  --extract "$EXTRACT_DIR"
  --quarantine "$QUARANTINE_DIR"
  --report /tmp/scan-files-report.json
  --import-id "$IMPORT_ID"
)
if ! php /scripts/scan-files.php "${scan_files_args[@]}"; then
  die "scan-files.php failed; see stderr above"
fi
# --- DB engine swap + WP content scan -------------------------------------
STAGE="scan_dbs"
log "stage: scan_dbs"
# Sanitized dumps are written to the scratch dir given by --out. The
# --final-prefix value is the location under the sanitized output dir that
# the rsync stage later copies them to.
scan_dbs_args=(
  --extract "$EXTRACT_DIR"
  --out "$WORK_DIR/mysql"
  --final-prefix "$SANITIZED_DIR/mysql"
  --report /tmp/scan-dbs-report.json
  --import-id "$IMPORT_ID"
  --username "$IMPORT_USERNAME"
)
if ! php /scripts/scan-dbs.php "${scan_dbs_args[@]}"; then
  die "scan-dbs.php failed; see stderr above"
fi
# --- rsync cleaned tree to /host/sanitized --------------------------------
STAGE="rsync_out"
log "stage: rsync_out"
# Both copies share one option set: archive mode, but without carrying over
# owner, group or source permissions; --chmod supplies the directory and
# file modes instead.
rsync_opts=(
  -a
  --no-owner
  --no-group
  --no-perms
  --chmod=Du=rwx,Dg=rx,Do=,Fu=rw,Fg=r,Fo=
)

# Copy the extracted tree. No exclude list is used: scan-files.php is
# expected to have already moved quarantined files out of the extract dir,
# so what remains is the cleaned tree.
if ! rsync "${rsync_opts[@]}" "$EXTRACT_DIR"/ "$SANITIZED_DIR/extracted/"; then
  die "rsync to sanitized dir failed"
fi

# Then publish the cleaned .sql files next to it.
if ! rsync "${rsync_opts[@]}" "$WORK_DIR/mysql"/ "$SANITIZED_DIR/mysql/"; then
  die "rsync of cleaned .sql files failed"
fi
# --- merge per-stage reports into the final report.json -------------------
STAGE="write_report"
log "stage: write_report"
DURATION=$(( $(date -u +%s) - START_TS ))

# The inline PHP merges the two per-stage reports into the final document.
# It exits non-zero (so die() runs and a failure report is attempted) when:
#   - a per-stage report exists but cannot be read or decoded,
#   - the merged report cannot be encoded,
#   - the output file cannot be written.
# A per-stage report that is absent is tolerated and counted as empty.
# NOTE: the PHP source is wrapped in shell single quotes, so it must not
# contain a single-quote character anywhere, comments included.
php -r '
// Load one per-stage report. Returns null when the file is absent; aborts
// the merge when the file exists but is not a decodable JSON object or
// array, because treating a corrupt report as empty would let the run be
// reported as clean.
function load_stage_report(string $path): ?array {
    if (!is_file($path)) {
        return null;
    }
    $raw = file_get_contents($path);
    $decoded = ($raw === false) ? null : json_decode($raw, true);
    if (!is_array($decoded)) {
        fprintf(STDERR, "report merge: %s is unreadable or not valid JSON\n", $path);
        exit(1);
    }
    return $decoded;
}

$importId  = $argv[1];
$duration  = (int) $argv[2];
$filesPath = $argv[3];
$dbsPath   = $argv[4];
$outPath   = $argv[5];

$files = load_stage_report($filesPath);
$dbs   = load_stage_report($dbsPath);

$filesScanned     = $files["files_scanned"] ?? 0;
$filesClean       = $files["files_clean"] ?? 0;
$filesCleaned     = $files["files_cleaned"] ?? 0;
$filesQuarantined = $files["files_quarantined"] ?? 0;
$actions          = $files["actions"] ?? [];
$databases        = $dbs["databases"] ?? [];

// A database counts as refused only when the flag is explicitly false.
$dbRefused = 0;
foreach ($databases as $db) {
    if (($db["imported_into_new_server"] ?? true) === false) $dbRefused++;
}

// Panel summary: alert as soon as anything was quarantined or refused;
// escalate to "warning" above 50 quarantined files or on any refused DB.
$severity = "info";
$alert = false;
$msg = "Sanitization clean: no malware signatures detected.";
if ($filesQuarantined > 0 || $dbRefused > 0) {
    $alert = true;
    $severity = ($filesQuarantined > 50 || $dbRefused > 0) ? "warning" : "info";
    $msg = sprintf(
        "%d files quarantined + %d cleaned in place; %d database(s) refused as compromised. Customer site may have been compromised at the source — recommend review.",
        $filesQuarantined, $filesCleaned, $dbRefused
    );
}

$report = [
    "import_id" => $importId,
    "status" => "completed",
    "scan_duration_seconds" => $duration,
    "files_scanned" => $filesScanned,
    "files_clean" => $filesClean,
    "files_cleaned" => $filesCleaned,
    "files_quarantined" => $filesQuarantined,
    "actions" => $actions,
    "databases" => $databases,
    "summary_for_panel" => [
        "show_alert" => $alert,
        "alert_severity" => $severity,
        "alert_message" => $msg,
    ],
];

// json_encode and file_put_contents both signal failure by returning
// false; check each so a lost report is never announced as written.
$json = json_encode($report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($json === false) {
    fprintf(STDERR, "report merge: json_encode failed: %s\n", json_last_error_msg());
    exit(1);
}
if (file_put_contents($outPath, $json . "\n") === false) {
    fprintf(STDERR, "report merge: cannot write %s\n", $outPath);
    exit(1);
}
fprintf(STDERR, "report written: %s\n", $outPath);
' "$IMPORT_ID" "$DURATION" /tmp/scan-files-report.json /tmp/scan-dbs-report.json "$SANITIZED_DIR/report.json" \
  || die "report merge failed"

log "done — exited cleanly after ${DURATION}s"
exit 0