diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index d3125a3..42d98f8 100755
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -159,15 +159,18 @@ rsync -a --no-owner --no-group --no-perms --chmod=Du=rwx,Dg=rx,Do=,Fu=rw,Fg=r,Fo
 STAGE="write_report"
 log "stage: write_report"
 DURATION=$(( $(date -u +%s) - START_TS ))
+STRIPPED_SYMLINKS_FILE="$EXTRACT_DIR/.cpanel-importer-stripped-symlinks.json"
 php -r '
-$importId  = $argv[1];
-$duration  = (int) $argv[2];
-$filesPath = $argv[3];
-$dbsPath   = $argv[4];
-$outPath   = $argv[5];
+$importId     = $argv[1];
+$duration     = (int) $argv[2];
+$filesPath    = $argv[3];
+$dbsPath      = $argv[4];
+$strippedPath = $argv[5];
+$outPath      = $argv[6];
 
-$files = is_file($filesPath) ? json_decode(file_get_contents($filesPath), true) : null;
-$dbs   = is_file($dbsPath) ? json_decode(file_get_contents($dbsPath), true) : null;
+$files    = is_file($filesPath) ? json_decode(file_get_contents($filesPath), true) : null;
+$dbs      = is_file($dbsPath) ? json_decode(file_get_contents($dbsPath), true) : null;
+$stripped = is_file($strippedPath) ? json_decode(file_get_contents($strippedPath), true) : null;
 
 $filesScanned = $files["files_scanned"] ?? 0;
 $filesClean = $files["files_clean"] ?? 0;
@@ -176,6 +179,18 @@ $filesQuarantined = $files["files_quarantined"] ?? 0;
 $actions = $files["actions"] ?? [];
 $databases = $dbs["databases"] ?? [];
 
+// Prepend the stripped-symlinks actions from extract.sh so the operator
+// sees them at the top of the actions[] table on the results page. Bumps
+// files_quarantined because the strip-action is morally equivalent to a
+// quarantine - the entry was not extracted, the symlink file is "in the
+// archive but absent from the cleaned tree".
+$strippedActions = is_array($stripped["actions"] ?? null) ? $stripped["actions"] : [];
+$strippedCount = count($strippedActions);
+if ($strippedCount > 0) {
+    $actions = array_merge($strippedActions, $actions);
+    $filesQuarantined += $strippedCount;
+}
+
 $dbRefused = 0;
 foreach ($databases as $db) {
     if (($db["imported_into_new_server"] ?? true) === false) $dbRefused++;
@@ -184,13 +199,24 @@ foreach ($databases as $db) {
 $severity = "info";
 $alert = false;
 $msg = "Sanitization clean: no malware signatures detected.";
-if ($filesQuarantined > 0 || $dbRefused > 0) {
+if ($filesQuarantined > 0 || $dbRefused > 0 || $strippedCount > 0) {
     $alert = true;
-    $severity = ($filesQuarantined > 50 || $dbRefused > 0) ? "warning" : "info";
-    $msg = sprintf(
-        "%d files quarantined + %d cleaned in place; %d database(s) refused as compromised. Customer site may have been compromised at the source — recommend review.",
-        $filesQuarantined, $filesCleaned, $dbRefused
-    );
+    $severity = ($filesQuarantined > 50 || $dbRefused > 0 || $strippedCount > 0) ? "warning" : "info";
+    $parts = [];
+    if ($strippedCount > 0) {
+        $parts[] = sprintf("%d dangerous symlink(s) stripped during extract", $strippedCount);
+    }
+    if ($filesQuarantined - $strippedCount > 0) {
+        $parts[] = sprintf("%d files quarantined", $filesQuarantined - $strippedCount);
+    }
+    if ($filesCleaned > 0) {
+        $parts[] = sprintf("%d cleaned in place", $filesCleaned);
+    }
+    if ($dbRefused > 0) {
+        $parts[] = sprintf("%d database(s) refused as compromised", $dbRefused);
+    }
+    $msg = implode("; ", $parts)
+        . ". Customer site may have been compromised at the source — recommend review.";
 }
 
 $report = [
@@ -212,7 +238,7 @@ $report = [
 
 file_put_contents($outPath, json_encode($report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES) . "\n");
 fprintf(STDERR, "report written: %s\n", $outPath);
-' "$IMPORT_ID" "$DURATION" /tmp/scan-files-report.json /tmp/scan-dbs-report.json "$SANITIZED_DIR/report.json" \
+' "$IMPORT_ID" "$DURATION" /tmp/scan-files-report.json /tmp/scan-dbs-report.json "$STRIPPED_SYMLINKS_FILE" "$SANITIZED_DIR/report.json" \
     || die "report merge failed"
 
 log "done — exited cleanly after ${DURATION}s"
diff --git a/scripts/extract.sh b/scripts/extract.sh
index a3ba98c..07cd824 100755
--- a/scripts/extract.sh
+++ b/scripts/extract.sh
@@ -1,19 +1,25 @@
 #!/usr/bin/env bash
 #
-# extract.sh — pre-extract symlink scan + cpmove untar.
+# extract.sh — symlink scan + sanitized cpmove untar.
 #
-# Usage: extract.sh <tarball> <dest> <username>
+# Usage: extract.sh <tarball> <dest> <username> [<actions-out>]
 #
-# Calls scripts/lib/scan-symlinks.php first; if it reports any DANGEROUS
-# findings we abort BEFORE tar runs (per spec §0 step 2). On clean,
-# extracts with the same hardening flags CpanelBackupImporter::extractBackup
-# uses on the panel today (see web-files/libs/CpanelBackupImporter.php).
+# Calls scripts/lib/scan-symlinks.php first, then untars the cpmove with
+# every DANGEROUS-classified symlink entry stripped via tar --exclude.
+# The stripped-symlinks list is written as JSON to <actions-out> (default
+# $DEST/.cpanel-importer-stripped-symlinks.json) so the merge step in
+# entrypoint.sh can fold the stripped entries into report.json's actions[].
+#
+# Sandbox-mode posture: never refuse. ALFA-class root symlinks and other
+# DANGEROUS entries are silently excluded from extraction; the panel sees
+# them as quarantine actions on the results page instead of an import abort.
 
 set -euo pipefail
 
-TARBALL="${1:?usage: extract.sh <tarball> <dest> <username>}"
-DEST="${2:?usage: extract.sh <tarball> <dest> <username>}"
-USERNAME="${3:?usage: extract.sh <tarball> <dest> <username>}"
+TARBALL="${1:?usage: extract.sh <tarball> <dest> <username> [<actions-out>]}"
+DEST="${2:?usage: extract.sh <tarball> <dest> <username> [<actions-out>]}"
+USERNAME="${3:?usage: extract.sh <tarball> <dest> <username> [<actions-out>]}"
+ACTIONS_OUT="${4:-${DEST}/.cpanel-importer-stripped-symlinks.json}"
 
 ts() { date -u +'%Y-%m-%dT%H:%M:%SZ'; }
 log() { printf '[%s] extract: %s\n' "$(ts)" "$*"; }
@@ -29,15 +35,74 @@ if ! php /scripts/lib/scan-symlinks.php \
     --tarball "$TARBALL" \
     --username "$USERNAME" \
     --report "$SYMLINK_REPORT"; then
-    log "scan-symlinks.php exited non-zero"
+    log "scan-symlinks.php exited with usage/IO error; aborting (this is not a sanitize-able state)"
     cat "$SYMLINK_REPORT" >&2 || true
-    log "ABORT: tarball contains dangerous symlinks; aborting"
-    # Propagate the report on stdout so entrypoint.sh can include it
-    # in the failure record.
     exit 3
 fi
 
-log "symlink scan clean (no DANGEROUS findings)"
+# --- compute exclude list from dangerous findings -------------------------
+
+# Build a newline-delimited list of archive_path strings for tar --exclude-
+# from. Also build a JSON actions[] array so entrypoint.sh's merge step can
+# fold the strip-actions into report.json without re-parsing scan-symlinks.
+#
+# Fail closed: if the report cannot be parsed, or a DANGEROUS entry cannot be
+# expressed as a one-line exclude entry, abort rather than extract with an
+# incomplete exclude list.
+#
+# The actions JSON is staged outside $DEST and moved to $ACTIONS_OUT only
+# after tar has run, so an archive member with the same name cannot
+# overwrite it.
+EXCLUDES_FILE=$(mktemp -p /tmp tar-excludes.XXXXXX)
+ACTIONS_TMP=$(mktemp -p /tmp stripped-actions.XXXXXX)
+DANGEROUS_COUNT=$(python3 - "$SYMLINK_REPORT" "$EXCLUDES_FILE" "$ACTIONS_TMP" <<'PY'
+import json, sys
+src, excl_path, actions_path = sys.argv[1], sys.argv[2], sys.argv[3]
+try:
+    with open(src) as fh:
+        r = json.load(fh)
+    findings = r.get('findings', [])
+except Exception as e:
+    sys.stderr.write(f"failed to parse scan-symlinks report: {e}\n")
+    sys.exit(1)
+
+dangerous = [f for f in findings if f.get('type') == 'DANGEROUS']
+unstrippable = [
+    f for f in dangerous
+    if not f.get('archive_path') or '\n' in f['archive_path']
+]
+if unstrippable:
+    sys.stderr.write(f"{len(unstrippable)} DANGEROUS finding(s) have no usable archive_path\n")
+    sys.exit(1)
+
+with open(excl_path, 'w') as eh:
+    for f in dangerous:
+        eh.write(f['archive_path'] + '\n')
+
+actions = [
+    {
+        'action': 'stripped_dangerous_symlink',
+        'path': f.get('archive_path', ''),
+        'target': f.get('target', ''),
+        'reason': f.get('reason', ''),
+    }
+    for f in dangerous
+]
+with open(actions_path, 'w') as ah:
+    json.dump({'actions': actions, 'count': len(actions)}, ah, indent=2)
+print(len(dangerous))
+PY
+) || {
+    log "could not build exclude list from $SYMLINK_REPORT; aborting"
+    exit 3
+}
+
+if [[ "$DANGEROUS_COUNT" -gt 0 ]]; then
+    log "stripping $DANGEROUS_COUNT dangerous symlink(s) via tar --exclude-from"
+    while IFS= read -r path; do
+        log " STRIP: $path"
+    done < "$EXCLUDES_FILE"
+fi
 
 # --- extract --------------------------------------------------------------
 
@@ -56,9 +121,24 @@ log "extracting with hardened tar flags into $DEST"
 #   uid/perm bits so the cpmove can't drop setuid binaries at us.
 # --no-overwrite-dir: refuse to clobber existing directory metadata,
 #   closing one historical tar-symlink-escape vector.
+# --no-wildcards --exclude-from=$EXCLUDES_FILE: strip every
+#   DANGEROUS-classified symlink (target = /, /etc, /root, /boot, /proc,
+#   /sys, /dev). Empty file = no-op exclude. Member names are attacker-
+#   controlled and may contain glob metacharacters, so --no-wildcards
+#   makes tar compare each line as a literal string, not an fnmatch
+#   pattern. Matching is left unanchored (tar's default for excludes).
 # --absolute-names is NOT used — leading / in a member name is stripped.
 cd "$DEST"
-tar --no-same-owner --no-same-permissions --no-overwrite-dir $TAR_FLAGS "$TARBALL"
+tar --no-same-owner --no-same-permissions --no-overwrite-dir \
+    --no-wildcards --exclude-from="$EXCLUDES_FILE" \
+    $TAR_FLAGS "$TARBALL"
+rm -f -- "$EXCLUDES_FILE"
+
+# Publish the strip-actions only now that extraction is finished. Remove
+# whatever the archive may have placed at $ACTIONS_OUT first so mv creates
+# a fresh entry instead of following a symlink or moving into a directory.
+rm -rf -- "$ACTIONS_OUT"
+mv -f -- "$ACTIONS_TMP" "$ACTIONS_OUT"
 
-log "extracted OK ($(find "$DEST" -type f | wc -l) files)"
+log "extracted OK ($(find "$DEST" -type f | wc -l) files; $DANGEROUS_COUNT symlinks stripped)"
 exit 0
diff --git a/scripts/lib/scan-symlinks.php b/scripts/lib/scan-symlinks.php
index 3d36f68..43da433 100644
--- a/scripts/lib/scan-symlinks.php
+++ b/scripts/lib/scan-symlinks.php
@@ -9,9 +9,13 @@
  * gate without dragging in the rest of the importer.
  *
  * Exit codes:
- *   0 — clean (no DANGEROUS findings)
- *   1 — one or more DANGEROUS findings; tarball MUST NOT be extracted
- *   2 — usage / I/O error
+ *   0 — scan completed successfully (with or without DANGEROUS findings).
+ *       Findings are recorded in --report; extract.sh inspects the report
+ *       to decide which entries to --exclude from `tar -xzf`. Sandbox-mode
+ *       posture is "sanitize, don't refuse" — the container drops the
+ *       dangerous symlinks from extraction and records the actions in
+ *       report.json instead of aborting the whole import.
+ *   2 — usage / I/O error (couldn't read tarball, couldn't write report).
  *
  * Always writes a JSON report to --report describing every absolute-target
  * symlink seen and the classification verdict.
@@ -181,15 +185,22 @@ $report = [
 
-@file_put_contents($reportPath, json_encode($report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES) . "\n");
+// The report is the only channel extract.sh uses to learn which entries to
+// exclude, so a failed write is an I/O error (exit 2), not a clean scan.
+if (@file_put_contents($reportPath, json_encode($report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES) . "\n") === false) {
+    fwrite(STDERR, "scan-symlinks: failed to write report to $reportPath\n");
+    exit(2);
+}
 
+// Sandbox-mode posture: never refuse. Log every DANGEROUS finding to
+// stderr so the panel sees them in the streamed [container] log, and let
+// extract.sh inspect --report to decide which entries to exclude from
+// the tar untar. Caller treats exit 0 as "scan completed; consult report".
 if ($dangerousCount > 0) {
-    fwrite(STDERR, "scan-symlinks: $dangerousCount DANGEROUS finding(s); refusing tarball\n");
+    fwrite(STDERR, "scan-symlinks: $dangerousCount DANGEROUS finding(s) will be stripped during extract\n");
     foreach ($findings as $f) {
         if ($f['type'] === 'DANGEROUS') {
-            fwrite(STDERR, sprintf(" %s -> %s (%s)\n", $f['archive_path'], $f['target'], $f['reason']));
+            fwrite(STDERR, sprintf(" STRIP %s -> %s (%s)\n", $f['archive_path'], $f['target'], $f['reason']));
         }
     }
-    exit(1);
 }
-
-fwrite(STDERR, "scan-symlinks: clean (uncertain=$uncertainCount, dangerous=0)\n");
+fwrite(STDERR, "scan-symlinks: scan complete (uncertain=$uncertainCount, dangerous=$dangerousCount)\n");
 exit(0);