--quarantine --report --import-id
 *
 * Exit codes:
 *   0 — scan completed (regardless of how many hits)
 *   1 — fatal scanner error (clamscan binary missing, signature DB unreadable,
 *       quarantine dir cannot be created, report cannot be written)
 *   2 — usage error
 *
 * Report shape: matches spec §3, e.g.:
 *   {
 *     "files_scanned": N,
 *     "files_clean": N,
 *     "files_cleaned": 0,        // always 0 in v1.0 — no cleaners yet
 *     "files_quarantined": N,
 *     "actions": [ { path, signature, action, cleaner, backup } ]
 *   }
 */

require __DIR__ . '/lib/safety-net.php';

const SCANNER_VERSION = '1.0.0';

// -- argument parsing --------------------------------------------------------

$opts = getopt('', ['extract:', 'quarantine:', 'report:', 'import-id:']);
if (!is_array($opts)) {
    $opts = [];
}
foreach (['extract', 'quarantine', 'report', 'import-id'] as $k) {
    // getopt() returns an array (not a string) when an option is repeated;
    // treat that as a usage error instead of letting rtrim() throw later.
    if (!isset($opts[$k]) || !is_string($opts[$k])) {
        fwrite(STDERR, "usage: scan-files.php --extract --quarantine --report --import-id \n");
        exit(2);
    }
}

$extractDir    = rtrim($opts['extract'], '/');
$quarantineDir = rtrim($opts['quarantine'], '/');
$reportPath    = $opts['report'];
$importId      = $opts['import-id'];

if (!is_dir($extractDir)) {
    fwrite(STDERR, "scan-files: extract dir does not exist: $extractDir\n");
    exit(2);
}

// Without a quarantine dir no hit can be moved out of the extract tree, so
// failing to create it is fatal rather than something to discover per file.
if (!is_dir($quarantineDir) && !@mkdir($quarantineDir, 0750, true) && !is_dir($quarantineDir)) {
    fwrite(STDERR, "scan-files: cannot create quarantine dir: $quarantineDir\n");
    exit(1);
}

fwrite(STDERR, "scan-files: starting (extract=$extractDir, quarantine=$quarantineDir)\n");

// -- v1.0 cleaner registry (intentionally empty) ----------------------------
//
// Each entry maps a ClamAV signature substring -> classification +
// cleaner callable. v1.0 ships empty so EVERY hit is classified as
// QUARANTINE_ONLY. See CONTRIBUTING.md "Adding an auto-cleaner pattern"
// for how to add a tested entry.
//
// Shape (v1.1+):
//   $cleaners = [
//     'php-eval-base64-prefix' => [
//        'class' => 'KNOWN_REMOVABLE',
//        'match' => fn(string $sig): bool => str_contains($sig, 'PHP.Trojan.EvalB64'),
//        'clean' => fn(string $path): bool => /* rewrite file in place; return ok */,
//     ],
//   ];
$cleaners = [];

/**
 * Move one file out of the extract tree into the quarantine tree.
 *
 * Tries rename() first and falls back to copy + unlink, because rename
 * across mount boundaries can fail with EXDEV. Returns true only when the
 * file is present at $dest AND no longer present at $src; a copy whose
 * source could not be removed is reported as a failure, because the infected
 * original would otherwise still be picked up from the extract dir.
 */
$moveToQuarantine = static function (string $src, string $dest): bool {
    @mkdir(dirname($dest), 0750, true);
    if (@rename($src, $dest)) {
        return true;
    }
    if (!@copy($src, $dest)) {
        return false;
    }
    return @unlink($src);
};

// -- run clamscan recursively over the extract dir --------------------------
// We use --infected so the output is only hits, and --recursive so we
// walk subdirectories. We deliberately do NOT use --remove (we never want
// clamscan unlinking files — we control quarantine).
//
// Output format per line on a hit:
//   /tmp/extract/foo/bar.php: Some.Signature.Name FOUND
$cmd = sprintf(
    'clamscan --infected --recursive --no-summary --stdout %s 2>/dev/null',
    escapeshellarg($extractDir)
);
$fh = popen($cmd, 'r');
if (!$fh) {
    fwrite(STDERR, "scan-files: failed to spawn clamscan\n");
    exit(1);
}

$hits = [];
// Compare against the dir WITH a trailing separator so a sibling such as
// "/tmp/extract-old/..." can never pass the containment check.
$extractPrefix = $extractDir . '/';
while (($line = fgets($fh)) !== false) {
    $line = rtrim($line, "\r\n");
    if ($line === '' || !str_ends_with($line, ' FOUND')) {
        continue;
    }
    // Strip trailing ' FOUND'.
    $body = substr($line, 0, -6);
    // Split on the LAST ': ' — the path itself may contain that sequence.
    $colon = strrpos($body, ': ');
    if ($colon === false) {
        continue;
    }
    $path = substr($body, 0, $colon);
    $sig  = substr($body, $colon + 2);
    if (!str_starts_with($path, $extractPrefix)) {
        // Defensive: shouldn't happen with our invocation.
        continue;
    }
    $hits[] = ['path' => $path, 'signature' => $sig];
}

// clamscan exits 0 (nothing found) or 1 (hits found) on a successful scan.
// Anything else — 2 for scanner errors, 126/127 from the shell when the
// binary cannot be run, -1 when pclose() itself fails — means the absence of
// hits proves nothing, so fail closed instead of reporting everything clean.
$scanStatus = pclose($fh);
if ($scanStatus !== 0 && $scanStatus !== 1) {
    fwrite(STDERR, "scan-files: clamscan failed (exit status $scanStatus)\n");
    exit(1);
}

// File count — we need files_scanned for the report. clamscan's summary
// counting is suppressed; do a fast file count ourselves.
//
// Symlinks are skipped entirely:
//   1. cPanel cpmove tarballs contain symlinks with absolute targets that
//      point at the SOURCE server's filesystem (e.g., /home/...) which
//      don't exist inside the container. PHP's SplFileInfo::isFile()
//      tries to follow the symlink, the resolved target is not under any
//      open_basedir-allowed prefix, and PHP throws RuntimeException
//      mid-iteration — aborting the whole scan.
//   2. clamscan itself handles symlinks via its own walk (default: does
//      NOT follow them — same posture we want). Counting them here would
//      double-count vs clamscan's signal anyway.
//   3. Quarantining a symlink-file is meaningless (it's a 0-byte fs entry
//      whose target is the actual artifact).
//
// A RecursiveCallbackFilterIterator performs an lstat-based is_link() check
// BEFORE the entry reaches RecursiveIteratorIterator's hasChildren / isFile
// follow-paths. is_link() is open_basedir-safe.
//
// Defense in depth: CATCH_GET_CHILD makes the iterator skip a subdirectory it
// cannot open instead of throwing, and the surrounding try/catch covers
// anything else the iterator raises (e.g. a symlink that races mid-walk).
// Exceptions are raised by the foreach machinery itself, so the guard has to
// wrap the whole loop, not just the loop body.
$filesScanned = 0;
$skippedLinks = 0;
$walkErrors   = 0;

// Silence open_basedir E_WARNINGs during the walk. cPanel cpmove tarballs
// frequently contain symlinks pointing at the SOURCE server's absolute
// paths (db.php -> /home/.../wp-content/db.php, access-logs ->
// /usr/local/apache/domlogs/, etc.). PHP's open_basedir check
// normalizes via realpath() even for is_link/is_file; when the symlink
// target lies outside the container's allow-list (it does — there's no
// /home or /usr/local in the container), PHP emits a Warning per call.
// Our filter callback STILL returns the right answer (is_link returns
// true even when warning), so the skip-symlink logic works regardless
// of the noise — but the noise floods 100k+ lines into the import log
// for a typical customer account, drowning the actually-useful
// quarantine actions.
//
// set_error_handler limited to E_WARNING with a needle filter is
// narrower than `error_reporting(0)` (we still surface non-open_basedir
// warnings) and narrower than `@`-prefixing every call site (covers
// PHP-internal callbacks invoked deep inside the iterator).
set_error_handler(static function (int $errno, string $errstr): bool {
    // true = suppress; false = let PHP handle the warning as usual.
    return $errno === E_WARNING && str_contains($errstr, 'open_basedir restriction');
}, E_WARNING);

try {
    $rdi = new RecursiveDirectoryIterator(
        $extractDir,
        FilesystemIterator::SKIP_DOTS | FilesystemIterator::CURRENT_AS_PATHNAME
    );
    // The current element is a string because CURRENT_AS_PATHNAME is set.
    $filter = new RecursiveCallbackFilterIterator(
        $rdi,
        static function ($pathname) use (&$skippedLinks): bool {
            if (is_link($pathname)) {
                $skippedLinks++;
                return false;
            }
            return true;
        }
    );
    $it = new RecursiveIteratorIterator(
        $filter,
        RecursiveIteratorIterator::LEAVES_ONLY,
        RecursiveIteratorIterator::CATCH_GET_CHILD
    );

    foreach ($it as $pathname) {
        // is_file() follows symlinks, but links were already filtered out.
        // For regular files this is a cheap stat.
        if (is_file($pathname)) {
            $filesScanned++;
        }
    }
} catch (\Throwable $e) {
    // A race or filesystem oddity must not bomb the whole scanner; the
    // count is then a lower bound. Log + continue.
    $walkErrors++;
    fwrite(STDERR, 'scan-files: WARN file walk stopped early: ' . $e->getMessage() . "\n");
} finally {
    // Always put the previous handler back, even when the walk blew up.
    restore_error_handler();
}

fwrite(STDERR, sprintf(
    "scan-files: file walk: counted=%d, symlinks-skipped=%d, walk-errors=%d\n",
    $filesScanned, $skippedLinks, $walkErrors
));

// -- classify + action each hit --------------------------------------------
$actions     = [];
$cleaned     = 0;
$quarantined = 0;

foreach ($hits as $h) {
    $path = $h['path'];
    $sig  = $h['signature'];

    // v1.0 — every hit is QUARANTINE_ONLY because the cleaner registry
    // is empty. From v1.1 the first registry entry whose matcher accepts
    // the signature decides the classification.
    $classification = 'QUARANTINE_ONLY';
    $cleanerName    = null;
    foreach ($cleaners as $name => $entry) {
        if (($entry['match'])($sig)) {
            $classification = $entry['class'];
            $cleanerName    = $name;
            break;
        }
    }

    $relPath = ltrim(substr($path, strlen($extractDir)), '/');
    $qPath   = $quarantineDir . '/' . $relPath;

    if ($classification === 'QUARANTINE_ONLY') {
        // Move the whole file to quarantine; remove from extract dir so
        // the rsync to /host/sanitized/ does not include it.
        if (!$moveToQuarantine($path, $qPath)) {
            fwrite(STDERR, "scan-files: WARN failed to quarantine $path -> $qPath\n");
            continue;
        }
        $quarantined++;
        $actions[] = [
            'path'      => $relPath,
            'signature' => $sig,
            'action'    => 'quarantined',
            'cleaner'   => null,
            'backup'    => $qPath,
        ];
        continue;
    }

    // v1.1+ paths:
    if ($classification === 'KNOWN_REMOVABLE' || $classification === 'REMOVABLE_WITH_BACKUP') {
        // Backup first, then run the cleaner.
        @mkdir(dirname($qPath), 0750, true);
        $backup   = $qPath . '.original';
        $backedUp = @copy($path, $backup);
        if (!$backedUp) {
            fwrite(STDERR, "scan-files: backup before clean failed: $path; quarantining instead\n");
        }

        // Never run a cleaner without a backup in place.
        $cleanerOk = $backedUp && ($cleaners[$cleanerName]['clean'])($path);
        if (!$cleanerOk) {
            // No backup, or the cleaner refused: fall back to quarantine,
            // and only count it when the file really left the extract dir.
            if (!$moveToQuarantine($path, $qPath)) {
                fwrite(STDERR, "scan-files: WARN failed to quarantine $path -> $qPath\n");
                continue;
            }
            $quarantined++;
            $actions[] = [
                'path'      => $relPath,
                'signature' => $sig,
                'action'    => 'quarantined',
                // The cleaner is only named when it actually ran.
                'cleaner'   => $backedUp ? $cleanerName : null,
                'backup'    => $qPath,
            ];
            continue;
        }

        $cleaned++;
        $actions[] = [
            'path'      => $relPath,
            'signature' => $sig,
            'action'    => 'cleaned',
            'cleaner'   => $cleanerName,
            'backup'    => $backup,
        ];
    }
}

// -- write the report --------------------------------------------------------
$report = [
    'scanner_version'   => SCANNER_VERSION,
    'import_id'         => $importId,
    'files_scanned'     => $filesScanned,
    'files_clean'       => max(0, $filesScanned - count($hits)),
    'files_cleaned'     => $cleaned,
    'files_quarantined' => $quarantined,
    'actions'           => $actions,
];

// File names in customer archives are not guaranteed to be valid UTF-8;
// without JSON_INVALID_UTF8_SUBSTITUTE json_encode() would return false and
// the report would end up as a lone newline.
$json = json_encode(
    $report,
    JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
);
if ($json === false) {
    fwrite(STDERR, 'scan-files: failed to encode report: ' . json_last_error_msg() . "\n");
    exit(1);
}
if (@file_put_contents($reportPath, $json . "\n") === false) {
    fwrite(STDERR, "scan-files: failed to write report: $reportPath\n");
    exit(1);
}

fwrite(STDERR, sprintf(
    "scan-files: done — scanned=%d clean=%d cleaned=%d quarantined=%d\n",
    $filesScanned, $report['files_clean'], $cleaned, $quarantined
));
exit(0);