--quarantine --report --import-id
 *
 * Exit codes:
 *   0 — scan completed (regardless of how many hits)
 *   1 — fatal scanner error (clamscan binary missing, signature DB unreadable,
 *       scan aborted, or the report could not be written)
 *   2 — usage error
 *
 * Report shape: matches spec §3, e.g.:
 *   {
 *     "files_scanned": N,
 *     "files_clean": N,
 *     "files_cleaned": 0,        // always 0 in v1.0 — no cleaners yet
 *     "files_quarantined": N,
 *     "actions": [ { path, signature, action, cleaner, backup } ]
 *   }
 */

require __DIR__ . '/lib/safety-net.php';

const SCANNER_VERSION = '1.0.0';

// -- argument parsing ------------------------------------------------------
$opts = getopt('', ['extract:', 'quarantine:', 'report:', 'import-id:']);
foreach (['extract', 'quarantine', 'report', 'import-id'] as $k) {
    // getopt() yields an array when an option is repeated; treat that (and an
    // empty value) as a usage error instead of crashing in rtrim() below.
    if (!isset($opts[$k]) || !is_string($opts[$k]) || $opts[$k] === '') {
        fwrite(STDERR, "usage: scan-files.php --extract --quarantine --report --import-id \n");
        exit(2);
    }
}

$extractDir    = rtrim($opts['extract'], '/');
$quarantineDir = rtrim($opts['quarantine'], '/');
$reportPath    = $opts['report'];
$importId      = $opts['import-id'];

if (!is_dir($extractDir)) {
    fwrite(STDERR, "scan-files: extract dir does not exist: $extractDir\n");
    exit(2);
}

// Best effort: a missing quarantine dir is reported here, and every individual
// quarantine attempt below is checked again on its own.
if (!is_dir($quarantineDir) && !@mkdir($quarantineDir, 0750, true) && !is_dir($quarantineDir)) {
    fwrite(STDERR, "scan-files: WARN could not create quarantine dir: $quarantineDir\n");
}

fwrite(STDERR, "scan-files: starting (extract=$extractDir, quarantine=$quarantineDir)\n");

// -- v1.0 cleaner registry (intentionally empty) ----------------------------
//
// Each entry maps a ClamAV signature substring -> classification +
// cleaner callable. v1.0 ships empty so EVERY hit is classified as
// QUARANTINE_ONLY. See CONTRIBUTING.md "Adding an auto-cleaner pattern"
// for how to add a tested entry.
//
// Shape (v1.1+):
//   $cleaners = [
//     'php-eval-base64-prefix' => [
//       'class' => 'KNOWN_REMOVABLE',
//       'match' => fn(string $sig): bool => str_contains($sig, 'PHP.Trojan.EvalB64'),
//       'clean' => fn(string $path): bool => /* rewrite file in place; return ok */,
//     ],
//   ];
$cleaners = [];

/**
 * Move an infected file into quarantine.
 *
 * Returns true only when the file ended up at $dst AND is gone from $src, so
 * a "quarantined" report entry is never written for a file that is still in
 * the extract dir.
 */
$moveToQuarantine = static function (string $src, string $dst): bool {
    $dstDir = dirname($dst);
    if (!is_dir($dstDir)) {
        @mkdir($dstDir, 0750, true);
    }
    if (@rename($src, $dst)) {
        return true;
    }
    // Fall back to copy + unlink (rename across mount boundaries
    // sometimes EXDEVs even though /tmp and /host are both ours).
    if (!@copy($src, $dst)) {
        return false;
    }
    return @unlink($src);
};

// -- run clamscan recursively over the extract dir --------------------------
// We use --infected so the output is only hits, and --recursive so we
// walk subdirectories. We deliberately do NOT use --remove (we never want
// clamscan unlinking files — we control quarantine).
//
// Output format per line on a hit:
//   /tmp/extract/foo/bar.php: Some.Signature.Name FOUND
$cmd = sprintf(
    'clamscan --infected --recursive --no-summary --stdout %s 2>/dev/null',
    escapeshellarg($extractDir)
);
$fh = popen($cmd, 'r');
if (!$fh) {
    fwrite(STDERR, "scan-files: failed to spawn clamscan\n");
    exit(1);
}

// Require the separator so a sibling such as "<extract>-other/" cannot pass
// the containment check.
$extractPrefix = $extractDir . '/';

$hits = [];
while (($line = fgets($fh)) !== false) {
    $line = rtrim($line, "\r\n");
    if ($line === '' || !str_ends_with($line, ' FOUND')) {
        continue;
    }
    // Strip trailing ' FOUND'.
    $body  = substr($line, 0, -6);
    $colon = strrpos($body, ': ');
    if ($colon === false) {
        continue;
    }
    $path = substr($body, 0, $colon);
    $sig  = substr($body, $colon + 2);
    if (!str_starts_with($path, $extractPrefix)) {
        // Defensive: shouldn't happen with our invocation.
        continue;
    }
    $hits[] = ['path' => $path, 'signature' => $sig];
}

// clamscan exits 0 (nothing found) or 1 (hits found) after a completed scan.
// Anything else — 2 from clamscan itself, 127 from the shell when the binary
// is missing, -1 from pclose() — means the scan did not complete, and an
// empty hit list must not be reported as "everything is clean".
$scanStatus = pclose($fh);
if ($scanStatus !== 0 && $scanStatus !== 1) {
    fwrite(STDERR, "scan-files: clamscan failed (exit status $scanStatus)\n");
    exit(1);
}

// File count — we need files_scanned for the report. clamscan's summary
// counting is suppressed; do a fast file count ourselves.
$filesScanned = 0;
$rdi = new RecursiveDirectoryIterator($extractDir, FilesystemIterator::SKIP_DOTS);
$it  = new RecursiveIteratorIterator($rdi);
foreach ($it as $entry) {
    /** @var SplFileInfo $entry */
    if ($entry->isFile()) {
        $filesScanned++;
    }
}

// -- classify + action each hit --------------------------------------------
$actions     = [];
$cleaned     = 0;
$quarantined = 0;

foreach ($hits as $h) {
    $path = $h['path'];
    $sig  = $h['signature'];

    // v1.0 — every hit is QUARANTINE_ONLY because the cleaner registry
    // is empty. $cleanerName is reset per hit so a match from an earlier
    // iteration can never leak into this one.
    $classification = 'QUARANTINE_ONLY';
    $cleanerName    = null;
    foreach ($cleaners as $name => $entry) {
        if (($entry['match'])($sig)) {
            $classification = $entry['class'];
            $cleanerName    = $name;
            break;
        }
    }

    $relPath = ltrim(substr($path, strlen($extractDir)), '/');
    $qPath   = $quarantineDir . '/' . $relPath;

    // Cleaner name to record if this hit ends up quarantined; only set when
    // a cleaner actually ran and refused.
    $reportedCleaner = null;

    // v1.1+ paths: back up first, then run the cleaner.
    $cleanable = $cleanerName !== null
        && in_array($classification, ['KNOWN_REMOVABLE', 'REMOVABLE_WITH_BACKUP'], true);

    if ($cleanable) {
        $backupDir = dirname($qPath);
        if (!is_dir($backupDir)) {
            @mkdir($backupDir, 0750, true);
        }
        $backup = $qPath . '.original';

        if (@copy($path, $backup)) {
            if (($cleaners[$cleanerName]['clean'])($path)) {
                $cleaned++;
                $actions[] = [
                    'path'      => $relPath,
                    'signature' => $sig,
                    'action'    => 'cleaned',
                    'cleaner'   => $cleanerName,
                    'backup'    => $backup,
                ];
                continue;
            }
            // Cleaner refused; fall back to quarantine.
            $reportedCleaner = $cleanerName;
        } else {
            fwrite(STDERR, "scan-files: backup before clean failed: $path; quarantining instead\n");
        }
    }

    // Everything that was not cleaned — QUARANTINE_ONLY, a failed backup, a
    // refused clean, or an unrecognised classification — is moved out of the
    // extract dir so the rsync to /host/sanitized/ does not include it.
    if (!$moveToQuarantine($path, $qPath)) {
        fwrite(STDERR, "scan-files: WARN failed to quarantine $path -> $qPath\n");
        continue;
    }

    $quarantined++;
    $actions[] = [
        'path'      => $relPath,
        'signature' => $sig,
        'action'    => 'quarantined',
        'cleaner'   => $reportedCleaner,
        'backup'    => $qPath,
    ];
}

// -- write the report -------------------------------------------------------
$report = [
    'scanner_version'   => SCANNER_VERSION,
    'import_id'         => $importId,
    'files_scanned'     => $filesScanned,
    'files_clean'       => max(0, $filesScanned - count($hits)),
    'files_cleaned'     => $cleaned,
    'files_quarantined' => $quarantined,
    'actions'           => $actions,
];

// File names are attacker-controlled and may hold bytes that are not valid
// UTF-8; substitute them instead of letting json_encode() return false.
$json = json_encode(
    $report,
    JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
);
if ($json === false || @file_put_contents($reportPath, $json . "\n") === false) {
    fwrite(STDERR, "scan-files: failed to write report: $reportPath\n");
    exit(1);
}

fwrite(STDERR, sprintf(
    "scan-files: done — scanned=%d clean=%d cleaned=%d quarantined=%d\n",
    $filesScanned,
    $report['files_clean'],
    $cleaned,
    $quarantined
));
exit(0);