217 lines
7.3 KiB
PHP
217 lines
7.3 KiB
PHP
|
|
<?php
|
||
|
|
/**
 * scan-files.php — ClamAV scan + classify-and-action orchestrator.
 *
 * v1.0: quarantine-on-every-hit. No auto-cleaners enabled. The cleaner
 * registry (KNOWN_REMOVABLE / REMOVABLE_WITH_BACKUP) is stubbed below
 * for v1.1 expansion; see CONTRIBUTING.md for how to wire one in.
 *
 * Usage:
 *   scan-files.php --extract <dir> --quarantine <dir> --report <out.json> --import-id <id>
 *
 * Exit codes:
 *   0 — scan completed (regardless of how many hits)
 *   1 — fatal scanner error (clamscan binary missing, signature DB unreadable)
 *   2 — usage error
 *
 * Report shape: matches spec §3, e.g.:
 *   {
 *     "files_scanned": N,
 *     "files_clean": N,
 *     "files_cleaned": 0,        // always 0 in v1.0 — no cleaners yet
 *     "files_quarantined": N,
 *     "actions": [ { path, signature, action, cleaner, backup } ]
 *   }
 */
|
||
|
|
|
||
|
|
require __DIR__ . '/lib/safety-net.php';
|
||
|
|
|
||
|
|
const SCANNER_VERSION = '1.0.0';

// -- command-line options ----------------------------------------------------
//
// All four long options are required and must each be given exactly once.
// getopt() returns an array for a repeated option, which would otherwise
// reach rtrim() and raise a TypeError, so non-string values are rejected
// as a usage error here.
$usage = "usage: scan-files.php --extract <dir> --quarantine <dir> --report <out.json> --import-id <id>\n";
$opts = getopt('', ['extract:', 'quarantine:', 'report:', 'import-id:']) ?: [];
foreach (['extract', 'quarantine', 'report', 'import-id'] as $k) {
    if (!isset($opts[$k]) || !is_string($opts[$k])) {
        fwrite(STDERR, $usage);
        exit(2);
    }
}

// Trailing slashes are stripped so later path arithmetic (prefix checks,
// relative-path derivation) can assume "<dir>/<rest>".
$extractDir = rtrim($opts['extract'], '/');
$quarantineDir = rtrim($opts['quarantine'], '/');
$reportPath = $opts['report'];
$importId = $opts['import-id'];

if (!is_dir($extractDir)) {
    fwrite(STDERR, "scan-files: extract dir does not exist: $extractDir\n");
    exit(2);
}

// Create the quarantine root up front and refuse to continue without it:
// scanning with nowhere to move hits would leave infected files in place.
// The trailing is_dir() re-check tolerates a concurrent creator.
if (!is_dir($quarantineDir) && !@mkdir($quarantineDir, 0750, true) && !is_dir($quarantineDir)) {
    fwrite(STDERR, "scan-files: cannot create quarantine dir: $quarantineDir\n");
    exit(2);
}

fwrite(STDERR, "scan-files: starting (extract=$extractDir, quarantine=$quarantineDir)\n");
|
||
|
|
|
||
|
|
// -- cleaner registry (empty in v1.0) ----------------------------------------
//
// Keyed by cleaner name. The classify loop further down reads three fields
// from each entry:
//   'class' — classification string assigned to a matching hit
//             (KNOWN_REMOVABLE or REMOVABLE_WITH_BACKUP);
//   'match' — callable taking the ClamAV signature, truthy when the
//             entry applies;
//   'clean' — callable taking the file path, truthy when the in-place
//             clean succeeded.
//
// With no entries registered, every hit keeps the default QUARANTINE_ONLY
// classification. CONTRIBUTING.md ("Adding an auto-cleaner pattern")
// describes how to add a tested entry.
//
// Example entry (v1.1+):
//   $cleaners = [
//       'php-eval-base64-prefix' => [
//           'class' => 'KNOWN_REMOVABLE',
//           'match' => fn(string $sig): bool => str_contains($sig, 'PHP.Trojan.EvalB64'),
//           'clean' => fn(string $path): bool => /* rewrite file in place; return ok */,
//       ],
//   ];
$cleaners = [];
|
||
|
|
|
||
|
|
// -- run clamscan recursively over the extract dir --------------------------

// --infected restricts output to hits and --recursive walks subdirectories.
// --remove is deliberately NOT used: clamscan must never unlink files,
// quarantine is handled by this script.
//
// Output format per line on a hit:
//   /tmp/extract/foo/bar.php: Some.Signature.Name FOUND
$cmd = sprintf(
    'clamscan --infected --recursive --no-summary --stdout %s 2>/dev/null',
    escapeshellarg($extractDir)
);

$fh = popen($cmd, 'r');
if (!$fh) {
    fwrite(STDERR, "scan-files: failed to spawn clamscan\n");
    exit(1);
}

$hits = [];
// Compare against "<dir>/" rather than "<dir>" so a sibling such as
// "/tmp/extract-old" is not mistaken for a path inside "/tmp/extract".
$extractPrefix = $extractDir . '/';
while (($line = fgets($fh)) !== false) {
    $line = rtrim($line, "\r\n");
    if (!str_ends_with($line, ' FOUND')) {
        continue;
    }
    // Drop the trailing ' FOUND' (6 bytes), then split on the LAST ': '
    // so a path that itself contains ': ' is kept intact.
    $body = substr($line, 0, -6);
    $colon = strrpos($body, ': ');
    if ($colon === false) {
        continue;
    }
    $path = substr($body, 0, $colon);
    $sig = substr($body, $colon + 2);
    if (!str_starts_with($path, $extractPrefix)) {
        // Defensive: shouldn't happen with our invocation. Say so loudly,
        // because a dropped hit is an infected file that is not actioned.
        fwrite(STDERR, "scan-files: WARN ignoring hit outside extract dir: $path\n");
        continue;
    }
    $hits[] = ['path' => $path, 'signature' => $sig];
}

// clamscan exits 0 (no virus found) or 1 (virus found) on a completed scan.
// Anything else — 2 for scanner errors, 127 from the shell when the binary
// is missing, -1 from pclose() itself — means the scan cannot be trusted,
// so fail closed instead of emitting a "clean" report.
$scanStatus = pclose($fh);
if ($scanStatus !== 0 && $scanStatus !== 1) {
    fwrite(STDERR, "scan-files: clamscan failed (exit status $scanStatus)\n");
    exit(1);
}
if ($scanStatus === 1 && $hits === []) {
    // clamscan reported infections but none could be mapped to a file in
    // the extract dir; continuing would report an infected tree as clean.
    fwrite(STDERR, "scan-files: clamscan reported hits but none could be parsed\n");
    exit(1);
}
|
||
|
|
|
||
|
|
// The report needs files_scanned, but clamscan runs with --no-summary, so
// the tally is taken here by walking the extract tree and counting every
// entry that isFile() accepts.
$treeWalker = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($extractDir, FilesystemIterator::SKIP_DOTS)
);
$filesScanned = iterator_count(
    new CallbackFilterIterator(
        $treeWalker,
        static fn (SplFileInfo $node): bool => $node->isFile()
    )
);
|
||
|
|
|
||
|
|
// -- classify + action each hit --------------------------------------------

/**
 * Move one file out of the extract tree into quarantine.
 *
 * Tries rename() first; if that fails (e.g. EXDEV across mount boundaries)
 * it falls back to copy + unlink. Returns true only when the source no
 * longer exists at $src afterwards — a copy whose unlink failed is NOT a
 * successful quarantine, because the infected original would still be
 * picked up by the rsync to /host/sanitized/.
 *
 * Held in a closure rather than a named function so it cannot collide
 * with anything declared by the required library file.
 */
$moveToQuarantine = static function (string $src, string $dst): bool {
    $dstDir = dirname($dst);
    if (!is_dir($dstDir) && !@mkdir($dstDir, 0750, true) && !is_dir($dstDir)) {
        return false;
    }
    if (@rename($src, $dst)) {
        return true;
    }
    if (!@copy($src, $dst)) {
        return false;
    }
    return @unlink($src);
};

$actions = [];
$cleaned = 0;
$quarantined = 0;

foreach ($hits as $h) {
    $path = $h['path'];
    $sig = $h['signature'];

    // Default classification. With the v1.0 registry empty this is what
    // every hit gets; once cleaners exist, the first matching entry wins.
    // $cleanerName is reset per hit so a match from an earlier iteration
    // can never leak into this one.
    $classification = 'QUARANTINE_ONLY';
    $cleanerName = null;
    foreach ($cleaners as $name => $entry) {
        if (($entry['match'])($sig)) {
            $classification = $entry['class'];
            $cleanerName = $name;
            break;
        }
    }

    $relPath = ltrim(substr($path, strlen($extractDir)), '/');
    $qPath = $quarantineDir . '/' . $relPath;

    // Name of the cleaner that was actually run and refused, if any; it is
    // recorded on the fallback quarantine action. Stays null when no
    // cleaner ran (QUARANTINE_ONLY, or the pre-clean backup failed).
    $attemptedCleaner = null;

    // v1.1+ paths: back up first, then run the cleaner.
    if ($classification === 'KNOWN_REMOVABLE' || $classification === 'REMOVABLE_WITH_BACKUP') {
        @mkdir(dirname($qPath), 0750, true);
        $backup = $qPath . '.original';
        if (!@copy($path, $backup)) {
            fwrite(STDERR, "scan-files: backup before clean failed: $path; quarantining instead\n");
        } else {
            $attemptedCleaner = $cleanerName;
            if (($cleaners[$cleanerName]['clean'])($path)) {
                $cleaned++;
                $actions[] = [
                    'path' => $relPath,
                    'signature' => $sig,
                    'action' => 'cleaned',
                    'cleaner' => $cleanerName,
                    'backup' => $backup,
                ];
                continue;
            }
            // Cleaner refused; fall through to quarantine.
        }
    }

    // Everything that was not successfully cleaned is quarantined:
    // QUARANTINE_ONLY hits, failed backups, refused cleaners, and any
    // classification string this loop does not recognise (fail closed).
    // The file is moved out of the extract dir so the rsync to
    // /host/sanitized/ does not include it.
    if (!$moveToQuarantine($path, $qPath)) {
        fwrite(STDERR, "scan-files: WARN failed to quarantine $path -> $qPath\n");
        continue;
    }
    $quarantined++;
    $actions[] = [
        'path' => $relPath,
        'signature' => $sig,
        'action' => 'quarantined',
        'cleaner' => $attemptedCleaner,
        'backup' => $qPath,
    ];
}
|
||
|
|
|
||
|
|
// -- emit the JSON report -----------------------------------------------------

// files_clean is derived: everything counted minus every file clamscan
// flagged, clamped at zero.
$report = [
    'scanner_version' => SCANNER_VERSION,
    'import_id' => $importId,
    'files_scanned' => $filesScanned,
    'files_clean' => max(0, $filesScanned - count($hits)),
    'files_cleaned' => $cleaned,
    'files_quarantined' => $quarantined,
    'actions' => $actions,
];

// File names inside an uploaded archive are untrusted and may not be valid
// UTF-8. Without JSON_INVALID_UTF8_SUBSTITUTE json_encode() returns false
// for such input and the "report" would be a lone newline; with it, bad
// bytes become U+FFFD. Any remaining encode failure is surfaced via
// JSON_THROW_ON_ERROR instead of being written out as an empty file.
try {
    $json = json_encode(
        $report,
        JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE | JSON_THROW_ON_ERROR
    );
} catch (JsonException $e) {
    fwrite(STDERR, 'scan-files: failed to encode report: ' . $e->getMessage() . "\n");
    exit(1);
}

// A scan whose report cannot be written must not exit 0: whatever consumes
// the report would otherwise see a stale or missing file.
if (@file_put_contents($reportPath, $json . "\n") === false) {
    fwrite(STDERR, "scan-files: failed to write report: $reportPath\n");
    exit(1);
}

fwrite(STDERR, sprintf(
    "scan-files: done — scanned=%d clean=%d cleaned=%d quarantined=%d\n",
    $filesScanned, $report['files_clean'], $cleaned, $quarantined
));
exit(0);
|