diff --git a/scripts/scan-files.php b/scripts/scan-files.php index 6b96d4f..6bd939c 100755 --- a/scripts/scan-files.php +++ b/scripts/scan-files.php @@ -105,13 +105,59 @@ pclose($fh); // File count — we need files_scanned for the report. clamscan's summary // counting is suppressed; do a fast file count ourselves. +// +// Symlinks are skipped entirely: +// 1. cPanel cpmove tarballs contain symlinks with absolute targets that +// point at the SOURCE server's filesystem (e.g., /home/USER/...) +// which don't exist inside the container. PHP's SplFileInfo::isFile() +// tries to follow the symlink, the resolved target is not under any +// open_basedir-allowed prefix, and PHP throws RuntimeException +// mid-iteration — aborting the whole scan. +// 2. clamscan itself handles symlinks via its own walk (default: does +// NOT follow them — same posture we want). Counting them here would +// double-count vs clamscan's signal anyway. +// 3. Quarantining a symlink-file is meaningless (it's a 0-byte fs entry +// whose target is the actual artifact). +// +// Use a RecursiveCallbackFilterIterator that performs an lstat-based is_link() check +// BEFORE the iterator hands the entry off to RecursiveIteratorIterator's +// hasChildren / isFile follow-paths. is_link() is open_basedir-safe. +// The try/catch is a defense-in-depth belt: if any other fs-op throws +// (e.g. a symlink that races mid-walk), skip the entry rather than abort. 
// Count regular files under $extractDir for the report's files_scanned
// figure. Symlinks are filtered out before anything stats through them, so
// they are neither counted nor descended into.
$filesScanned = 0;
$skippedLinks = 0;
$walkErrors = 0;

$rdi = new RecursiveDirectoryIterator(
    $extractDir,
    FilesystemIterator::SKIP_DOTS | FilesystemIterator::CURRENT_AS_PATHNAME
);

// CURRENT_AS_PATHNAME makes the iterator yield plain path strings, so the
// filter callback receives a string. Rejecting a link here also keeps the
// recursive walk from descending into it.
$filter = new RecursiveCallbackFilterIterator(
    $rdi,
    static function ($pathname) use (&$skippedLinks) {
        if (is_link($pathname)) {
            $skippedLinks++;
            return false;
        }
        return true;
    }
);

// CATCH_GET_CHILD: a subdirectory that cannot be opened is skipped instead
// of throwing out of the loop. Directories skipped this way are NOT
// reflected in $walkErrors.
$it = new RecursiveIteratorIterator(
    $filter,
    RecursiveIteratorIterator::LEAVES_ONLY,
    RecursiveIteratorIterator::CATCH_GET_CHILD
);

// Exceptions raised while the iterator rewinds or advances surface at the
// foreach statement itself, not inside its body, so a try/catch placed only
// around the body cannot see them. The outer try/catch keeps such a failure
// from terminating the script: the walk stops at that point and the partial
// count is reported.
try {
    foreach ($it as $pathname) {
        try {
            // Links were already filtered out, so this does not follow one.
            if (is_file($pathname)) {
                $filesScanned++;
            }
        } catch (\Throwable $e) {
            // is_file() normally signals failure by returning false; this
            // catch only matters if an error handler installed elsewhere
            // converts warnings to exceptions (not visible from here).
            // Count it and move on to the next entry.
            $walkErrors++;
        }
    }
} catch (\Throwable $e) {
    $walkErrors++;
    fwrite(STDERR, sprintf(
        "scan-files: file walk stopped early: %s\n",
        $e->getMessage()
    ));
}

fwrite(STDERR, sprintf(
    "scan-files: file walk: counted=%d, symlinks-skipped=%d, walk-errors=%d\n",
    $filesScanned,
    $skippedLinks,
    $walkErrors
));

// -- classify + action each hit --------------------------------------------