From cda432e8086d5e0b487336f4d4701505c6f1ae0a Mon Sep 17 00:00:00 2001 From: "Claude (bootstrap)" Date: Sun, 31 May 2026 11:40:44 -0700 Subject: [PATCH] scan-files: skip symlinks during file walk to avoid open_basedir aborts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cPanel cpmove tarballs contain symlinks with absolute targets pointing at the SOURCE server's filesystem (e.g. addon docroots symlinked to /home//.com/ on the cPanel host). After extract into the container, those symlinks dangle — their targets don't exist in the container's namespace AND are not under any open_basedir-allowed prefix. PHP's SplFileInfo::isFile() (called from the RecursiveIteratorIterator file-count loop) follows symlinks. The realpath check against open_basedir then fires on the symlink TARGET, not the link path, and throws RuntimeException mid-iteration — aborting the entire scan without writing report.json. Surfaced on darkside import as: PHP Fatal error: Uncaught RuntimeException: SplFileInfo::isFile(): open_basedir restriction in effect. File(/host/sanitized/.../ cybercoveconsulting.com/wp-content/db.php) is not within the allowed path(s): (/host:/tmp:/opt/whp:/scripts:/var/lib/clamav:...) Fix is two-layered: 1. RecursiveCallbackFilterIterator pre-filters symlinks via is_link() before they reach hasChildren/isFile. is_link is open_basedir-safe (it stats the link itself, doesn't resolve). Skipped count is reported on STDERR so operators see what was skipped. 2. try/catch around the per-entry isFile() as a defense-in-depth layer — if any other fs op throws mid-walk (race, planted device node, etc.) we count it as a walk_error and continue, not abort. Note that clamscan already walks the extract tree on its own pass and its default symlink posture is "don't follow" — the same posture we want here. Symlink-as-file would also be useless to quarantine (it's a 0-byte fs entry whose target is the actual artifact). 
Skipping symlinks therefore doesn't miss anything. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/scan-files.php | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/scripts/scan-files.php b/scripts/scan-files.php index 6b96d4f..6bd939c 100755 --- a/scripts/scan-files.php +++ b/scripts/scan-files.php @@ -105,13 +105,59 @@ pclose($fh); // File count — we need files_scanned for the report. clamscan's summary // counting is suppressed; do a fast file count ourselves. +// +// Symlinks are skipped entirely: +// 1. cPanel cpmove tarballs contain symlinks with absolute targets that +// point at the SOURCE server's filesystem (e.g., /home/<user>/...) +// which don't exist inside the container. PHP's SplFileInfo::isFile() +// tries to follow the symlink, the resolved target is not under any +// open_basedir-allowed prefix, and PHP throws RuntimeException +// mid-iteration — aborting the whole scan. +// 2. clamscan itself handles symlinks via its own walk (default: does +// NOT follow them — same posture we want). Counting them here would +// double-count vs clamscan's signal anyway. +// 3. Quarantining a symlink-file is meaningless (it's a 0-byte fs entry +// whose target is the actual artifact). +// +// Use a RecursiveCallbackFilterIterator that performs an lstat-based is_link() +// check BEFORE the iterator hands the entry off to RecursiveIteratorIterator's +// hasChildren() / is_file() follow-paths. is_link() is assumed open_basedir-safe +// (TODO: confirm). The try/catch is a defense-in-depth belt: if any other fs-op +// throws (e.g. a symlink that races mid-walk), skip the entry rather than abort.
$filesScanned = 0;
-$rdi = new RecursiveDirectoryIterator($extractDir, FilesystemIterator::SKIP_DOTS);
-$it = new RecursiveIteratorIterator($rdi);
-foreach ($it as $entry) {
-    /** @var SplFileInfo $entry */
-    if ($entry->isFile()) $filesScanned++;
+$skippedLinks = 0;
+$walkErrors = 0;
+
+$rdi = new RecursiveDirectoryIterator(
+    $extractDir,
+    FilesystemIterator::SKIP_DOTS | FilesystemIterator::CURRENT_AS_PATHNAME
+);
+$filter = new RecursiveCallbackFilterIterator($rdi, function ($pathname, $key, $iterator) use (&$skippedLinks) {
+    // $pathname is a string (not SplFileInfo) because CURRENT_AS_PATHNAME is set.
+    if (is_link($pathname)) {
+        $skippedLinks++;
+        return false;
+    }
+    return true;
+});
+$it = new RecursiveIteratorIterator(
+    $filter,
+    RecursiveIteratorIterator::LEAVES_ONLY,
+    RecursiveIteratorIterator::CATCH_GET_CHILD // unopenable subdirectory: skip it (uncounted) instead of aborting
+);
+foreach ($it as $pathname) {
+    try {
+        if (is_file($pathname)) { // follows symlinks; links should already be filtered out above
+            $filesScanned++;
+        }
+    } catch (\Throwable $e) {
+        $walkErrors++; // count and continue; totals go to STDERR below, nothing is logged per entry
+    }
 }
+fwrite(STDERR, sprintf(
+    "scan-files: file walk: counted=%d, symlinks-skipped=%d, walk-errors=%d\n",
+    $filesScanned, $skippedLinks, $walkErrors
+));
 
 // -- classify + action each hit --------------------------------------------
 