Initial bootstrap: cpanel-importer sanitization sandbox
Skeleton for the cpanel-importer Docker container — a one-shot sandbox the WHP panel invokes BEFORE extracting a customer cpmove tarball. See cpanel-import-container-spec.md (in /workspace/) for the full design. What this ships in v1.0: - Dockerfile: almalinux:10-minimal + PHP 8.4 (Remi) + ClamAV 1.4 + SaneSecurity Foxhole.PHP rules + tar/mariadb-client/rsync. Runs as UID 999 (whp-import) via the panel-side --user 999:999 flag. - scripts/entrypoint.sh: validates env, runs (optional) freshclam, drives extract -> scan-files -> scan-dbs -> rsync -> report.json. - scripts/extract.sh + scripts/lib/scan-symlinks.php: pre-extract symlink scan ported standalone from web-files/libs/CpanelBackupImporter.php (the existing 2026-05-29 whp02 destruction-vector fix). Aborts with exit 3 before tar runs if any DANGEROUS symlink is found. - scripts/scan-files.php: ClamAV walk + classify-and-action. v1.0 ships with an empty cleaner registry — every hit is QUARANTINE_ONLY. Cleaner hooks are stubbed for v1.1. - scripts/scan-dbs.php: regex MyISAM -> InnoDB rewrite (always applied), WordPress identification, and ONE WP content scan check (siteurl_external_domain). v1.1 will grow the check set. - scripts/lib/safety-net.php: container-narrow open_basedir allow-list, much tighter than the panel-side one. - .gitea/workflows/build-push.yaml: builds + smoke-tests + PHP-syntax-checks + bash-syntax-checks before pushing to repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer. - tests/build-fixtures.sh: builds cpmove-clean.tar.gz (benign WP dump) and cpmove-alfa.tar.gz (the ALFA-shell symlink-to-/etc vector) for local end-to-end testing. - README.md / CONTRIBUTING.md: docker-run invocation, bind-mount catalog, report.json schema, how to add a cleaner pattern or a WP scan signature. 
Local acceptance test results: - clean fixture -> status=completed, 3 MyISAM->InnoDB, no flags, exit 0 - ALFA fixture -> exit 1, status=failed, failed_stage=extract, "tarball contains dangerous symlinks; aborting" on stderr - compromised-siteurl fixture -> imported_into_new_server=false, .flagged file written, summary_for_panel.show_alert=true Image size: 197 MB compressed (gzipped docker save), ~397 MB unique layers extracted. Well under the spec's 600 MB compressed / 1.2 GB extracted budget. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
18
.editorconfig
Normal file
18
.editorconfig
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
charset = utf-8
|
||||||
|
end_of_line = lf
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
indent_style = space
|
||||||
|
indent_size = 4
|
||||||
|
|
||||||
|
[*.{yaml,yml,json,md}]
|
||||||
|
indent_size = 2
|
||||||
|
|
||||||
|
[Dockerfile]
|
||||||
|
indent_size = 4
|
||||||
|
|
||||||
|
[Makefile]
|
||||||
|
indent_style = tab
|
||||||
98
.gitea/workflows/build-push.yaml
Normal file
98
.gitea/workflows/build-push.yaml
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
name: cpanel-importer Build and Push
|
||||||
|
run-name: ${{ gitea.actor }} pushed a change to trunk
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- trunk
|
||||||
|
tags:
|
||||||
|
- '20[0-9][0-9].[0-9][0-9].[0-9]+'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
Build-and-Push:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Login to Gitea
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: repo.anhonesthost.net
|
||||||
|
username: ${{ secrets.CI_USER }}
|
||||||
|
password: ${{ secrets.CI_TOKEN }}
|
||||||
|
|
||||||
|
# Compute the version tag. If the commit is on a `YYYY.MM.NNN` tag
|
||||||
|
# we tag the image with that version; otherwise we only tag :latest
|
||||||
|
# and :<sha>.
|
||||||
|
- name: Compute tags
|
||||||
|
id: tags
|
||||||
|
run: |
|
||||||
|
set -euo pipefail

# Resolve the image references for this build, one per line.
# Reads (runner env): GITHUB_SHA, GITHUB_REF, GITHUB_REF_NAME.
# Writes: the multi-line step output `tags`, appended to $GITHUB_OUTPUT.
image_repo="repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer"
short_sha="${GITHUB_SHA:0:12}"
ref_name="${GITHUB_REF_NAME:-}"
version_re='^20[0-9][0-9]\.[0-9][0-9]\.[0-9]+$'

# Every build is tagged :latest and :<first 12 chars of the commit sha>.
image_tags=("${image_repo}:latest" "${image_repo}:${short_sha}")

# A pushed tag ref whose name looks like a 20YY.MM.N version adds :<version>.
if [[ "${GITHUB_REF:-}" == refs/tags/* ]] && [[ "$ref_name" =~ $version_re ]]; then
  image_tags+=("${image_repo}:${ref_name}")
fi

# Join with newlines; $(...) drops the trailing newline printf emits.
tag_lines="$(printf '%s\n' "${image_tags[@]}")"

# Heredoc-style delimiter syntax is how a multi-line value is written
# to the step output file.
{
  echo "tags<<EOF"
  echo "$tag_lines"
  echo "EOF"
} >> "$GITHUB_OUTPUT"

echo "Resolved tags:"
echo "$tag_lines"
|
||||||
|
|
||||||
|
# First build locally (no push) so we can run a smoke test against
|
||||||
|
# the resolved image before pushing. The build is cached by Buildx
|
||||||
|
# so the push step below re-uses layers and is near-instant.
|
||||||
|
- name: Build Image (local, for smoke test)
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
platforms: linux/amd64
|
||||||
|
push: false
|
||||||
|
load: true
|
||||||
|
tags: cpanel-importer:smoke
|
||||||
|
no-cache: true
|
||||||
|
|
||||||
|
- name: Smoke test — image starts and `echo ok` works
|
||||||
|
run: |
|
||||||
|
set -euo pipefail

# Swap the entrypoint for /bin/echo: this only proves the image starts
# and can exec a binary, without supplying the IMPORT_* environment the
# real entrypoint would demand.
result="$(docker run --rm --entrypoint /bin/echo cpanel-importer:smoke ok)"

if [[ "$result" == "ok" ]]; then
  echo "smoke test passed"
else
  echo "smoke test failed: expected 'ok', got '$result'"
  exit 1
fi
|
||||||
|
|
||||||
|
- name: PHP syntax check
|
||||||
|
run: |
|
||||||
|
set -euo pipefail

# Lint each PHP script with the PHP binary inside the freshly built image.
# The checkout is bind-mounted at /src; the first lint failure aborts the
# step because of `set -e`.
php_sources=(scripts/*.php scripts/lib/*.php)
for src in "${php_sources[@]}"; do
  docker run --rm --volume "$PWD:/src" --entrypoint php cpanel-importer:smoke -l "/src/$src"
done
|
||||||
|
|
||||||
|
- name: Bash syntax check
|
||||||
|
run: |
|
||||||
|
set -euo pipefail

# Parse-check (bash -n, no execution) each top-level shell script with the
# bash binary inside the freshly built image. The checkout is bind-mounted
# at /src; the first parse error aborts the step because of `set -e`.
shell_sources=(scripts/*.sh)
for src in "${shell_sources[@]}"; do
  docker run --rm --volume "$PWD:/src" --entrypoint bash cpanel-importer:smoke -n "/src/$src"
done
|
||||||
|
|
||||||
|
- name: Build and Push Image
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
platforms: linux/amd64
|
||||||
|
push: true
|
||||||
|
tags: ${{ steps.tags.outputs.tags }}
|
||||||
|
cache-from: type=registry,ref=repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer:latest
|
||||||
|
cache-to: type=inline
|
||||||
32
.gitignore
vendored
Normal file
32
.gitignore
vendored
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Build artifacts
|
||||||
|
*.tar.gz
|
||||||
|
*.tgz
|
||||||
|
*.iso
|
||||||
|
|
||||||
|
# Test fixtures are generated by tests/build-fixtures.sh — do NOT check in
|
||||||
|
# the synthetic tarballs themselves; rebuild from the script.
|
||||||
|
tests/fixtures/*.tar.gz
|
||||||
|
tests/fixtures/*.tgz
|
||||||
|
|
||||||
|
# Local docker scratch
|
||||||
|
.docker-build/
|
||||||
|
.docker-cache/
|
||||||
|
|
||||||
|
# Editor noise
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Local secrets (should never exist, but defense in depth)
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
*.key
|
||||||
|
*.pem
|
||||||
|
|
||||||
|
# Local test output
|
||||||
|
/tmp/
|
||||||
|
test-output/
|
||||||
|
*.log
|
||||||
192
CONTRIBUTING.md
Normal file
192
CONTRIBUTING.md
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
# Contributing — cpanel-importer
|
||||||
|
|
||||||
|
## How to add an auto-cleaner pattern
|
||||||
|
|
||||||
|
Auto-cleaners live in `scripts/scan-files.php`, in the `$cleaners`
|
||||||
|
registry at the top of the main flow.
|
||||||
|
|
||||||
|
A cleaner has three parts:
|
||||||
|
|
||||||
|
```php
|
||||||
|
$cleaners['short-cleaner-name'] = [
|
||||||
|
'class' => 'KNOWN_REMOVABLE', // or 'REMOVABLE_WITH_BACKUP'
|
||||||
|
'match' => fn(string $sig): bool => str_contains($sig, 'PHP.Trojan.EvalB64'),
|
||||||
|
'clean' => function (string $path): bool {
|
||||||
|
// Read $path, transform, write back; return true on success.
|
||||||
|
// The file at $path is the LIVE extracted file — your edit
|
||||||
|
// here is what ends up in /host/sanitized/<id>/extracted/.
|
||||||
|
// The original has ALREADY been backed up to <quarantine>/<relpath>.original
|
||||||
|
// by the orchestrator before this is called.
|
||||||
|
},
|
||||||
|
];
|
||||||
|
```
|
||||||
|
|
||||||
|
### Safety checklist before merging a new cleaner
|
||||||
|
|
||||||
|
1. **Backup is guaranteed.** The orchestrator copies the file to
|
||||||
|
`<quarantine>/<relpath>.original` BEFORE calling `clean()`. Verify
|
||||||
|
this is still true in `scan-files.php` if you refactor the dispatch.
|
||||||
|
2. **Cleaner is idempotent.** Running it twice on the same file must
|
||||||
|
produce the same output the second time as the first.
|
||||||
|
3. **Cleaner is conservative.** If the file does NOT match your
|
||||||
|
transform exactly, return `false` (the orchestrator will fall back
|
||||||
|
to quarantining). Never "best-effort" a half-clean.
|
||||||
|
4. **Cleaner has a regression test.** Add a fixture under
|
||||||
|
`tests/fixtures/cleaner-<name>/` with input + expected output, and
|
||||||
|
exercise it from `tests/run-tests.sh` (or your CI step).
|
||||||
|
5. **Cleaner classification is correct.**
|
||||||
|
- `KNOWN_REMOVABLE` = the whole pattern is known-safe to strip.
|
||||||
|
- `REMOVABLE_WITH_BACKUP` = legit file with injected lines; we are
|
||||||
|
confident in surgical removal but back up anyway.
|
||||||
|
- `QUARANTINE_ONLY` = no clean variant; don't write a `clean()`.
|
||||||
|
6. **Signature match is tight.** Prefer
|
||||||
|
`str_contains($sig, 'specific-sig-name')` over broad regex matches.
|
||||||
|
A false-positive cleaner can corrupt customer files.
|
||||||
|
|
||||||
|
### Manual test loop
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t cpanel-importer:dev .
|
||||||
|
# Place a known-infected synthetic file under tests/fixtures/cleaner-X/in/
|
||||||
|
# Run scan-files.php directly against it:
|
||||||
|
docker run --rm \
|
||||||
|
--entrypoint /scripts/scan-files.php \
|
||||||
|
-v "$PWD/tests/fixtures/cleaner-X/in:/tmp/extract" \
|
||||||
|
-v "$PWD/tests/fixtures/cleaner-X/quarantine:/host/quarantine" \
|
||||||
|
cpanel-importer:dev \
|
||||||
|
--extract /tmp/extract --quarantine /host/quarantine \
|
||||||
|
--report /tmp/r.json --import-id test
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to add a WordPress content scan signature
|
||||||
|
|
||||||
|
Scan checks live in `scripts/scan-dbs.php`, in `wp_content_scan()`.
|
||||||
|
|
||||||
|
Each check should produce a flag dict on hit:
|
||||||
|
|
||||||
|
```php
|
||||||
|
$flags[] = [
|
||||||
|
'severity' => 'high', // 'high' refuses the DB (per default threshold N=1)
|
||||||
|
// 'medium' / 'low' flag in the report but allow import
|
||||||
|
'code' => 'short_machine_readable_code',
|
||||||
|
'details' => 'Human-readable explanation including the matched value(s).',
|
||||||
|
];
|
||||||
|
```
|
||||||
|
|
||||||
|
### Safety checklist
|
||||||
|
|
||||||
|
1. **Severity reflects confidence.** Use `high` only when a false
|
||||||
|
positive is acceptable for the customer (they re-import via the
|
||||||
|
"import anyway" UI button). A misjudged severity here translates
|
||||||
|
directly to admin support tickets.
|
||||||
|
2. **Check is fast.** The whole `.sql` dump is in memory as a string;
|
||||||
|
prefer `preg_match` on the raw string or a pre-built map (see
|
||||||
|
`extract_wp_options()`) over re-parsing the full dump.
|
||||||
|
3. **Check is well-tested.** Add a fixture under
|
||||||
|
`tests/fixtures/wp-scan-<code>/` with a synthetic dump that
|
||||||
|
triggers the flag and one that does not.
|
||||||
|
4. **Allow-list awareness.** If the check is comparing a value against
|
||||||
|
the customer's domain list, use
|
||||||
|
`domain_in_allowlist($host, $allowedDomains)` so subdomain matches
|
||||||
|
work consistently with the rest of the scanner.
|
||||||
|
5. **Don't break engine swap.** `wp_content_scan()` runs AFTER the
|
||||||
|
engine swap on the same `$rewritten` string. Both your check and
|
||||||
|
the engine swap must be tolerant of each other's output.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to test locally
|
||||||
|
|
||||||
|
### Build the image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t cpanel-importer:dev .
|
||||||
|
```
|
||||||
|
|
||||||
|
Confirm the image is under the budget:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker images cpanel-importer:dev --format '{{.Size}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Target: < 1 GB extracted (spec asks < 600 MB compressed for prod, but
|
||||||
|
local builds typically come in around 700–900 MB extracted including
|
||||||
|
ClamAV signature DBs).
|
||||||
|
|
||||||
|
### Build the fixtures
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bash tests/build-fixtures.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Two tarballs land under `tests/fixtures/`:
|
||||||
|
- `cpmove-clean.tar.gz` — a benign cpmove with a WordPress MyISAM dump.
|
||||||
|
- `cpmove-alfa.tar.gz` — same shape PLUS an ALFA-style symlink to /etc.
|
||||||
|
|
||||||
|
### Run against the clean fixture
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p /tmp/test-quarantine /tmp/test-sanitized
|
||||||
|
docker run --rm \
|
||||||
|
-e IMPORT_ID=test-clean \
|
||||||
|
-e IMPORT_USERNAME=testuser \
|
||||||
|
-e IMPORT_BACKUP_FILE=/host/backup/cpmove-clean.tar.gz \
|
||||||
|
-e CLAMAV_REFRESH=false \
|
||||||
|
-v "$PWD/tests/fixtures/cpmove-clean.tar.gz:/host/backup/cpmove-clean.tar.gz:ro" \
|
||||||
|
-v /tmp/test-quarantine:/host/quarantine \
|
||||||
|
-v /tmp/test-sanitized:/host/sanitized \
|
||||||
|
cpanel-importer:dev
|
||||||
|
```
|
||||||
|
|
||||||
|
Expect `status=completed`, MyISAM count > 0, no flags, exit 0.
|
||||||
|
|
||||||
|
### Run against the ALFA fixture
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm \
|
||||||
|
-e IMPORT_ID=test-alfa \
|
||||||
|
-e IMPORT_USERNAME=testuser \
|
||||||
|
-e IMPORT_BACKUP_FILE=/host/backup/cpmove-alfa.tar.gz \
|
||||||
|
-e CLAMAV_REFRESH=false \
|
||||||
|
-v "$PWD/tests/fixtures/cpmove-alfa.tar.gz:/host/backup/cpmove-alfa.tar.gz:ro" \
|
||||||
|
-v /tmp/test-quarantine:/host/quarantine \
|
||||||
|
-v /tmp/test-sanitized:/host/sanitized \
|
||||||
|
cpanel-importer:dev
|
||||||
|
```
|
||||||
|
|
||||||
|
Expect non-zero exit, `status=failed`, `failed_stage=extract`, and
|
||||||
|
stderr from inside the container containing
|
||||||
|
`tarball contains dangerous symlinks; aborting`.
|
||||||
|
|
||||||
|
### Iterating on PHP / shell scripts
|
||||||
|
|
||||||
|
The `scripts/` directory is `COPY`ed in late in the Dockerfile, so
|
||||||
|
edits there only re-trigger the last layer of the build — typical
|
||||||
|
turnaround is ~5 seconds.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Code style
|
||||||
|
|
||||||
|
- Bash scripts: `set -euo pipefail`, absolute paths only, every external
|
||||||
|
command on its own logical line, comment each non-obvious flag.
|
||||||
|
- PHP scripts: 4-space indent, single quotes for non-interpolated
|
||||||
|
strings, `<?php` opener on line 1, no closing `?>`.
|
||||||
|
- All scripts must be idempotent — the worker may be re-run against the
|
||||||
|
same `IMPORT_ID` on retry; second runs must overwrite the prior
|
||||||
|
`report.json` cleanly.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## CI
|
||||||
|
|
||||||
|
Pushes to `trunk` build + push the image to
|
||||||
|
`repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer:latest` and
|
||||||
|
`...:<sha>`. Pushes of a `YYYY.MM.NNN` tag additionally tag
|
||||||
|
`...:YYYY.MM.NNN`. CI runs the smoke test (image starts and
|
||||||
|
`echo ok` runs) and PHP `-l` / `bash -n` syntax checks on every script
|
||||||
|
before pushing.
|
||||||
|
|
||||||
|
See `.gitea/workflows/build-push.yaml`.
|
||||||
166
Dockerfile
Normal file
166
Dockerfile
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# cpanel-importer — sanitization sandbox for cPanel cpmove tarballs.
|
||||||
|
#
|
||||||
|
# See cpanel-import-container-spec.md §1 for the full design.
|
||||||
|
#
|
||||||
|
# Build: docker build -t cpanel-importer:dev .
|
||||||
|
# Run: see README.md for the docker run invocation the WHP panel uses.
|
||||||
|
|
||||||
|
FROM almalinux:10-minimal
|
||||||
|
|
||||||
|
LABEL org.opencontainers.image.title="cpanel-importer"
|
||||||
|
LABEL org.opencontainers.image.description="cPanel cpmove sanitization sandbox (ClamAV + SaneSecurity + WP content scan)"
|
||||||
|
LABEL org.opencontainers.image.source="https://repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer"
|
||||||
|
LABEL org.opencontainers.image.licenses="MIT"
|
||||||
|
|
||||||
|
ARG TARGETARCH=amd64
|
||||||
|
# UID/GID of the unprivileged worker. Matches the spec — panel calls
|
||||||
|
# `docker run --user 999:999`, so this UID must actually exist inside the
|
||||||
|
# image (the EPEL `clamav` and `php` user accounts collide with low UIDs;
|
||||||
|
# 999 is kept free by creating whp-import before clamav is installed).
|
||||||
|
ARG WHP_UID=999
|
||||||
|
ARG WHP_GID=999
|
||||||
|
|
||||||
|
ENV LANG=C.UTF-8 \
|
||||||
|
LC_ALL=C.UTF-8 \
|
||||||
|
PHP_INI_DIR=/etc/php.d
|
||||||
|
|
||||||
|
# Single RUN to minimize layers and image size. Cleans dnf cache and
|
||||||
|
# /usr/share/{doc,man,info} at the end of the layer.
|
||||||
|
#
|
||||||
|
# Pinning strategy:
|
||||||
|
# - PHP 8.4: AlmaLinux 10 stock ships PHP 8.3 only; the spec asks for
|
||||||
|
# 8.4 specifically. We add Remi's modular repo and enable the
|
||||||
|
# `php:remi-8.4` stream. We DO NOT pin to a specific 8.4.X because
|
||||||
|
# Remi rolls security patches into the same minor and an exact pin
|
||||||
|
# would block updates.
|
||||||
|
# - clamav / clamav-update: track the AL10 EPEL stream. CI builds
|
||||||
|
# monthly so signature DB age is bounded.
|
||||||
|
# - SaneSecurity: rsync at build time, then again at container start
|
||||||
|
# via `freshclam` (with the SaneSecurity third-party DBs configured).
|
||||||
|
#
|
||||||
|
# Ordering note: clamav-filesystem's RPM scripts auto-create a
|
||||||
|
# `virusgroup` system group at the next free GID. If we let dnf install
|
||||||
|
# clamav first, that lands at GID 999 — which then collides with the
|
||||||
|
# UID/GID we want for whp-import. We pre-create our user FIRST so
|
||||||
|
# virusgroup ends up at 998.
|
||||||
|
RUN set -eux; \
|
||||||
|
# microdnf is what almalinux:10-minimal ships with by default.
|
||||||
|
microdnf -y install --setopt=install_weak_deps=0 \
|
||||||
|
epel-release \
|
||||||
|
dnf \
|
||||||
|
shadow-utils \
|
||||||
|
; \
|
||||||
|
# Add Remi's repo for PHP 8.4 (AL10 stock has 8.3 only).
|
||||||
|
dnf -y --setopt=install_weak_deps=0 install \
|
||||||
|
https://rpms.remirepo.net/enterprise/remi-release-10.rpm ; \
|
||||||
|
dnf -y --setopt=install_weak_deps=0 module reset php ; \
|
||||||
|
dnf -y --setopt=install_weak_deps=0 module enable php:remi-8.4 ; \
|
||||||
|
# Pre-create the worker BEFORE installing clamav so virusgroup
|
||||||
|
# doesn't claim our GID.
|
||||||
|
groupadd --system --gid ${WHP_GID} whp-import ; \
|
||||||
|
useradd --system --uid ${WHP_UID} --gid ${WHP_GID} \
|
||||||
|
--home-dir /opt/whp --no-create-home \
|
||||||
|
--shell /sbin/nologin whp-import ; \
|
||||||
|
dnf -y --setopt=install_weak_deps=0 install \
|
||||||
|
php-cli \
|
||||||
|
php-json \
|
||||||
|
php-mbstring \
|
||||||
|
php-pdo \
|
||||||
|
php-mysqlnd \
|
||||||
|
php-xml \
|
||||||
|
php-zip \
|
||||||
|
php-process \
|
||||||
|
clamav \
|
||||||
|
clamav-update \
|
||||||
|
tar \
|
||||||
|
gzip \
|
||||||
|
bzip2 \
|
||||||
|
xz \
|
||||||
|
mariadb \
|
||||||
|
rsync \
|
||||||
|
ca-certificates \
|
||||||
|
coreutils-single \
|
||||||
|
findutils \
|
||||||
|
which \
|
||||||
|
; \
|
||||||
|
mkdir -p /opt/whp /scripts /host/backup /host/quarantine /host/sanitized \
|
||||||
|
/var/lib/clamav /var/log/clamav ; \
|
||||||
|
# /opt/whp + /var/log/clamav owned by worker now. /var/lib/clamav
|
||||||
|
# ownership is set AFTER the freshclam build-time pull below — root
|
||||||
|
# has to be able to write there during the build.
|
||||||
|
chown -R whp-import:whp-import /opt/whp /var/log/clamav ; \
|
||||||
|
# /host/quarantine and /host/sanitized are the bind-mount RW
|
||||||
|
# targets. The panel chowns the HOST paths to UID 999 before
|
||||||
|
# invocation (see README.md). When the host path is empty Docker
|
||||||
|
# copies the IMAGE-side dir's ownership onto the new volume; we
|
||||||
|
# need that ownership to be whp-import so an empty bind mount on
|
||||||
|
# those paths still results in a writable volume. (Bind mounts to
|
||||||
|
# an EXISTING host dir keep host ownership and are independent of
|
||||||
|
# this — the panel sets up its own dirs with mode 750 owner 999.)
|
||||||
|
chown whp-import:whp-import /host/quarantine /host/sanitized ; \
|
||||||
|
# Strip dnf cache.
|
||||||
|
dnf -y clean all ; \
|
||||||
|
rm -rf /var/cache/dnf /var/cache/yum /var/cache/ldconfig/* \
|
||||||
|
/usr/share/doc /usr/share/man /usr/share/info
|
||||||
|
|
||||||
|
# Pre-seed ClamAV signature databases at build time so the first
|
||||||
|
# container run isn't dependent on freshclam succeeding before the scan.
|
||||||
|
#
|
||||||
|
# We do two passes:
|
||||||
|
# 1. freshclam (mainline ClamAV signatures: main.cvd, daily.cvd, bytecode.cvd).
|
||||||
|
# 2. rsync the SaneSecurity Foxhole.PHP DB — PHP-malware-focused, this
|
||||||
|
# is the high-value addition for our use case. Junkemailfilter rules
|
||||||
|
# are deliberately skipped (we don't scan email here).
|
||||||
|
#
|
||||||
|
# Both runs fall back to `|| echo "WARN: ..."` so a transient network failure
|
||||||
|
# during build does not break the image build; the container also runs
|
||||||
|
# `freshclam` on start so a stale baseline gets refreshed at runtime.
|
||||||
|
COPY configs/freshclam.conf /etc/freshclam.conf
|
||||||
|
COPY configs/sanesecurity-mirror.txt /opt/whp/sanesecurity-mirror.txt
|
||||||
|
|
||||||
|
# Pre-seed signatures as root, then chown the result. We don't ship the
|
||||||
|
# privilege-switching tools (runuser/su are in util-linux full, ~2MB we
|
||||||
|
# don't need at runtime) — the worker only needs to READ /var/lib/clamav
|
||||||
|
# and the runtime freshclam refresh runs as the same UID 999 anyway, so
|
||||||
|
# ownership matters there.
|
||||||
|
RUN set -eux; \
|
||||||
|
chown whp-import:whp-import /etc/freshclam.conf ; \
|
||||||
|
# Mainline ClamAV DB pull at build time so we have something to scan
|
||||||
|
# against even if the runtime freshclam refresh fails (e.g., no net).
|
||||||
|
# freshclam has a compile-time default --user=clamupdate (UID 997)
|
||||||
|
# and tries to setuid() to it; /var/lib/clamav is not chowned until after this pull,
|
||||||
|
# so we tell it explicitly to stay as root for this one-shot pull.
|
||||||
|
freshclam --no-warnings --user=root || \
|
||||||
|
echo "WARN: freshclam failed during build; runtime refresh will retry" ; \
|
||||||
|
# SaneSecurity Foxhole.PHP rules. The project rotates mirrors; the
|
||||||
|
# file we COPYed lists the working rsync mirror used at build time.
|
||||||
|
SANE_MIRROR="$(cat /opt/whp/sanesecurity-mirror.txt)" ; \
|
||||||
|
rsync -av --no-motd --contimeout=30 \
|
||||||
|
--include='foxhole_filename.cdb' \
|
||||||
|
--include='foxhole_filename.cdb.sig' \
|
||||||
|
--include='foxhole_generic.cdb' \
|
||||||
|
--include='foxhole_generic.cdb.sig' \
|
||||||
|
--include='foxhole_js.cdb' \
|
||||||
|
--include='foxhole_js.cdb.sig' \
|
||||||
|
--include='foxhole_js.ndb' \
|
||||||
|
--include='foxhole_js.ndb.sig' \
|
||||||
|
--include='foxhole_mail.cdb' \
|
||||||
|
--include='foxhole_mail.cdb.sig' \
|
||||||
|
--include='foxhole_all.ndb' \
|
||||||
|
--include='foxhole_all.ndb.sig' \
|
||||||
|
--exclude='*' \
|
||||||
|
"rsync://${SANE_MIRROR}/sanesecurity/" /var/lib/clamav/ \
|
||||||
|
|| echo "WARN: SaneSecurity rsync failed during build; runtime freshclam will retry" ; \
|
||||||
|
chown -R whp-import:whp-import /var/lib/clamav ; \
|
||||||
|
chmod -R u=rwX,g=rX,o= /var/lib/clamav ; \
|
||||||
|
ls -la /var/lib/clamav/
|
||||||
|
|
||||||
|
COPY --chown=whp-import:whp-import scripts/ /scripts/
|
||||||
|
RUN chmod 0755 /scripts/entrypoint.sh /scripts/extract.sh \
|
||||||
|
/scripts/scan-files.php /scripts/scan-dbs.php
|
||||||
|
|
||||||
|
WORKDIR /opt/whp
|
||||||
|
USER whp-import
|
||||||
|
|
||||||
|
# stdin is closed — the container reads its inputs from env + bind mounts.
|
||||||
|
ENTRYPOINT ["/scripts/entrypoint.sh"]
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 An Honest Host, LLC / cloud-hosting-platform contributors
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
221
README.md
Normal file
221
README.md
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
# cpanel-importer
|
||||||
|
|
||||||
|
A **sanitization sandbox** for cPanel `cpmove` tarballs, run as a one-shot
|
||||||
|
Docker container before WHP imports a customer site.
|
||||||
|
|
||||||
|
It is **not** a full importer. The container:
|
||||||
|
|
||||||
|
1. extracts the cpmove tarball into a tmpfs scratch dir (after a
|
||||||
|
pre-extract symlink scan),
|
||||||
|
2. runs ClamAV (with SaneSecurity PHP-malware rules) over every file,
|
||||||
|
quarantining hits,
|
||||||
|
3. rewrites `ENGINE=MyISAM` → `ENGINE=InnoDB` in every `.sql` dump,
|
||||||
|
4. runs a WordPress content scan on each WP dump and refuses dumps with
|
||||||
|
high-confidence malware signals (e.g. `siteurl` pointing at a
|
||||||
|
non-customer domain),
|
||||||
|
5. rsyncs the cleaned tree to `/host/sanitized/<importid>/`,
|
||||||
|
6. emits a JSON report describing every action taken.
|
||||||
|
|
||||||
|
The WHP panel reads `/host/sanitized/<importid>/report.json` after the
|
||||||
|
container exits and hands the cleaned files off to the existing
|
||||||
|
`CpanelBackupImporter` flow (Linux-user create, MySQL DB create, file
|
||||||
|
rsync, DNS push, container provision, etc.).
|
||||||
|
|
||||||
|
**Full design:** `/workspace/cpanel-import-container-spec.md` (also
|
||||||
|
checked in at `docs/cpanel-import-container-spec.md` when this repo is
|
||||||
|
mirrored to the panel).
|
||||||
|
|
||||||
|
**Panel-side glue:** `/workspace/whp/web-files/libs/CpanelBackupImporter.php`
|
||||||
|
+ `web-files/api/cpanel-import-ajax.php` + `web-files/pages/cpanel-import-results.php`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How the panel invokes it
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run \
|
||||||
|
--rm \
|
||||||
|
--name whp-cpanel-import-${IMPORT_ID} \
|
||||||
|
--network client-net \
|
||||||
|
--user 999:999 \
|
||||||
|
--cap-drop=ALL \
|
||||||
|
--security-opt=no-new-privileges \
|
||||||
|
--read-only \
|
||||||
|
--tmpfs /tmp:rw,nosuid,nodev,exec,size=4g \
|
||||||
|
--tmpfs /var/lib/clamav:rw,nosuid,nodev,size=512m \
|
||||||
|
--volume /docker/users/${USERNAME}/userfiles/${BACKUP_NAME}:/host/backup/${BACKUP_NAME}:ro \
|
||||||
|
--volume /docker/users/${USERNAME}/.cpanel-import-quarantine:/host/quarantine:rw \
|
||||||
|
--volume /docker/users/${USERNAME}/.cpanel-import-sanitized:/host/sanitized:rw \
|
||||||
|
--env IMPORT_ID=${IMPORT_ID} \
|
||||||
|
--env IMPORT_USERNAME=${USERNAME} \
|
||||||
|
--env IMPORT_BACKUP_FILE=/host/backup/${BACKUP_NAME} \
|
||||||
|
--env CLAMAV_REFRESH=true \
|
||||||
|
--memory=4g \
|
||||||
|
--memory-swap=4g \
|
||||||
|
--cpus=2 \
|
||||||
|
--pull=missing \
|
||||||
|
repo.anhonesthost.net/cloud-hosting-platform/cpanel-importer:2026.05.NNN
|
||||||
|
```
|
||||||
|
|
||||||
|
Container exits with status `0` on success, non-zero on any failure
|
||||||
|
(missing/unreadable backup, dangerous symlink found, scanner error).
|
||||||
|
Even on failure, `/host/sanitized/<importid>/report.json` is written
|
||||||
|
with `"status": "failed"` and the failing stage.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Bind-mount catalog
|
||||||
|
|
||||||
|
| Host path | Container path | Mode | Purpose |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `/docker/users/<user>/userfiles/<tarball>` | `/host/backup/<tarball>` | RO | the cpmove input |
|
||||||
|
| `/docker/users/<user>/.cpanel-import-quarantine/` | `/host/quarantine/` | RW | files moved here on ClamAV hit |
|
||||||
|
| `/docker/users/<user>/.cpanel-import-sanitized/` | `/host/sanitized/` | RW | cleaned output the panel reads (under `<importid>/`) |
|
||||||
|
|
||||||
|
Host paths not listed here are **not** visible to the container. No host `/etc`,
|
||||||
|
no `/usr`, no `/root`, no `/home`, no `docker.sock`. The worker runs as
|
||||||
|
UID/GID 999 with `--cap-drop=ALL --read-only`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## `report.json` schema
|
||||||
|
|
||||||
|
Written to `/host/sanitized/<importid>/report.json` at the end of every
|
||||||
|
run, success or failure.
|
||||||
|
|
||||||
|
### Success
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"import_id": "import_abc123",
|
||||||
|
"status": "completed",
|
||||||
|
"scan_duration_seconds": 143,
|
||||||
|
"files_scanned": 28471,
|
||||||
|
"files_clean": 28432,
|
||||||
|
"files_cleaned": 0,
|
||||||
|
"files_quarantined": 39,
|
||||||
|
"actions": [
|
||||||
|
{
|
||||||
|
"path": "cpmove-testuser/homedir/public_html/example.com/ALFA_DATA/index.php",
|
||||||
|
"signature": "PHP.Webshell.ALFA",
|
||||||
|
"action": "quarantined",
|
||||||
|
"cleaner": null,
|
||||||
|
"backup": "/host/quarantine/import_abc123/cpmove-testuser/homedir/public_html/example.com/ALFA_DATA/index.php"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"databases": [
|
||||||
|
{
|
||||||
|
"dbname": "testuser_wp",
|
||||||
|
"size_bytes": 5393199573,
|
||||||
|
"engine_changes": {
|
||||||
|
"myisam_to_innodb": 17,
|
||||||
|
"row_format_dynamic_applied": 0,
|
||||||
|
"fulltext_indexes_dropped": 0
|
||||||
|
},
|
||||||
|
"wp_content_scan": {
|
||||||
|
"is_wordpress": true,
|
||||||
|
"flags": [
|
||||||
|
{
|
||||||
|
"severity": "high",
|
||||||
|
"code": "siteurl_external_domain",
|
||||||
|
"details": "wp_options.siteurl = \"http://evil.tld\" — host 'evil.tld' not in allowed domain list (example.com)"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"imported_into_new_server": false,
|
||||||
|
"flagged_sql_path": "/host/sanitized/import_abc123/mysql/testuser_wp.sql.flagged"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"summary_for_panel": {
|
||||||
|
"show_alert": true,
|
||||||
|
"alert_severity": "warning",
|
||||||
|
"alert_message": "39 files quarantined + 0 cleaned in place; 1 database(s) refused as compromised. ..."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Failure
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
  "import_id": "import_abc123",
  "status": "failed",
  "failed_stage": "extract",
  "error": "extract.sh failed; see stderr above",
  "scan_duration_seconds": 4,
  "files": null,
  "databases": null
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`failed_stage` is one of: `validate_env`, `freshclam`, `extract`,
|
||||||
|
`scan_files`, `scan_dbs`, `rsync_out`, `write_report`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Local development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the image
|
||||||
|
docker build -t cpanel-importer:dev .
|
||||||
|
|
||||||
|
# Build the synthetic fixture tarballs
|
||||||
|
bash tests/build-fixtures.sh
|
||||||
|
|
||||||
|
# Run against the clean fixture
|
||||||
|
mkdir -p /tmp/test-quarantine /tmp/test-sanitized
|
||||||
|
docker run --rm \
|
||||||
|
-e IMPORT_ID=test \
|
||||||
|
-e IMPORT_USERNAME=testuser \
|
||||||
|
-e IMPORT_BACKUP_FILE=/host/backup/cpmove-clean.tar.gz \
|
||||||
|
-e CLAMAV_REFRESH=false \
|
||||||
|
-v "$(pwd)/tests/fixtures/cpmove-clean.tar.gz:/host/backup/cpmove-clean.tar.gz:ro" \
|
||||||
|
-v /tmp/test-quarantine:/host/quarantine \
|
||||||
|
-v /tmp/test-sanitized:/host/sanitized \
|
||||||
|
cpanel-importer:dev
|
||||||
|
cat /tmp/test-sanitized/test/report.json
|
||||||
|
|
||||||
|
# Run against the ALFA-symlink fixture — must exit non-zero with a
|
||||||
|
# "dangerous symlinks" message and report.json should have
|
||||||
|
# status=failed, failed_stage=extract.
|
||||||
|
docker run --rm \
|
||||||
|
-e IMPORT_ID=test-alfa \
|
||||||
|
-e IMPORT_USERNAME=testuser \
|
||||||
|
-e IMPORT_BACKUP_FILE=/host/backup/cpmove-alfa.tar.gz \
|
||||||
|
-e CLAMAV_REFRESH=false \
|
||||||
|
-v "$(pwd)/tests/fixtures/cpmove-alfa.tar.gz:/host/backup/cpmove-alfa.tar.gz:ro" \
|
||||||
|
-v /tmp/test-quarantine:/host/quarantine \
|
||||||
|
-v /tmp/test-sanitized:/host/sanitized \
|
||||||
|
cpanel-importer:dev \
|
||||||
|
&& echo "BUG: should have exited non-zero" \
|
||||||
|
|| echo "OK: refused dangerous tarball"
|
||||||
|
cat /tmp/test-sanitized/test-alfa/report.json
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What is in this v1.0 vs. what is stubbed for v1.1+
|
||||||
|
|
||||||
|
| Feature | v1.0 | v1.1 |
|
||||||
|
|---|---|---|
|
||||||
|
| Pre-extract symlink scan | full port of `scanTarballForDangerousSymlinks` | – |
|
||||||
|
| Hardened tar extract | yes | – |
|
||||||
|
| ClamAV + SaneSecurity Foxhole.PHP rules | yes | – |
|
||||||
|
| File classification | quarantine-on-every-hit | KNOWN_REMOVABLE + REMOVABLE_WITH_BACKUP cleaners |
|
||||||
|
| MyISAM → InnoDB rewrite | yes | – |
|
||||||
|
| WP identification | yes (wp_options + wp_posts + wp_users + sentinel) | – |
|
||||||
|
| WP content scan | siteurl_external_domain only | post_content script-injection, theme/stylesheet malware patterns, user_pass leaked-hash, Wordfence regex |
|
||||||
|
| ROW_FORMAT=DYNAMIC, FULLTEXT drop | stubbed (always 0) | yes |
|
||||||
|
| Sandboxed MariaDB-in-container for SQL transforms | not present (regex transforms only) | yes |
|
||||||
|
|
||||||
|
See `CONTRIBUTING.md` for how to add a cleaner pattern or a new WP scan
|
||||||
|
signature.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- Spec: `/workspace/cpanel-import-container-spec.md`
|
||||||
|
- Panel-side importer: `/workspace/whp/web-files/libs/CpanelBackupImporter.php`
|
||||||
|
- WHP panel `safety-net.php`: `/workspace/whp/web-files/includes/safety-net.php`
|
||||||
|
- Existing CI workflow for sibling project: `/workspace/cloud-apache-container/.gitea/workflows/build-push.yaml`
|
||||||
41
configs/freshclam.conf
Normal file
41
configs/freshclam.conf
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# cpanel-importer freshclam config.
|
||||||
|
#
|
||||||
|
# Minimal subset of /etc/freshclam.conf that the EL `clamav-update`
|
||||||
|
# package ships. We run freshclam at image build time AND at container
|
||||||
|
# start time (via entrypoint.sh when CLAMAV_REFRESH=true) so the rules
|
||||||
|
# DB is reasonably current.
|
||||||
|
#
|
||||||
|
# Anything not listed here uses the package defaults.
|
||||||
|
|
||||||
|
DatabaseDirectory /var/lib/clamav
|
||||||
|
UpdateLogFile /var/log/clamav/freshclam.log
|
||||||
|
LogVerbose no
|
||||||
|
LogTime yes
|
||||||
|
LogFileMaxSize 10M
|
||||||
|
Foreground yes
|
||||||
|
# NOTE: DatabaseOwner is intentionally omitted. At build time freshclam
|
||||||
|
# runs as root and we chown the DB to whp-import after the pull. At
|
||||||
|
# runtime the entrypoint is already running as UID 999 (whp-import) via
|
||||||
|
# the docker `--user 999:999` flag, so no privilege drop is needed —
|
||||||
|
# leaving DatabaseOwner set would cause freshclam to refuse to start as
|
||||||
|
# whp-import (it tries to setuid to its configured DatabaseOwner before
|
||||||
|
# accepting the running uid is already that user).
|
||||||
|
|
||||||
|
# Mainline ClamAV signatures.
|
||||||
|
DatabaseMirror database.clamav.net
|
||||||
|
|
||||||
|
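# Bound freshclam's own update polling and network waits. These three
# directives apply to the downloads freshclam itself performs (the
# mainline ClamAV DB configured above). SaneSecurity rules are secondary
# defense for us; the mainline ClamAV DB is the primary.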
|
||||||
|
Checks 12
|
||||||
|
ConnectTimeout 30
|
||||||
|
ReceiveTimeout 60
|
||||||
|
|
||||||
|
# Skip the bytecode signatures — they target binary malware and add ~30
|
||||||
|
# MB to the rules DB with limited payoff against PHP webshells.
|
||||||
|
# (Comment out the next line to re-enable.)
|
||||||
|
Bytecode no
|
||||||
|
|
||||||
|
# Proxy support left at compile-time defaults (none). To enable, set
|
||||||
|
# HTTPProxyServer <host> and HTTPProxyPort <port>. We deliberately do
|
||||||
|
# NOT emit empty values for these — freshclam rejects empty option
|
||||||
|
# values with "Missing argument for option" and refuses to start.
|
||||||
1
configs/sanesecurity-mirror.txt
Normal file
1
configs/sanesecurity-mirror.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
rsync.sanesecurity.net
|
||||||
219
scripts/entrypoint.sh
Executable file
219
scripts/entrypoint.sh
Executable file
@@ -0,0 +1,219 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# entrypoint.sh — main controller for the cpanel-importer sandbox.
|
||||||
|
#
|
||||||
|
# Inputs (env, set by the panel's docker run):
|
||||||
|
# IMPORT_ID unique id for this run; used in quarantine + report paths
|
||||||
|
# IMPORT_USERNAME cPanel/WHP username the cpmove belongs to
|
||||||
|
# IMPORT_BACKUP_FILE absolute path inside the container, typically
|
||||||
|
# /host/backup/cpmove-<user>.tar.gz
|
||||||
|
# CLAMAV_REFRESH "true" to run freshclam at start (default: true)
|
||||||
|
#
|
||||||
|
# Flow (spec §0):
|
||||||
|
# 1. validate env
|
||||||
|
# 2. (optional) refresh ClamAV signatures
|
||||||
|
# 3. extract → /tmp/extract/
|
||||||
|
# 4. file scan → /tmp/scan-files-report.json
|
||||||
|
# 5. DB sanitize → /tmp/sanitized/mysql/, /tmp/scan-dbs-report.json
|
||||||
|
# 6. rsync /tmp/sanitized/ → /host/sanitized/<importid>/
|
||||||
|
# 7. write /host/sanitized/<importid>/report.json (merged)
|
||||||
|
#
|
||||||
|
# On failure at any stage we still write a partial report.json with
|
||||||
|
# status="failed" + the stage that broke, then exit non-zero.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# --- logging ---------------------------------------------------------------
|
||||||
|
|
||||||
|
# Emit the current UTC time as an ISO-8601 timestamp with a "Z" suffix.
ts() {
  date -u +'%Y-%m-%dT%H:%M:%SZ'
}

# Print one timestamped log line to stdout; all arguments are joined
# with single spaces.
log() {
  printf '[%s] %s\n' "$(ts)" "$*"
}

# Log a FATAL line, persist a failure report tagged with the current
# $STAGE, then terminate the script with exit status 1.
die() {
  log "FATAL: $*"
  write_failure_report "$STAGE" "$*"
  exit 1
}
|
||||||
|
|
||||||
|
# Buffered partial state. The final report.json is written by the merge
|
||||||
|
# step (see write_final_report); if we crash before then, write_failure_report
|
||||||
|
# emits whatever partial pieces exist.
|
||||||
|
STAGE="init"
|
||||||
|
START_TS="$(date -u +%s)"
|
||||||
|
|
||||||
|
# _json_string — print $1 as a double-quoted JSON string literal.
# Pure bash + tr on purpose: the failure reporter must still work when
# php (or anything else in the pipeline) is what broke.
_json_string() {
  local s=$1
  s=${s//\\/\\\\}        # backslash first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  # Any control character still present has no short escape; drop it
  # rather than emit invalid JSON.
  s=$(printf '%s' "$s" | LC_ALL=C tr -d '\000-\037')
  printf '"%s"' "$s"
}

# write_failure_report — persist a status="failed" report.json.
#
# Arguments:
#   $1  stage that failed (becomes "failed_stage")
#   $2  human-readable error message (becomes "error")
#   $3  optional output root; defaults to /host/sanitized
# Globals:
#   IMPORT_ID (read; "unknown" if unset), START_TS (read)
# Returns:
#   always 0 — mkdir and write failures are logged via log() and
#   swallowed so the reporter can never abort the script itself.
write_failure_report() {
  local stage=$1
  local msg=$2
  local out_root=${3:-/host/sanitized}
  local import_id=${IMPORT_ID:-unknown}
  local out_dir="$out_root/$import_id"
  local now started json

  now=$(date -u +%s) || now=0
  started=${START_TS:-$now}

  # mkdir AND the report write can both fail (mount RO, missing
  # /host/sanitized, etc.); we log every failure and carry on.
  if ! mkdir -p "$out_dir" 2>/dev/null; then
    log "WARN: failure-report mkdir failed for $out_dir; report will not be persisted"
    return 0
  fi

  # Every interpolated string goes through _json_string so a quote,
  # backslash or newline in IMPORT_ID / stage / message cannot produce
  # a report the panel is unable to parse.
  printf -v json '{\n  "import_id": %s,\n  "status": "failed",\n  "failed_stage": %s,\n  "error": %s,\n  "scan_duration_seconds": %d,\n  "files": null,\n  "databases": null\n}' \
    "$(_json_string "$import_id")" \
    "$(_json_string "$stage")" \
    "$(_json_string "$msg")" \
    "$(( now - started ))"

  if ! printf '%s\n' "$json" > "$out_dir/report.json" 2>/dev/null; then
    log "WARN: failure-report write failed for $out_dir/report.json"
  fi
  return 0
}
|
||||||
|
|
||||||
|
# --- env validation --------------------------------------------------------
|
||||||
|
|
||||||
|
STAGE="validate_env"
log "cpanel-importer starting (container UID=$(id -u) GID=$(id -g))"

# Required inputs. These go through die() rather than "${VAR:?...}" so
# that a missing variable still produces a status="failed" report.json,
# as promised in the header; ":?" would exit before the reporter runs.
for required_var in IMPORT_ID IMPORT_USERNAME IMPORT_BACKUP_FILE; do
  if [[ -z "${!required_var:-}" ]]; then
    die "$required_var env var is required"
  fi
done
unset required_var
CLAMAV_REFRESH="${CLAMAV_REFRESH:-true}"

# IMPORT_ID becomes a directory name under /host/quarantine and
# /host/sanitized, so it must be a single path component. The bad value
# is moved aside first so the failure report itself is not written to a
# traversed path.
case "$IMPORT_ID" in
  . | .. | */*)
    rejected_import_id="$IMPORT_ID"
    IMPORT_ID="unknown"
    die "IMPORT_ID must be a single path component (got: $rejected_import_id)"
    ;;
esac

log "import_id=$IMPORT_ID username=$IMPORT_USERNAME backup=$IMPORT_BACKUP_FILE"

if [[ ! -f "$IMPORT_BACKUP_FILE" ]]; then
  die "backup file does not exist or is not a regular file: $IMPORT_BACKUP_FILE"
fi

# Make sure the output dirs exist (they're bind mounts, so we trust the
# host to have created them, but mkdir -p is harmless).
QUARANTINE_DIR="/host/quarantine/$IMPORT_ID"
SANITIZED_DIR="/host/sanitized/$IMPORT_ID"
mkdir -p "$QUARANTINE_DIR" "$SANITIZED_DIR" \
  || die "cannot create quarantine/sanitized output dirs (are the bind mounts RW?)"

# Container-internal scratch space (mounted as tmpfs by the panel).
EXTRACT_DIR="/tmp/extract"
WORK_DIR="/tmp/sanitized"
mkdir -p "$EXTRACT_DIR" "$WORK_DIR/mysql" \
  || die "cannot create scratch dirs under /tmp (is the tmpfs mounted and writable?)"
|
||||||
|
|
||||||
|
# --- refresh ClamAV signatures --------------------------------------------
|
||||||
|
|
||||||
|
STAGE="freshclam"
case "$CLAMAV_REFRESH" in
  true)
    log "refreshing ClamAV signatures (freshclam)"
    # A failed refresh is tolerated (e.g. the container has no outbound
    # network): warn, show the tail of the log, and keep going with the
    # signature DB baked in at build time.
    freshclam --no-warnings >/tmp/freshclam.log 2>&1 || {
      log "WARN: freshclam failed; proceeding with build-time signature DB"
      tail -20 /tmp/freshclam.log || true
    }
    ;;
  *)
    log "CLAMAV_REFRESH=false; skipping freshclam"
    ;;
esac
|
||||||
|
|
||||||
|
# --- extract the cpmove ----------------------------------------------------
|
||||||
|
|
||||||
|
# Stage: extract — symlink pre-scan + untar, delegated to extract.sh.
STAGE="extract"
log "stage: extract"
/scripts/extract.sh "$IMPORT_BACKUP_FILE" "$EXTRACT_DIR" "$IMPORT_USERNAME" \
  || die "extract.sh failed; see stderr above"

# --- ClamAV scan + auto-clean/quarantine ----------------------------------

STAGE="scan_files"
log "stage: scan_files"
scan_files_args=(
  --extract "$EXTRACT_DIR"
  --quarantine "$QUARANTINE_DIR"
  --report /tmp/scan-files-report.json
  --import-id "$IMPORT_ID"
)
if ! php /scripts/scan-files.php "${scan_files_args[@]}"; then
  die "scan-files.php failed; see stderr above"
fi

# --- DB engine swap + WP content scan -------------------------------------

STAGE="scan_dbs"
log "stage: scan_dbs"
scan_dbs_args=(
  --extract "$EXTRACT_DIR"
  --out "$WORK_DIR/mysql"
  --final-prefix "$SANITIZED_DIR/mysql"
  --report /tmp/scan-dbs-report.json
  --import-id "$IMPORT_ID"
  --username "$IMPORT_USERNAME"
)
if ! php /scripts/scan-dbs.php "${scan_dbs_args[@]}"; then
  die "scan-dbs.php failed; see stderr above"
fi
|
||||||
|
|
||||||
|
# --- rsync cleaned tree to /host/sanitized --------------------------------
|
||||||
|
|
||||||
|
STAGE="rsync_out"
log "stage: rsync_out"

# Both copies use the same option set: archive mode, but with
# owner/group/permission bits from the source ignored and replaced by a
# fixed mode (dirs u=rwx,g=rx,o= / files u=rw,g=r,o=).
rsync_opts=(
  -a --no-owner --no-group --no-perms
  --chmod=Du=rwx,Dg=rx,Do=,Fu=rw,Fg=r,Fo=
)

# Copy the (now-cleaned) extracted tree to the sanitized output. Files
# that scan-files.php quarantined are no longer in the extract dir (the
# scanner moved them), so what is copied is the cleaned tree by
# construction.
if ! rsync "${rsync_opts[@]}" "$EXTRACT_DIR"/ "$SANITIZED_DIR/extracted/"; then
  die "rsync to sanitized dir failed"
fi

# Then drop the cleaned .sql files in place too.
if ! rsync "${rsync_opts[@]}" "$WORK_DIR/mysql"/ "$SANITIZED_DIR/mysql/"; then
  die "rsync of cleaned .sql files failed"
fi
|
||||||
|
|
||||||
|
# --- merge per-stage reports into the final report.json -------------------
|
||||||
|
|
||||||
|
STAGE="write_report"
log "stage: write_report"
DURATION=$(( $(date -u +%s) - START_TS ))

# Merge the two per-stage reports into the final report.json.
#
# The inline PHP receives: import id, duration, scan-files report path,
# scan-dbs report path, output path. It exits non-zero (which triggers
# die below) when:
#   - a stage report exists but cannot be read or decoded, because
#     reporting "completed, 0 files" would then be false;
#   - the merged report cannot be encoded or written.
# A stage report that is simply absent is still tolerated and counted as
# zeros. Invalid UTF-8 (e.g. in customer file names) is substituted
# rather than allowed to make json_encode fail and yield an empty report.
# NOTE: the PHP below sits inside a single-quoted shell string and must
# not contain an apostrophe.
php -r '
$importId  = $argv[1];
$duration  = (int) $argv[2];
$filesPath = $argv[3];
$dbsPath   = $argv[4];
$outPath   = $argv[5];

$loadStage = function (string $path): ?array {
    if (!is_file($path)) {
        return null;
    }
    $raw = file_get_contents($path);
    $decoded = ($raw === false) ? null : json_decode($raw, true);
    if (!is_array($decoded)) {
        fwrite(STDERR, "report merge: stage report is unreadable or not valid JSON: $path\n");
        exit(1);
    }
    return $decoded;
};

$files = $loadStage($filesPath);
$dbs   = $loadStage($dbsPath);

$filesScanned     = $files["files_scanned"] ?? 0;
$filesClean       = $files["files_clean"] ?? 0;
$filesCleaned     = $files["files_cleaned"] ?? 0;
$filesQuarantined = $files["files_quarantined"] ?? 0;
$actions          = $files["actions"] ?? [];
$databases        = $dbs["databases"] ?? [];

$dbRefused = 0;
foreach ($databases as $db) {
    if (($db["imported_into_new_server"] ?? true) === false) $dbRefused++;
}

$severity = "info";
$alert = false;
$msg = "Sanitization clean: no malware signatures detected.";
if ($filesQuarantined > 0 || $dbRefused > 0) {
    $alert = true;
    $severity = ($filesQuarantined > 50 || $dbRefused > 0) ? "warning" : "info";
    $msg = sprintf(
        "%d files quarantined + %d cleaned in place; %d database(s) refused as compromised. Customer site may have been compromised at the source — recommend review.",
        $filesQuarantined, $filesCleaned, $dbRefused
    );
}

$report = [
    "import_id" => $importId,
    "status" => "completed",
    "scan_duration_seconds" => $duration,
    "files_scanned" => $filesScanned,
    "files_clean" => $filesClean,
    "files_cleaned" => $filesCleaned,
    "files_quarantined" => $filesQuarantined,
    "actions" => $actions,
    "databases" => $databases,
    "summary_for_panel" => [
        "show_alert" => $alert,
        "alert_severity" => $severity,
        "alert_message" => $msg,
    ],
];

$json = json_encode(
    $report,
    JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
);
if ($json === false) {
    fwrite(STDERR, "report merge: json_encode failed: " . json_last_error_msg() . "\n");
    exit(1);
}
if (file_put_contents($outPath, $json . "\n") === false) {
    fwrite(STDERR, "report merge: cannot write $outPath\n");
    exit(1);
}
fprintf(STDERR, "report written: %s\n", $outPath);
' "$IMPORT_ID" "$DURATION" /tmp/scan-files-report.json /tmp/scan-dbs-report.json "$SANITIZED_DIR/report.json" \
  || die "report merge failed"

log "done — exited cleanly after ${DURATION}s"
exit 0
|
||||||
64
scripts/extract.sh
Executable file
64
scripts/extract.sh
Executable file
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# extract.sh — pre-extract symlink scan + cpmove untar.
|
||||||
|
#
|
||||||
|
# Usage: extract.sh <tarball> <dest> <username>
|
||||||
|
#
|
||||||
|
# Calls scripts/lib/scan-symlinks.php first; if it reports any DANGEROUS
|
||||||
|
# findings we abort BEFORE tar runs (per spec §0 step 2). On clean,
|
||||||
|
# extracts with the same hardening flags CpanelBackupImporter::extractBackup
|
||||||
|
# uses on the panel today (see web-files/libs/CpanelBackupImporter.php).
|
||||||
|
|
||||||
|
set -euo pipefail

TARBALL="${1:?usage: extract.sh <tarball> <dest> <username>}"
DEST="${2:?usage: extract.sh <tarball> <dest> <username>}"
USERNAME="${3:?usage: extract.sh <tarball> <dest> <username>}"

# Exit codes used below:
#   0  extracted
#   2  tarball missing, or the symlink scanner failed before a verdict
#   3  the symlink scanner reported DANGEROUS findings
# A tar failure propagates tar's own non-zero status via `set -e`.

ts() { date -u +'%Y-%m-%dT%H:%M:%SZ'; }
log() { printf '[%s] extract: %s\n' "$(ts)" "$*"; }

[[ -f "$TARBALL" ]] || { log "tarball not found: $TARBALL"; exit 2; }
mkdir -p "$DEST"

# --- pre-extract symlink scan ---------------------------------------------

log "scanning tarball for dangerous symlinks (cpmove vector check)"
SYMLINK_REPORT=$(mktemp -p /tmp scan-symlinks.XXXXXX.json)
# The report is only needed for the diagnostics below; remove it on
# every exit path.
trap 'rm -f -- "$SYMLINK_REPORT"' EXIT

# Capture the scanner's status instead of collapsing it to pass/fail:
# exit 1 means "DANGEROUS findings", anything else non-zero means the
# scanner itself failed (usage / I/O) and produced no verdict.
scan_rc=0
php /scripts/lib/scan-symlinks.php \
  --tarball "$TARBALL" \
  --username "$USERNAME" \
  --report "$SYMLINK_REPORT" || scan_rc=$?

case "$scan_rc" in
  0)
    log "symlink scan clean (no DANGEROUS findings)"
    ;;
  1)
    log "scan-symlinks.php exited non-zero"
    # Dump the findings on stderr so they show up next to the
    # entrypoint's "see stderr above" failure message.
    cat "$SYMLINK_REPORT" >&2 || true
    log "ABORT: tarball contains dangerous symlinks; aborting"
    exit 3
    ;;
  *)
    log "ABORT: scan-symlinks.php failed (exit $scan_rc) without a verdict; refusing to extract an unscanned tarball"
    exit 2
    ;;
esac

# --- extract --------------------------------------------------------------

# Pick the decompression flag from the file name. cpmove can be
# .tar.gz / .tar.bz2 / .tar.xz / .tar; anything unrecognised is passed
# to tar with no explicit compression flag.
tar_mode=(-x)
case "$TARBALL" in
  *.tar.gz | *.tgz)   tar_mode+=(-z) ;;
  *.tar.bz2 | *.tbz2) tar_mode+=(-j) ;;
  *.tar.xz | *.txz)   tar_mode+=(-J) ;;
  *.tar)              ;;
esac

log "extracting with hardened tar flags into $DEST"
# Hardening flags (mirrored from CpanelBackupImporter::extractBackup):
#   --no-same-owner / --no-same-permissions: drop archive-recorded
#     uid/perm bits so the cpmove can't drop setuid binaries at us.
#   --no-overwrite-dir: refuse to clobber existing directory metadata,
#     closing one historical tar-symlink-escape vector.
#   --absolute-names is NOT used — leading / in a member name is stripped.
# -C is used instead of `cd "$DEST"` so a relative $TARBALL path still
# resolves from the directory this script was started in.
tar --no-same-owner --no-same-permissions --no-overwrite-dir \
  -C "$DEST" "${tar_mode[@]}" -f "$TARBALL"

log "extracted OK ($(find "$DEST" -type f | wc -l) files)"
exit 0
|
||||||
46
scripts/lib/safety-net.php
Normal file
46
scripts/lib/safety-net.php
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* safety-net.php — container-narrow open_basedir allow-list.
|
||||||
|
*
|
||||||
|
* The sibling at /workspace/whp/web-files/includes/safety-net.php is the
|
||||||
|
* panel's allow-list — it includes /docker, /root/whp, /etc/whp, etc.,
|
||||||
|
* because the panel legitimately reads from those.
|
||||||
|
*
|
||||||
|
* Inside this container, the worker has a much smaller set of paths it
|
||||||
|
* needs. Anything outside this list is blocked at the PHP filesystem-
|
||||||
|
* function level (PHP enforces open_basedir in unlink/scandir/fopen/
|
||||||
|
* RecursiveDirectoryIterator/etc. AFTER symlink resolution, so a planted
|
||||||
|
* symlink-to-/proc cannot escape the allow-list).
|
||||||
|
*
|
||||||
|
* HISTORY — the same destruction-bug class that motivated the panel-side
|
||||||
|
* safety-net (whp02 /usr/bin + /etc wipe, 2026-05-28/29) is the reason
|
||||||
|
* this exists. In the container the host /etc /usr /root are not bind-
|
||||||
|
* mounted, but open_basedir gives belt-and-suspenders enforcement
|
||||||
|
* against any extracted-archive symlink walker we add later.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (function_exists('ini_set')) {
    // Container-internal paths only. Notable absences:
    //   - /etc, /usr, /var, /root as whole trees — only the specific
    //     ClamAV paths and the single freshclam.conf file listed below
    //     are allowed
    //   - /docker — there is no /docker in this image
    //   - /home — there is no /home in this image
    $allowed = implode(PATH_SEPARATOR, [
        '/host',                // /host/backup (RO), /host/quarantine, /host/sanitized
        '/tmp',                 // tmpfs scratch space
        '/opt/whp',             // WORKDIR + per-run state
        '/scripts',             // our own code
        '/var/lib/clamav',      // ClamAV signature DB
        '/var/log/clamav',      // freshclam log
        '/etc/freshclam.conf',  // single file, read-only
        '/proc/self',           // pid/cgroup introspection
    ]);

    // Only install the allow-list when nothing is configured yet; an
    // existing open_basedir (e.g. from php.ini) is left as it is.
    if ((string) ini_get('open_basedir') === '') {
        // ini_set() returns false on failure. Surface that instead of
        // silently running with no allow-list at all.
        if (@ini_set('open_basedir', $allowed) === false) {
            error_log('safety-net: WARNING: could not set open_basedir; filesystem allow-list is NOT active');
        }
    }

    // Realpath cache tuning, carried over from the panel.
    // NOTE(review): the PHP manual lists realpath_cache_size and
    // realpath_cache_ttl as INI_SYSTEM settings and states that using
    // open_basedir disables the realpath cache, so these two calls are
    // probably no-ops at runtime. Confirm, then move them to php.ini or
    // drop them.
    @ini_set('realpath_cache_size', '512K');
    @ini_set('realpath_cache_ttl', '600');
} else {
    error_log('safety-net: WARNING: ini_set() is unavailable; filesystem allow-list is NOT active');
}
|
||||||
161
scripts/lib/scan-symlinks.php
Normal file
161
scripts/lib/scan-symlinks.php
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* scan-symlinks.php — standalone port of
|
||||||
|
* CpanelBackupImporter::scanTarballForDangerousSymlinks().
|
||||||
|
*
|
||||||
|
* This is the same classification logic that ships in the WHP panel today
|
||||||
|
* (web-files/libs/CpanelBackupImporter.php, ~line 2438). Lifted into a
|
||||||
|
* standalone CLI so the container can run it as an independent pre-extract
|
||||||
|
* gate without dragging in the rest of the importer.
|
||||||
|
*
|
||||||
|
* Exit codes:
|
||||||
|
* 0 — clean (no DANGEROUS findings)
|
||||||
|
* 1 — one or more DANGEROUS findings; tarball MUST NOT be extracted
|
||||||
|
* 2 — usage / I/O error
|
||||||
|
*
|
||||||
|
* On exit 0 or 1, writes a JSON report to --report listing every
* absolute-target symlink that is not user-internal, together with its
* classification verdict.
|
||||||
|
*
|
||||||
|
* SECURITY NOTE — this differs from the panel implementation in ONE way:
|
||||||
|
* The panel uses file_exists($target) on the *host* to decide whether a
|
||||||
|
* target under a dangerous prefix is BENIGN_DANGLING vs DANGEROUS. We
|
||||||
|
* are running INSIDE the container so /etc and /usr DO exist (they're
|
||||||
|
* the container's own), but `--read-only --tmpfs /tmp` plus the worker
|
||||||
|
* running as UID 999 means even DANGEROUS targets cannot reach the host.
|
||||||
|
*
|
||||||
|
* We treat any absolute-target symlink under a dangerous prefix as
|
||||||
|
* DANGEROUS regardless of `file_exists()` — this is a stricter check
|
||||||
|
* than the panel's, because in the container we *can* safely refuse to
|
||||||
|
* even try the extract on a clearly malicious tarball.
|
||||||
|
*/
|
||||||
|
|
||||||
|
require __DIR__ . '/safety-net.php';

$opts = getopt('', ['tarball:', 'username:', 'report:']);
if (!isset($opts['tarball']) || !isset($opts['report'])) {
    fwrite(STDERR, "usage: scan-symlinks.php --tarball <path> --report <out.json> [--username <u>]\n");
    exit(2);
}
$tarPath    = $opts['tarball'];
$reportPath = $opts['report'];
$username   = $opts['username'] ?? '';

if (!is_file($tarPath) || !is_readable($tarPath)) {
    fwrite(STDERR, "scan-symlinks: not a readable file: $tarPath\n");
    exit(2);
}

// Same prefix list as the panel.
$dangerousPrefixes = [
    '/etc', '/usr', '/bin', '/sbin', '/lib', '/lib64',
    '/boot', '/root',
    '/var/lib', '/var/log', '/var/cache', '/var/spool',
];

// True when any path segment is "..".
$hasTraversal = static fn(string $p): bool => in_array('..', explode('/', $p), true);

// Lexically collapse "//", "/./" and "/../" in an absolute path. No
// filesystem access: the target does not exist in this container.
$normalize = static function (string $p): string {
    $stack = [];
    foreach (explode('/', $p) as $seg) {
        if ($seg === '' || $seg === '.') continue;
        if ($seg === '..') { array_pop($stack); continue; }
        $stack[] = $seg;
    }
    return '/' . implode('/', $stack);
};

$findings = [];
$cpanelUsername = null;

$cmd = 'tar -tvf ' . escapeshellarg($tarPath) . ' 2>/dev/null';
$fh = @popen($cmd, 'r');
if (!$fh) {
    fwrite(STDERR, "scan-symlinks: failed to spawn tar -tvf on $tarPath\n");
    exit(2);
}

while (($line = fgets($fh)) !== false) {
    // Only symlink entries ("l" in the mode column) are examined.
    // NOTE(review): relative targets (including ones that climb with
    // "..") are not classified here, matching the panel logic this was
    // ported from. Confirm that is still the intended scope.
    if ($line === '' || $line[0] !== 'l') continue;
    $line = rtrim($line, "\r\n");
    $arrow = strpos($line, ' -> ');
    if ($arrow === false) continue;
    $left   = substr($line, 0, $arrow);
    $target = substr($line, $arrow + 4);
    $parts  = preg_split('/\s+/', $left, 6);
    if (count($parts) < 6) continue;
    $archivePath = $parts[5];

    // A second " -> " means the member name or the target itself
    // contains the separator, so name and target cannot be told apart.
    // A crafted name such as "x -> /home/user/y" linked to /etc would
    // otherwise be read as a user-internal target. Fail closed.
    if (strpos($target, ' -> ') !== false) {
        $findings[] = [
            'type'         => 'DANGEROUS',
            'archive_path' => $archivePath,
            'target'       => $target,
            'reason'       => 'listing line contains more than one " -> "; link name and target cannot be separated unambiguously',
        ];
        continue;
    }

    if ($target === '' || $target[0] !== '/') continue;

    if ($cpanelUsername === null) {
        if (preg_match('#^cpmove-([^/]+)/#', $archivePath, $m)) {
            $cpanelUsername = $m[1];
        }
    }

    // Classify the lexically normalised target so that
    // "/home/<user>/../../etc" or "//etc" is judged as "/etc".
    $traversal = $hasTraversal($target);
    $resolved  = $normalize($target);

    // (1) user-internal — accept symlinks pointing into the customer's
    //     own /home/<user>/ tree. The panel rewrites these on extract.
    //     A target containing ".." is never accepted here: the lexical
    //     result may differ from what the kernel resolves if an
    //     intermediate component is itself a symlink.
    if (!$traversal) {
        $usernames = [];
        if ($cpanelUsername !== null && $cpanelUsername !== '') $usernames[] = $cpanelUsername;
        if ($username !== '') $usernames[] = $username;

        $userInternal = false;
        foreach ($usernames as $u) {
            $home = '/home/' . $u;
            if ($resolved === $home || strpos($resolved, $home . '/') === 0) {
                $userInternal = true;
                break;
            }
            if (preg_match('#^/home\d+/' . preg_quote($u, '#') . '(/|$)#', $resolved)) {
                $userInternal = true;
                break;
            }
        }
        if ($userInternal) continue;
    }

    // (2) exact root.
    $type = null;
    $reason = '';
    if ($resolved === '/') {
        $type = 'DANGEROUS';
        $reason = 'absolute target is root /';
    } else {
        // (3) — in container, every dangerous-prefix target is treated
        //     as DANGEROUS without a file_exists() check (see security
        //     note at top of file).
        foreach ($dangerousPrefixes as $p) {
            if ($resolved === $p || strpos($resolved, $p . '/') === 0) {
                $type = 'DANGEROUS';
                $reason = "absolute target resolves under system path $p";
                break;
            }
        }
        if ($type === null) {
            // Target is absolute, not user-internal, not under a known
            // dangerous prefix. Operators want to know about these.
            $type = 'UNCERTAIN';
            $reason = 'absolute target outside user tree and not on dangerous-prefix list';
        }
    }

    $findings[] = [
        'type'         => $type,
        'archive_path' => $archivePath,
        'target'       => $target,
        'reason'       => $reason,
    ];
}

// tar's stderr is discarded above, so its exit status is the only sign
// that the listing was cut short (corrupt or truncated archive).
$tarStatus = pclose($fh);
$listingComplete = ($tarStatus === 0);

$dangerousCount = count(array_filter($findings, fn($f) => $f['type'] === 'DANGEROUS'));
$uncertainCount = count(array_filter($findings, fn($f) => $f['type'] === 'UNCERTAIN'));

$report = [
    'tarball'          => $tarPath,
    'listing_complete' => $listingComplete,
    'total_findings'   => count($findings),
    'dangerous_count'  => $dangerousCount,
    'uncertain_count'  => $uncertainCount,
    'findings'         => $findings,
];

// Substitute invalid UTF-8 so an odd member name cannot make
// json_encode() return false and leave an empty report behind.
$json = json_encode($report, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE);
if ($json === false || @file_put_contents($reportPath, $json . "\n") === false) {
    fwrite(STDERR, "scan-symlinks: WARN could not write report to $reportPath\n");
}

if ($dangerousCount > 0) {
    fwrite(STDERR, "scan-symlinks: $dangerousCount DANGEROUS finding(s); refusing tarball\n");
    foreach ($findings as $f) {
        if ($f['type'] === 'DANGEROUS') {
            fwrite(STDERR, sprintf("  %s -> %s (%s)\n", $f['archive_path'], $f['target'], $f['reason']));
        }
    }
    exit(1);
}

// An incomplete listing proves nothing about the entries we never saw:
// report an I/O error instead of "clean".
if (!$listingComplete) {
    fwrite(STDERR, "scan-symlinks: tar -tvf exited with status $tarStatus on $tarPath; listing incomplete, no verdict\n");
    exit(2);
}

fwrite(STDERR, "scan-symlinks: clean (uncertain=$uncertainCount, dangerous=0)\n");
exit(0);
|
||||||
399
scripts/scan-dbs.php
Executable file
399
scripts/scan-dbs.php
Executable file
@@ -0,0 +1,399 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* scan-dbs.php — SQL dump engine swap + WordPress content scan.
|
||||||
|
*
|
||||||
|
* v1.0 scope:
|
||||||
|
* - Walk every cpmove-USER/mysql/DBNAME.sql under the extract dir.
|
||||||
|
* - ALWAYS: regex-rewrite ENGINE=MyISAM -> ENGINE=InnoDB.
|
||||||
|
* - WordPress identification: presence of wp_options/wp_posts/wp_users
|
||||||
|
* CREATE TABLEs (or prefix-variants where prefix != "wp_").
|
||||||
|
* - WP content scan: ONE check — siteurl_external_domain — comparing
|
||||||
|
* wp_options.siteurl / wp_options.home against the cpanel userdata's
|
||||||
|
* main_domain + addon-domain list.
|
||||||
|
* - If any high-confidence flag fires, the .sql file is written with
|
||||||
|
* a .flagged suffix and imported_into_new_server=false.
|
||||||
|
* - Otherwise the rewritten .sql lands in /tmp/sanitized/mysql/.
|
||||||
|
*
|
||||||
|
* v1.1 will grow the WP scan check set (post_content script-injection,
|
||||||
|
* user_pass leaked-hash, Wordfence regex). See CONTRIBUTING.md for how
|
||||||
|
* to add a check.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* scan-dbs.php --extract DIR --out DIR --report OUT.json
|
||||||
|
*               --import-id ID --username USER [--final-prefix PATH]
|
||||||
|
*
|
||||||
|
* Exit codes:
|
||||||
|
* 0 on success (regardless of flags); 1 fatal; 2 usage.
|
||||||
|
*
|
||||||
|
* NOTE: docblock above must not contain the literal sequence "* /"
|
||||||
|
* (without the space) anywhere — PHP closes the C-style comment at
|
||||||
|
* that token and parses the rest as code. This bit us once on
|
||||||
|
* the cpmove-USER /mysql glob path.
|
||||||
|
*/
|
||||||
|
|
||||||
|
require __DIR__ . '/lib/safety-net.php';
|
||||||
|
|
||||||
|
const SCANNER_VERSION = '1.0.0';

// -- command-line arguments -------------------------------------------------
//
// Five options are mandatory; --final-prefix is optional. Exit 2 (usage) when
// any of them is missing or malformed.

$usage = "usage: scan-dbs.php --extract DIR --out DIR --report OUT.json --import-id ID --username USER [--final-prefix PATH]\n";

$opts = getopt('', ['extract:', 'out:', 'report:', 'import-id:', 'username:', 'final-prefix:']);
foreach (['extract', 'out', 'report', 'import-id', 'username'] as $k) {
    // getopt() yields an ARRAY when an option is given more than once, which
    // would make the rtrim() calls below throw a TypeError. An empty value is
    // rejected too: an empty --extract would turn the dump glob into
    // "/cpmove-*/mysql/*.sql" at the filesystem root.
    if (!isset($opts[$k]) || !is_string($opts[$k]) || $opts[$k] === '') {
        fwrite(STDERR, $usage);
        exit(2);
    }
}
if (isset($opts['final-prefix']) && (!is_string($opts['final-prefix']) || $opts['final-prefix'] === '')) {
    fwrite(STDERR, $usage);
    exit(2);
}

$extractDir = rtrim($opts['extract'], '/');
$outDir     = rtrim($opts['out'], '/');
$reportPath = $opts['report'];
$importId   = $opts['import-id'];
$username   = $opts['username'];

// --final-prefix is the path .sql files will live at AFTER the rsync to
// /host/sanitized/<importid>/mysql/. We record that path in the report
// so the panel doesn't have to translate /tmp/... paths. Defaults to $outDir.
$finalPrefix = isset($opts['final-prefix']) ? rtrim($opts['final-prefix'], '/') : $outDir;

// Create the output directory up front and fail loudly (exit 1, fatal) if
// that is impossible; otherwise every later write would fail one by one.
if (!is_dir($outDir) && !@mkdir($outDir, 0750, true) && !is_dir($outDir)) {
    fwrite(STDERR, "scan-dbs: cannot create output dir: $outDir\n");
    exit(1);
}

fwrite(STDERR, "scan-dbs: starting (extract=$extractDir, out=$outDir)\n");
|
||||||
|
|
||||||
|
// -- locate every cpmove-*/mysql/*.sql dump ----------------------------------
//
// Some cpmove layouts ship <db>.create next to <db>.sql; only the .sql file
// matters here and the glob already selects it. Non-regular entries are
// dropped.

$dumpCandidates = glob($extractDir . '/cpmove-*/mysql/*.sql') ?: [];
$sqlFiles = array_values(array_filter($dumpCandidates, 'is_file'));

if (count($sqlFiles) === 0) {
    // Not fatal: the script carries on and produces a report with no databases.
    fwrite(STDERR, "scan-dbs: no .sql dumps found under $extractDir/cpmove-*/mysql/\n");
}

// -- build the allow-list used by the siteurl check ---------------------------

$allowedDomains = collect_allowed_domains($extractDir, $username);

$domainSummary = empty($allowedDomains)
    ? '(none discovered)'
    : implode(', ', $allowedDomains);
fwrite(STDERR, "scan-dbs: allowed domains for siteurl check: " . $domainSummary . "\n");
|
||||||
|
|
||||||
|
// -- per-database pipeline: read -> engine swap -> WP scan -> write -----------
//
// Each dump yields one entry in $databases. A dump that cannot be READ is
// skipped with a warning; a dump that cannot be WRITTEN is fatal (exit 1),
// because the report would otherwise advertise a file that does not exist.

$databases = [];

foreach ($sqlFiles as $sqlPath) {
    $dbName = basename($sqlPath, '.sql');
    fwrite(STDERR, "scan-dbs: processing $dbName ($sqlPath)\n");

    $sizeBytes = filesize($sqlPath) ?: 0;
    // The whole dump is held in memory (twice, counting the rewritten copy).
    $sql = file_get_contents($sqlPath);
    if ($sql === false) {
        fwrite(STDERR, "scan-dbs: WARN failed to read $sqlPath; skipping\n");
        continue;
    }

    // --- ENGINE SWAP (always applied) -------------------------------------
    [$rewritten, $engineCounts] = engine_swap($sql);

    // --- WordPress identification + content scan --------------------------
    $isWp  = is_wordpress_dump($rewritten);
    $flags = [];
    if ($isWp) {
        $flags = wp_content_scan($rewritten, $allowedDomains);
    }

    // Any high-severity flag refuses the import for this database.
    $highConfidence = array_filter($flags, fn($f) => ($f['severity'] ?? '') === 'high');
    $refused = (bool) count($highConfidence);

    // Refused dumps are still written, but under a .flagged suffix.
    $outName   = $dbName . '.sql' . ($refused ? '.flagged' : '');
    $outPath   = $outDir . '/' . $outName;
    $finalPath = $finalPrefix . '/' . $outName;

    // file_put_contents() returns false on failure and the byte count on
    // success; a short count means a truncated dump, which is just as bad.
    $written = @file_put_contents($outPath, $rewritten);
    if ($written === false || $written !== strlen($rewritten)) {
        fwrite(STDERR, "scan-dbs: FATAL failed to write $outPath\n");
        exit(1);
    }

    $databases[] = [
        'dbname'     => $dbName,
        'size_bytes' => $sizeBytes,
        'engine_changes' => [
            'myisam_to_innodb'           => $engineCounts['myisam_to_innodb'],
            'row_format_dynamic_applied' => 0, // v1.1
            'fulltext_indexes_dropped'   => 0, // v1.1
        ],
        'wp_content_scan' => [
            'is_wordpress' => $isWp,
            'flags'        => $flags,
        ],
        'imported_into_new_server' => !$refused,
        'sanitized_sql_path'       => $refused ? null : $finalPath,
        'flagged_sql_path'         => $refused ? $finalPath : null,
    ];
}
|
||||||
|
|
||||||
|
// -- emit the JSON report -----------------------------------------------------

$report = [
    'scanner_version' => SCANNER_VERSION,
    'import_id'       => $importId,
    'databases'       => $databases,
];

// Database names and flag details come from the customer's tarball and may
// contain bytes that are not valid UTF-8. Without the SUBSTITUTE flag
// json_encode() returns false and the "report" would be a lone newline.
$reportJson = json_encode(
    $report,
    JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
);
if ($reportJson === false) {
    fwrite(STDERR, "scan-dbs: FATAL could not encode report: " . json_last_error_msg() . "\n");
    exit(1);
}
if (@file_put_contents($reportPath, $reportJson . "\n") === false) {
    fwrite(STDERR, "scan-dbs: FATAL could not write report to $reportPath\n");
    exit(1);
}

fwrite(STDERR, "scan-dbs: done — " . count($databases) . " database(s) processed\n");
exit(0);
|
||||||
|
|
||||||
|
// ---- helpers --------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Rewrite every ENGINE=MyISAM table option to ENGINE=InnoDB.
 *
 * The match is case-insensitive, tolerates whitespace around "=", and is
 * anchored on word boundaries so longer identifiers (for example
 * STORAGE_ENGINE or MyISAM2) are left alone. It is a plain text
 * substitution: it does NOT parse SQL, so the same token inside string
 * data would be rewritten as well.
 *
 * @param string $sql Full text of the SQL dump.
 * @return array [string $newSql, array $counts] where $counts has the
 *               single key 'myisam_to_innodb' (number of replacements).
 *               If PCRE fails, the input is returned unchanged with a
 *               count of 0.
 */
function engine_swap(string $sql): array {
    $count = 0;
    $rewritten = preg_replace_callback(
        '/\bENGINE\s*=\s*MyISAM\b/i',
        function () use (&$count) { $count++; return 'ENGINE=InnoDB'; },
        $sql
    );

    if ($rewritten === null) {
        // preg_replace_callback() returns null on a PCRE error. The callback
        // may already have fired for earlier matches, but that output is
        // discarded, so the count must be reported as zero to stay truthful.
        return [$sql, ['myisam_to_innodb' => 0]];
    }

    return [$rewritten, ['myisam_to_innodb' => $count]];
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a SQL dump looks like a WordPress database.
 *
 * A dump qualifies when it contains CREATE TABLE statements for tables
 * whose names end in "options", "posts" and "users" (any prefix, with or
 * without backtick / double-quote quoting) AND mentions at least one of
 * the quoted option names 'siteurl', 'home', 'template' or 'stylesheet'.
 * The option-name check weeds out other applications that happen to
 * share those table names.
 *
 * @param string $sql Full text of the SQL dump.
 * @return bool True when all four signals are present.
 */
function is_wordpress_dump(string $sql): bool {
    foreach (['options', 'posts', 'users'] as $suffix) {
        $createPattern = '/CREATE TABLE [`"]?\w*' . $suffix . '[`"]?\s*\(/i';
        if (!preg_match($createPattern, $sql)) {
            // One core table missing is enough to rule WordPress out.
            return false;
        }
    }

    return (bool) preg_match("/'siteurl'|'home'|'template'|'stylesheet'/", $sql);
}
|
||||||
|
|
||||||
|
/**
 * Run the WordPress content scan over a dump. v1.0 ships ONE check:
 *
 *   siteurl_external_domain — wp_options.siteurl or wp_options.home points
 *   at a host that is not in the allow list.
 *
 * Values whose host cannot be parsed, "localhost" (any case) and IP
 * literals (IPv4, or IPv6 written in brackets) are not treated as
 * external domains and never raise a flag here.
 *
 * v1.1 add: post_content script-injection signature, theme/stylesheet
 * known-malware patterns, user_pass leaked-hash check, Wordfence regex.
 *
 * @param string $sql            Full text of the SQL dump.
 * @param array  $allowedDomains Host names the site may legitimately use.
 * @return array List of flag dicts with keys 'severity', 'code' and
 *               'details'; an empty list means "clean".
 */
function wp_content_scan(string $sql, array $allowedDomains): array {
    $flags = [];

    // option_name => option_value, recovered best-effort from the INSERTs
    // into the options table.
    $optionValues = extract_wp_options($sql);

    foreach (['siteurl', 'home'] as $optName) {
        if (!isset($optionValues[$optName])) continue;
        $val  = $optionValues[$optName];
        $host = parse_url($val, PHP_URL_HOST);
        if ($host === null || $host === false || $host === '') continue;

        // Normalise before classifying:
        //  - a fully-qualified name may carry a trailing dot ("example.com.")
        //  - parse_url() keeps the square brackets around an IPv6 literal,
        //    which FILTER_VALIDATE_IP does not accept.
        $bareHost = rtrim($host, '.');
        if (strlen($bareHost) >= 2 && $bareHost[0] === '[' && substr($bareHost, -1) === ']') {
            $bareHost = substr($bareHost, 1, -1);
        }

        // localhost / IP literals are not external domains; let the
        // panel handle them on the rewrite-wp-config pass.
        if (strtolower($bareHost) === 'localhost' || filter_var($bareHost, FILTER_VALIDATE_IP)) continue;

        if (!domain_in_allowlist($bareHost, $allowedDomains)) {
            $flags[] = [
                'severity' => 'high',
                'code'     => 'siteurl_external_domain',
                'details'  => sprintf(
                    "wp_options.%s = %s — host '%s' not in allowed domain list (%s)",
                    $optName,
                    json_encode($val),
                    $host,
                    empty($allowedDomains) ? 'NONE; could not discover from cpmove userdata' : implode(', ', $allowedDomains)
                ),
            ];
        }
    }

    return $flags;
}
|
||||||
|
|
||||||
|
/**
 * Build an option_name => option_value map from every INSERT into a table
 * whose name ends in "options".
 *
 * This is a best-effort text parse, not a SQL parser. Rows are separated
 * by splitting on "),(" and each row is assumed to follow the wp_options
 * column order (option_id, option_name, option_value, autoload). Unusual
 * quoting can defeat it, in which case fewer (or no) values come back and
 * the caller's scan reports clean. When a name occurs more than once the
 * last occurrence wins.
 *
 * @param string $sql Full text of the SQL dump.
 * @return array Map such as ['siteurl' => '...', 'home' => '...'].
 */
function extract_wp_options(string $sql): array {
    // INSERT INTO `..options` [(col, col, ...)] VALUES <rows>;
    // The optional column list is matched as "( anything but ')' )", and
    // the statement must end with ";" at the end of a line.
    $found = preg_match_all(
        '/INSERT\s+INTO\s+[`"]?\w*options[`"]?\s*(?:\([^)]*\)\s*)?VALUES\s*(.+?);\s*$/ims',
        $sql,
        $stmts
    );
    if (!$found) {
        return [];
    }

    $map = [];
    foreach ($stmts[1] as $valuesClause) {
        // Peel the outermost "(" and ")" off the row list so that the
        // split below yields bare row bodies.
        $inner = trim($valuesClause);
        if (str_starts_with($inner, '(')) {
            $inner = substr($inner, 1);
        }
        if (str_ends_with($inner, ')')) {
            $inner = substr($inner, 0, -1);
        }

        foreach (preg_split('/\)\s*,\s*\(/', $inner) as $rowText) {
            $cells = parse_sql_row($rowText);
            if (count($cells) < 3) {
                continue;
            }
            // Positions 1 and 2 are option_name and option_value.
            [, $name, $value] = $cells;
            if (!is_string($name) || !is_string($value) || $name === '') {
                continue;
            }
            $map[$name] = $value;
        }
    }

    return $map;
}
|
||||||
|
|
||||||
|
/**
 * Parse the inside of one MySQL INSERT VALUES tuple into its cells.
 *
 * Cells are comma-separated. Single-quoted strings are unescaped: the
 * backslash sequences \n \t \r \0 \b \Z map to their control characters,
 * any other backslash-escaped character maps to itself, and a doubled
 * quote ('') maps to a single quote. An unquoted NULL (any case) becomes
 * PHP null; every other unquoted token is returned as a trimmed string.
 *
 * Not bulletproof (no comment handling, no DOUBLE-QUOTE strings, empty
 * cells between consecutive commas are skipped) but good enough for the
 * predictable format mysqldump produces.
 *
 * @param string $row Row text without the surrounding parentheses.
 * @return array List of cells, each a string or null.
 */
function parse_sql_row(string $row): array {
    $cells = [];
    $i = 0;
    $n = strlen($row);

    while ($i < $n) {
        // Skip leading whitespace + commas.
        while ($i < $n && (ctype_space($row[$i]) || $row[$i] === ',')) $i++;
        if ($i >= $n) break;

        if ($row[$i] === "'") {
            // Quoted string: consume up to the matching unescaped quote.
            $i++;
            $buf = '';
            while ($i < $n) {
                $cc = $row[$i];
                if ($cc === '\\' && $i + 1 < $n) {
                    $next = $row[$i + 1];
                    $buf .= match ($next) {
                        'n' => "\n",
                        't' => "\t",
                        'r' => "\r",
                        '0' => "\0",
                        'b' => "\x08",
                        // mysqldump writes byte 0x1A (Ctrl-Z) as \Z.
                        'Z' => "\x1A",
                        default => $next,
                    };
                    $i += 2;
                    continue;
                }
                if ($cc === "'") {
                    // MySQL `''` -> literal '
                    if ($i + 1 < $n && $row[$i + 1] === "'") {
                        $buf .= "'";
                        $i += 2;
                        continue;
                    }
                    $i++;   // step over the closing quote
                    break;
                }
                $buf .= $cc;
                $i++;
            }
            $cells[] = $buf;
        } else {
            // Bareword / number / NULL — read until next comma.
            $start = $i;
            while ($i < $n && $row[$i] !== ',') $i++;
            $tok = trim(substr($row, $start, $i - $start));
            $cells[] = (strcasecmp($tok, 'NULL') === 0) ? null : $tok;
        }
    }

    return $cells;
}
|
||||||
|
|
||||||
|
/**
 * Discover the user's allowed-domain set from the extracted cpmove tree.
 *
 * Sources that are read:
 *   cpmove-<user>/userdata/<domain>  — every entry whose NAME looks like a
 *                                      domain is taken as one
 *   cpmove-<user>/userdata/main      — the "main_domain: <d>" line
 *   cpmove-<user>/addons             — "addon.tld=parent.tld" lines; the
 *                                      part before "=" is taken
 *
 * The cpmove "sds" subdomain list is NOT read here.
 *
 * Best-effort: when nothing is found an empty list is returned, and the
 * siteurl check then treats every host as external.
 *
 * @param string $extractDir Directory the tarball was extracted into.
 * @param string $username   Account name; currently unused, because every
 *                           cpmove-* directory under $extractDir is read.
 * @return array Lower-cased, de-duplicated list of domain names.
 */
function collect_allowed_domains(string $extractDir, string $username): array {
    $domains = [];

    // glob() may return false instead of an empty array; guard it the same
    // way as the other glob() calls so foreach never receives a bool.
    foreach (glob($extractDir . '/cpmove-*/userdata') ?: [] as $userdataDir) {
        if (!is_dir($userdataDir)) continue;

        foreach (scandir($userdataDir) ?: [] as $entry) {
            if ($entry === '.' || $entry === '..' || $entry === 'main') continue;
            // userdata/<domain> is a file or dir keyed by the domain.
            if (preg_match('/^[a-z0-9._-]+\.[a-z]{2,}$/i', $entry)) {
                $domains[] = strtolower($entry);
            }
        }

        // userdata/main is a YAML-ish file with main_domain: <d>
        $mainFile = $userdataDir . '/main';
        if (is_file($mainFile)) {
            $content = file_get_contents($mainFile);
            if ($content !== false && preg_match('/^main_domain:\s*(\S+)/m', $content, $m)) {
                $domains[] = strtolower($m[1]);
            }
        }
    }

    foreach (glob($extractDir . '/cpmove-*/addons') ?: [] as $addonsFile) {
        if (!is_file($addonsFile)) continue;
        $content = file_get_contents($addonsFile);
        if ($content === false) continue;

        // cPanel writes "addon.tld=parent.tld" lines.
        foreach (preg_split('/\R/', $content) ?: [] as $line) {
            if (preg_match('/^([a-z0-9._-]+\.[a-z]{2,})/i', $line, $m)) {
                $domains[] = strtolower($m[1]);
            }
        }
    }

    return array_values(array_unique($domains));
}
|
||||||
|
|
||||||
|
/**
 * Report whether $host is covered by the allow-list.
 *
 * A host is covered when it equals an allowed domain or is a subdomain
 * of one; the comparison ignores case.
 *
 *   allowed=['example.com'], host='www.example.com' -> true
 *   allowed=['example.com'], host='malicious.tld'   -> false
 *   allowed=[],              host=anything          -> false (refuse-all)
 *
 * @param string $host    Host name to test.
 * @param array  $allowed Allowed domain names.
 * @return bool True when the host is covered.
 */
function domain_in_allowlist(string $host, array $allowed): bool {
    $needle = strtolower($host);

    // An empty allow-list never enters the loop, so the result is false.
    foreach ($allowed as $candidate) {
        $candidate = strtolower($candidate);
        $isExact   = ($needle === $candidate);
        // The leading dot keeps "notexample.com" from matching "example.com".
        $isSubdomain = str_ends_with($needle, '.' . $candidate);
        if ($isExact || $isSubdomain) {
            return true;
        }
    }

    return false;
}
|
||||||
216
scripts/scan-files.php
Executable file
216
scripts/scan-files.php
Executable file
@@ -0,0 +1,216 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* scan-files.php — ClamAV scan + classify-and-action orchestrator.
|
||||||
|
*
|
||||||
|
* v1.0: quarantine-on-every-hit. No auto-cleaners enabled. The cleaner
|
||||||
|
* registry (KNOWN_REMOVABLE / REMOVABLE_WITH_BACKUP) is stubbed below
|
||||||
|
* for v1.1 expansion; see CONTRIBUTING.md for how to wire one in.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* scan-files.php --extract <dir> --quarantine <dir> --report <out.json> --import-id <id>
|
||||||
|
*
|
||||||
|
* Exit codes:
|
||||||
|
* 0 — scan completed (regardless of how many hits)
|
||||||
|
* 1 — fatal scanner error (clamscan binary missing, signature DB unreadable)
|
||||||
|
* 2 — usage error
|
||||||
|
*
|
||||||
|
* Report shape: matches spec §3, e.g.:
|
||||||
|
* {
|
||||||
|
* "files_scanned": N,
|
||||||
|
* "files_clean": N,
|
||||||
|
* "files_cleaned": 0, // always 0 in v1.0 — no cleaners yet
|
||||||
|
* "files_quarantined": N,
|
||||||
|
* "actions": [ { path, signature, action, cleaner, backup } ]
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
|
||||||
|
require __DIR__ . '/lib/safety-net.php';
|
||||||
|
|
||||||
|
const SCANNER_VERSION = '1.0.0';

// -- command-line arguments -------------------------------------------------
//
// All four options are mandatory. Exit 2 (usage) when any is missing or
// malformed.

$opts = getopt('', ['extract:', 'quarantine:', 'report:', 'import-id:']);
foreach (['extract', 'quarantine', 'report', 'import-id'] as $k) {
    // getopt() yields an ARRAY when an option is repeated, which would make
    // the rtrim() calls below throw a TypeError; empty values are rejected
    // as well.
    if (!isset($opts[$k]) || !is_string($opts[$k]) || $opts[$k] === '') {
        fwrite(STDERR, "usage: scan-files.php --extract <dir> --quarantine <dir> --report <out.json> --import-id <id>\n");
        exit(2);
    }
}
$extractDir    = rtrim($opts['extract'], '/');
$quarantineDir = rtrim($opts['quarantine'], '/');
$reportPath    = $opts['report'];
$importId      = $opts['import-id'];

if (!is_dir($extractDir)) {
    fwrite(STDERR, "scan-files: extract dir does not exist: $extractDir\n");
    exit(2);
}

// Without a quarantine directory no hit can be moved out of the extract
// tree, so failing to create it is fatal (exit 1) rather than ignored.
if (!is_dir($quarantineDir) && !@mkdir($quarantineDir, 0750, true) && !is_dir($quarantineDir)) {
    fwrite(STDERR, "scan-files: cannot create quarantine dir: $quarantineDir\n");
    exit(1);
}

fwrite(STDERR, "scan-files: starting (extract=$extractDir, quarantine=$quarantineDir)\n");

// -- v1.0 cleaner registry (intentionally empty) ----------------------------
//
// Each entry maps a ClamAV signature substring -> classification +
// cleaner callable. v1.0 ships empty so EVERY hit is classified as
// QUARANTINE_ONLY. See CONTRIBUTING.md "Adding an auto-cleaner pattern"
// for how to add a tested entry.
//
// Shape (v1.1+):
//   $cleaners = [
//     'php-eval-base64-prefix' => [
//       'class' => 'KNOWN_REMOVABLE',
//       'match' => fn(string $sig): bool => str_contains($sig, 'PHP.Trojan.EvalB64'),
//       'clean' => fn(string $path): bool => /* rewrite file in place; return ok */,
//     ],
//   ];
$cleaners = [];
|
||||||
|
|
||||||
|
// -- run clamscan recursively over the extract dir --------------------------

// We use --infected so the output is only hits, and --recursive so we
// walk subdirectories. We deliberately do NOT use --remove (we never want
// clamscan unlinking files — we control quarantine).
//
// Output format per line on a hit:
//   /tmp/extract/foo/bar.php: Some.Signature.Name FOUND
$cmd = sprintf(
    'clamscan --infected --recursive --no-summary --stdout %s 2>/dev/null',
    escapeshellarg($extractDir)
);

$fh = popen($cmd, 'r');
if (!$fh) {
    fwrite(STDERR, "scan-files: failed to spawn clamscan\n");
    exit(1);
}

$hits = [];
while (($line = fgets($fh)) !== false) {
    $line = rtrim($line, "\r\n");
    if ($line === '' || !str_ends_with($line, ' FOUND')) continue;

    // Strip trailing ' FOUND', then split "<path>: <signature>" on the LAST
    // ": " so a path that itself contains ": " stays intact.
    $body  = substr($line, 0, -6);
    $colon = strrpos($body, ': ');
    if ($colon === false) continue;
    $path = substr($body, 0, $colon);
    $sig  = substr($body, $colon + 2);

    if (!str_starts_with($path, $extractDir)) {
        // Defensive: shouldn't happen with our invocation. Never drop a hit
        // silently, though.
        fwrite(STDERR, "scan-files: WARN ignoring hit outside extract dir: $path ($sig)\n");
        continue;
    }
    $hits[] = ['path' => $path, 'signature' => $sig];
}

// popen() succeeds as long as a shell could be started, even when clamscan
// is missing or its signature database cannot be loaded. The exit status is
// the only way to tell "scanned, nothing found" from "never scanned":
// clamscan exits 0 for no virus, 1 for virus(es) found, and anything else
// (2 from clamscan, 127 from the shell for a missing binary, -1 from
// pclose) means the scan cannot be trusted.
$clamStatus = pclose($fh);
if ($clamStatus !== 0 && $clamStatus !== 1) {
    fwrite(STDERR, "scan-files: clamscan failed (exit status $clamStatus); refusing to report the tree as scanned\n");
    exit(1);
}
|
||||||
|
|
||||||
|
// files_scanned for the report: clamscan runs with --no-summary, so the
// number of files under the extract dir is counted here instead. This runs
// after clamscan, before any hit has been moved to quarantine.
$treeWalker = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($extractDir, FilesystemIterator::SKIP_DOTS)
);
$filesScanned = iterator_count(
    new CallbackFilterIterator(
        $treeWalker,
        fn(SplFileInfo $node): bool => $node->isFile()
    )
);
|
||||||
|
|
||||||
|
// -- classify + action each hit --------------------------------------------
//
// Every hit ends with exactly one entry in $actions:
//   'quarantined'        file now lives under the quarantine dir
//   'cleaned'            a registered cleaner repaired it in place
//   'quarantine_failed'  the file could NOT be removed from the extract
//                        tree; recorded so the failure is visible in the
//                        report instead of only on stderr

$actions     = [];
$cleaned     = 0;
$quarantined = 0;

// Move $src to $dst. Returns true only when the file is gone from $src.
$moveToQuarantine = static function (string $src, string $dst): bool {
    @mkdir(dirname($dst), 0750, true);
    if (@rename($src, $dst)) {
        return true;
    }
    // Fall back to copy + unlink (rename across mount boundaries
    // sometimes EXDEVs even though /tmp and /host are both ours).
    if (!@copy($src, $dst)) {
        return false;
    }
    // A copy that leaves the original behind is NOT a quarantine: the file
    // would still be picked up by the rsync to /host/sanitized/.
    return @unlink($src);
};

// Quarantine one hit and record the outcome.
$quarantineHit = static function (
    string $path,
    string $qPath,
    string $relPath,
    string $sig,
    ?string $cleaner
) use (&$actions, &$quarantined, $moveToQuarantine): void {
    if ($moveToQuarantine($path, $qPath)) {
        $quarantined++;
        $actions[] = [
            'path' => $relPath, 'signature' => $sig,
            'action' => 'quarantined', 'cleaner' => $cleaner, 'backup' => $qPath,
        ];
        return;
    }
    fwrite(STDERR, "scan-files: WARN failed to quarantine $path -> $qPath\n");
    $actions[] = [
        'path' => $relPath, 'signature' => $sig,
        'action' => 'quarantine_failed', 'cleaner' => $cleaner, 'backup' => null,
    ];
};

foreach ($hits as $h) {
    $path = $h['path'];
    $sig  = $h['signature'];

    // v1.0 — every hit is QUARANTINE_ONLY because the cleaner registry
    // is empty. The first registry entry whose matcher accepts the
    // signature wins.
    $classification = 'QUARANTINE_ONLY';
    $cleanerName    = null;   // reset per hit so nothing leaks between iterations
    foreach ($cleaners as $name => $entry) {
        if (($entry['match'])($sig)) {
            $classification = $entry['class'];
            $cleanerName    = $name;
            break;
        }
    }

    // Mirror the file's position inside the extract dir under quarantine.
    $relPath = ltrim(substr($path, strlen($extractDir)), '/');
    $qPath   = $quarantineDir . '/' . $relPath;

    // v1.1+ paths:
    if ($classification === 'KNOWN_REMOVABLE' || $classification === 'REMOVABLE_WITH_BACKUP') {
        // Backup first, then run the cleaner.
        @mkdir(dirname($qPath), 0750, true);
        $backup = $qPath . '.original';
        if (!@copy($path, $backup)) {
            fwrite(STDERR, "scan-files: backup before clean failed: $path; quarantining instead\n");
            $quarantineHit($path, $qPath, $relPath, $sig, null);
            continue;
        }

        if (($cleaners[$cleanerName]['clean'])($path)) {
            $cleaned++;
            $actions[] = [
                'path' => $relPath, 'signature' => $sig,
                'action' => 'cleaned', 'cleaner' => $cleanerName, 'backup' => $backup,
            ];
            continue;
        }

        // Cleaner refused; fall back to quarantine.
        $quarantineHit($path, $qPath, $relPath, $sig, $cleanerName);
        continue;
    }

    // QUARANTINE_ONLY, and the safe default for any classification this
    // loop does not recognise: move the whole file to quarantine so
    // the rsync to /host/sanitized/ does not include it.
    $quarantineHit($path, $qPath, $relPath, $sig, null);
}
|
||||||
|
|
||||||
|
// -- emit the JSON report -----------------------------------------------------

$report = [
    'scanner_version'   => SCANNER_VERSION,
    'import_id'         => $importId,
    'files_scanned'     => $filesScanned,
    // Clamped at zero in case the hit list ever outnumbers the file count.
    'files_clean'       => max(0, $filesScanned - count($hits)),
    'files_cleaned'     => $cleaned,
    'files_quarantined' => $quarantined,
    'actions'           => $actions,
];

// Paths come from the customer's tarball and need not be valid UTF-8;
// without the SUBSTITUTE flag json_encode() returns false for them and the
// "report" would be a lone newline.
$reportJson = json_encode(
    $report,
    JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
);
if ($reportJson === false) {
    fwrite(STDERR, "scan-files: FATAL could not encode report: " . json_last_error_msg() . "\n");
    exit(1);
}
// A scan whose report cannot be delivered must not look successful.
if (@file_put_contents($reportPath, $reportJson . "\n") === false) {
    fwrite(STDERR, "scan-files: FATAL could not write report to $reportPath\n");
    exit(1);
}

fwrite(STDERR, sprintf(
    "scan-files: done — scanned=%d clean=%d cleaned=%d quarantined=%d\n",
    $filesScanned, $report['files_clean'], $cleaned, $quarantined
));
exit(0);
|
||||||
113
tests/build-fixtures.sh
Executable file
113
tests/build-fixtures.sh
Executable file
@@ -0,0 +1,113 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# build-fixtures.sh — generate synthetic cpmove tarballs for testing.
|
||||||
|
#
|
||||||
|
# Two fixtures are built:
|
||||||
|
#
|
||||||
|
# cpmove-clean.tar.gz — a minimal cpmove with a benign homedir, one
|
||||||
|
# wp-style SQL dump with ENGINE=MyISAM tables
|
||||||
|
# and a clean siteurl, and a user-internal
|
||||||
|
# relative symlink (must not trigger).
|
||||||
|
#
|
||||||
|
# cpmove-alfa.tar.gz — same shape PLUS an ALFA-style symlink:
|
||||||
|
# `cpmove-testuser/homedir/.../alfasymlink -> /etc`
|
||||||
|
# — the pre-extract scan MUST refuse this.
|
||||||
|
#
|
||||||
|
# Run: bash tests/build-fixtures.sh
|
||||||
|
# Output: tests/fixtures/cpmove-clean.tar.gz, tests/fixtures/cpmove-alfa.tar.gz
|
||||||
|
|
||||||
|
set -euo pipefail

# Fixtures are written to a "fixtures" directory next to this script,
# wherever the script is invoked from.
script_dir="$(cd "$(dirname "$0")" && pwd)"
FIXTURES_DIR="${script_dir}/fixtures"
mkdir -p "$FIXTURES_DIR"

# Account name and main domain baked into every fixture.
# NOTE(review): this overwrites the shell's own USER variable for the rest
# of the script; build_common_tree and the tar calls read it.
USER=testuser
DOMAIN=example.com
|
||||||
|
|
||||||
|
#######################################
# Populate a minimal cpmove tree shared by every fixture.
# Globals:   USER (read), DOMAIN (read)
# Arguments: $1 - directory in which cpmove-$USER/ is created
# Outputs:   files only; nothing on stdout
#######################################
build_common_tree() {
  local root="$1"
  local base="$root/cpmove-$USER"
  local webroot="$base/homedir/public_html"
  local sub

  for sub in homedir/public_html mysql userdata addons sds ssl; do
    mkdir -p "$base/$sub"
  done

  # main userdata
  printf 'main_domain: %s\nuser: %s\n' "$DOMAIN" "$USER" \
    > "$base/userdata/main"

  # per-domain userdata file
  printf 'servername: %s\ndocumentroot: /home/%s/public_html\nuser: %s\n' \
    "$DOMAIN" "$USER" "$USER" \
    > "$base/userdata/$DOMAIN"

  # benign content
  printf '%s\n' '<?php phpinfo();' > "$webroot/index.php"
  printf '%s\n' 'Hello world.' > "$webroot/about.txt"

  # benign user-internal relative symlink — must NOT trigger the scan
  ln -sf "../public_html/about.txt" "$base/homedir/about-shortcut"

  # one synthetic WordPress mysql dump with ENGINE=MyISAM + a clean siteurl.
  # The schema is a literal heredoc (no expansion, so backticks need no
  # escaping); only the wp_options rows interpolate $DOMAIN.
  {
    cat <<'SCHEMA'
-- Synthetic WP dump for cpanel-importer fixtures.
CREATE TABLE `wp_options` (
option_id bigint(20) NOT NULL,
option_name varchar(191) NOT NULL,
option_value longtext NOT NULL,
autoload varchar(20) NOT NULL DEFAULT 'yes'
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

CREATE TABLE `wp_posts` (
ID bigint(20) NOT NULL,
post_content longtext NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

CREATE TABLE `wp_users` (
ID bigint(20) NOT NULL,
user_login varchar(60) NOT NULL,
user_pass varchar(255) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;

INSERT INTO `wp_options` (option_id, option_name, option_value, autoload) VALUES
SCHEMA
    cat <<ROWS
(1, 'siteurl', 'https://${DOMAIN}', 'yes'),
(2, 'home', 'https://${DOMAIN}', 'yes'),
(3, 'blogname', 'Hello', 'yes'),
(4, 'template', 'twentytwentyfour', 'yes'),
(5, 'stylesheet', 'twentytwentyfour', 'yes');

INSERT INTO \`wp_users\` VALUES (1, 'admin', 'doesnotmatter');
ROWS
  } > "$base/mysql/${USER}_wp.sql"
}
|
||||||
|
|
||||||
|
# ---- cpmove-clean.tar.gz --------------------------------------------------

CLEAN_TMP="$(mktemp -d)"

# Remove both scratch directories on any exit path. ALFA_TMP is assigned
# later in the script, and under `set -u` a plain "$ALFA_TMP" would abort
# the whole rm command if the script fails before that point, leaking
# CLEAN_TMP. The ${ALFA_TMP:+...} form expands to nothing while it is unset.
trap 'rm -rf -- "$CLEAN_TMP" ${ALFA_TMP:+"$ALFA_TMP"} 2>/dev/null || true' EXIT

build_common_tree "$CLEAN_TMP"

tar -C "$CLEAN_TMP" -czf "$FIXTURES_DIR/cpmove-clean.tar.gz" "cpmove-$USER"
echo "wrote $FIXTURES_DIR/cpmove-clean.tar.gz ($(stat -c%s "$FIXTURES_DIR/cpmove-clean.tar.gz") bytes)"
|
||||||
|
|
||||||
|
# ---- cpmove-alfa.tar.gz ---------------------------------------------------
#
# Same tree as the clean fixture, plus an ALFA-shell-style symlink whose
# target is /etc. This is the exact vector that wiped whp02 — the importer's
# recursive walker followed the symlink and unlink()'d every file in
# /etc. Our pre-extract scan MUST refuse to extract this tarball.

ALFA_TMP="$(mktemp -d)"
build_common_tree "$ALFA_TMP"

alfa_data="$ALFA_TMP/cpmove-$USER/homedir/public_html/$DOMAIN/ALFA_DATA"
mkdir -p "$alfa_data"
printf '%s\n' '<?php /* ALFA shell stub */ ?>' > "$alfa_data/index.php"

# THE attack: absolute-target symlink to /etc.
ln -sf "/etc" "$alfa_data/root"

alfa_tarball="$FIXTURES_DIR/cpmove-alfa.tar.gz"
tar -C "$ALFA_TMP" -czf "$alfa_tarball" "cpmove-$USER"
echo "wrote $alfa_tarball ($(stat -c%s "$alfa_tarball") bytes)"

# Closing summary: a blank line, then a listing of everything built.
echo ""
echo "fixtures built:"
ls -la "$FIXTURES_DIR"
|
||||||
0
tests/fixtures/.gitkeep
vendored
Normal file
0
tests/fixtures/.gitkeep
vendored
Normal file
Reference in New Issue
Block a user