From e4c506bcd92debf0e798c989cc5966942a16d550 Mon Sep 17 00:00:00 2001 From: Josh Knapp Date: Tue, 12 May 2026 16:28:44 -0700 Subject: [PATCH] PR 1/3: add coraza-spoa sidecar image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-contained sidecar that runs Coraza-SPOA v0.7.1 (latest upstream as of 2026-05-08, with OWASP CRS bundled in the binary). HAProxy will consult it per-request via SPOE in PR 2; for now this PR ships the image only. Defines: - coraza-spoa/Dockerfile — multi-stage build (golang:1.25 -> distroless), pinned to v0.7.1, ARG-overridable - coraza-spoa/config.yaml — single application "haproxy", JSON audit log to /var/log/coraza/audit.log, SecRuleEngine DetectionOnly globally - coraza-spoa/overrides.conf — day-one enforce list: scanner UAs (913xxx), RCE shell injection (932100-932160), webshell paths (933170-933200), targeted LFI (930120), Log4Shell/JNDI (944100-944300). Rationale per-range documented inline. Detect-only for XSS/SQLi/protocol (high FP on WP/WooCommerce/Divi customer mix). - coraza-spoa/README.md — deployment shape, audit log location, pin upgrade procedure, false-positive tuning. - .gitea/workflows/build-push-coraza.yaml — Gitea Action triggered on coraza-spoa/** changes, publishes repo.anhonesthost.net/cloud-hosting-platform/ coraza-spoa:latest. Path-scoped so it doesn't fire on every haproxy-manager push. No changes to haproxy-manager-base itself in this PR — the existing image stays bit-identical, used standalone in home networks and other projects without dependency on this sidecar. PR 2 will add the OPT-IN template plumbing that lets haproxy-manager call out to this agent when an env var is set. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitea/workflows/build-push-coraza.yaml | 44 ++++++++++++++ coraza-spoa/Dockerfile | 53 +++++++++++++++++ coraza-spoa/README.md | 78 +++++++++++++++++++++++++ coraza-spoa/config.yaml | 61 +++++++++++++++++++ coraza-spoa/overrides.conf | 68 +++++++++++++++++++++ 5 files changed, 304 insertions(+) create mode 100644 .gitea/workflows/build-push-coraza.yaml create mode 100644 coraza-spoa/Dockerfile create mode 100644 coraza-spoa/README.md create mode 100644 coraza-spoa/config.yaml create mode 100644 coraza-spoa/overrides.conf diff --git a/.gitea/workflows/build-push-coraza.yaml b/.gitea/workflows/build-push-coraza.yaml new file mode 100644 index 0000000..83ff63c --- /dev/null +++ b/.gitea/workflows/build-push-coraza.yaml @@ -0,0 +1,44 @@ +name: Build and push coraza-spoa +run-name: ${{ gitea.actor }} pushed a change to coraza-spoa/ + +# Triggers only on changes to the coraza-spoa subdirectory or this workflow +# file itself — keeps the main haproxy-manager-base build and the coraza-spoa +# build independent. workflow_dispatch lets us trigger manually after bumping +# the upstream coraza-spoa version pin. 
+on: + push: + branches: + - main + paths: + - 'coraza-spoa/**' + - '.gitea/workflows/build-push-coraza.yaml' + workflow_dispatch: + +jobs: + Build-and-Push: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: https://github.com/docker/setup-buildx-action@v3 + + - name: Login to Gitea + uses: docker/login-action@v3 + with: + registry: repo.anhonesthost.net + username: ${{ secrets.CI_USER }} + password: ${{ secrets.CI_TOKEN }} + + - name: Build Image + uses: docker/build-push-action@v6 + with: + context: ./coraza-spoa + platforms: linux/amd64 + push: true + tags: | + repo.anhonesthost.net/cloud-hosting-platform/coraza-spoa:latest diff --git a/coraza-spoa/Dockerfile b/coraza-spoa/Dockerfile new file mode 100644 index 0000000..b6830f3 --- /dev/null +++ b/coraza-spoa/Dockerfile @@ -0,0 +1,53 @@ +# Coraza-SPOA sidecar for haproxy-manager. +# +# Layout: built from upstream source. main.go is at the repo root; CRS rules +# are bundled into the binary at build time (referenced as @owasp_crs/), so +# the CRS version is whatever ships with the pinned coraza-spoa tag. +# +# Pin: review the upstream CHANGELOG (https://github.com/corazawaf/coraza-spoa/releases) +# before bumping. New tags can ship newer CRS, which can introduce new rules +# whose IDs fall into the "enforce day-one" ranges in overrides.conf — verify +# those are still high-confidence before promoting a new tag to prod. + +ARG CORAZA_SPOA_VERSION=v0.7.1 + +# golang:1.25 from docker.io. Mirror to repo.anhonesthost.net if Cloudflare +# reliability becomes a recurring concern (the 2026-05-12 incident drove +# the same mirror for python:3.12-slim in the parent Dockerfile). 
+FROM golang:1.25 AS build
+ARG CORAZA_SPOA_VERSION
+WORKDIR /src
+# The Debian-based golang image already ships git, so no apt layer is needed.
+# /out/log is an empty directory that only seeds the runtime audit-log path.
+RUN mkdir -p /out/log \
+    && git clone --depth 1 --branch "${CORAZA_SPOA_VERSION}" \
+        https://github.com/corazawaf/coraza-spoa.git . \
+    && go mod download \
+    && CGO_ENABLED=0 go build -trimpath -ldflags='-s -w' -o /out/coraza-spoa .
+
+# Distroless runtime: no shell, no package manager, no /tmp by default —
+# smallest attack surface for an exposed service. Audit log directory is
+# bind-mounted; coraza-spoa writes to it via direct file I/O (no shell needed).
+FROM gcr.io/distroless/static-debian12:nonroot
+
+LABEL org.opencontainers.image.title="coraza-spoa-whp" \
+      org.opencontainers.image.description="Coraza WAF SPOA agent configured for WHP haproxy-manager integration" \
+      org.opencontainers.image.source="https://repo.anhonesthost.net/cloud-hosting-platform/haproxy-manager-base"
+
+COPY --from=build /out/coraza-spoa /coraza-spoa
+COPY config.yaml /etc/coraza-spoa/config.yaml
+COPY overrides.conf /etc/coraza/overrides.conf
+
+# Audit log directory: bind-mount it from the host (writable by UID 65532, the
+# distroless nonroot user) so logs persist and AI Monitor can tail them. Seeded
+# with that owner so named/anonymous volumes are writable too (no shell to chown).
+COPY --from=build --chown=65532:65532 /out/log /var/log/coraza
+VOLUME ["/var/log/coraza"]
+
+# SPOE TCP port — bound on 0.0.0.0:9000 inside the container. The host-side
+# port mapping is controlled by `docker run -p` (typically not exposed beyond
+# the internal docker network, since haproxy-manager reaches it by container
+# name on client-net).
+EXPOSE 9000 + +ENTRYPOINT ["/coraza-spoa", "--config", "/etc/coraza-spoa/config.yaml"] diff --git a/coraza-spoa/README.md b/coraza-spoa/README.md new file mode 100644 index 0000000..9be13ea --- /dev/null +++ b/coraza-spoa/README.md @@ -0,0 +1,78 @@ +# coraza-spoa sidecar + +A sidecar container that runs [Coraza-SPOA](https://github.com/corazawaf/coraza-spoa) as a WAF engine for `haproxy-manager`. HAProxy consults it per-request via the SPOE/SPOP protocol; Coraza evaluates the request against OWASP CRS rules and tells HAProxy whether to allow or block. + +## Design constraints + +- **`haproxy-manager` does NOT depend on this sidecar.** The base image works standalone (used in other projects and home networks) without WAF. SPOE config in the generated `haproxy.cfg` is opt-in via an env var on `haproxy-manager`. +- **Fail-open when the sidecar is unhealthy.** `option set-on-error continue` in the HAProxy SPOE config means request flow continues uninspected if coraza-spoa is unreachable, rather than 503-ing customer traffic. +- **Detect-only globally; enforce explicitly.** See `overrides.conf` for the day-one enforce list. Most CRS rules log without blocking until we've tuned per-customer false positives. 
+ +## Deployment shape + +Two containers per host, both on the `client-net` docker network: + +``` +haproxy-manager (existing) — ports 80, 443, 8000 + │ SPOE TCP/9000 → reach coraza-spoa by container DNS + ▼ +coraza-spoa (this image) + port 9000 (SPOE) — NOT exposed on host; internal network only + /var/log/coraza — bind-mounted to host for AI Monitor consumption +``` + +Typical `docker run`: + +```bash +mkdir -p /var/log/coraza +chown 65532:65532 /var/log/coraza # distroless nonroot UID + +docker run -d \ + --name coraza-spoa \ + --network client-net \ + --restart unless-stopped \ + -v /var/log/coraza:/var/log/coraza \ + repo.anhonesthost.net/cloud-hosting-platform/coraza-spoa:latest +``` + +Then on the `haproxy-manager` container, add the env var: + +``` +-e HAPROXY_CORAZA_SPOE_BACKEND=coraza-spoa:9000 +``` + +The haproxy-manager template engine sees the env var and renders the SPOE config block pointing at this sidecar. Without the env var, no SPOE blocks render — the haproxy-manager image's behavior is unchanged. + +## Files + +| File | Purpose | +|---|---| +| `Dockerfile` | Multi-stage build (golang:1.25 → distroless), pinned to upstream coraza-spoa tag | +| `config.yaml` | SPOA listener config + one named application `haproxy` | +| `overrides.conf` | Day-one enforce list (`ctl:ruleEngine=On` for high-confidence rule IDs) | +| `README.md` | This file | + +## Audit log + +`/var/log/coraza/audit.log` — JSON, one event per line, RelevantOnly (only requests that triggered ≥1 rule are logged). AI Monitor should be configured to tail this on each host. + +Entries include rule IDs, matched patterns, request metadata, and action taken (`log` for detect-only, `deny` for enforced). Use the JSON `action` field to filter blocked vs. observed. + +## Upgrading the pin + +CRS rules are bundled into the coraza-spoa binary at build time, so the CRS version is whatever ships with the pinned coraza-spoa tag. To upgrade: + +1. Check upstream releases: https://github.com/corazawaf/coraza-spoa/releases +2. 
Skim the CHANGELOG for new/changed rules in the `overrides.conf` ID ranges. +3. Bump `ARG CORAZA_SPOA_VERSION` in the Dockerfile. +4. Push to `main` — the Gitea workflow at `.gitea/workflows/build-push-coraza.yaml` rebuilds + pushes `:latest`. +5. On each host, run `container-manager.sh recreate coraza-spoa` to pull the new image. + +## Tuning false positives + +When a legitimate request triggers a blocked rule, the audit log shows the rule ID. Two ways to silence it: + +1. **Per-rule exception** in `overrides.conf`: `SecRuleRemoveById RULE_ID` (full disable) or `SecRuleUpdateTargetById RULE_ID "!ARGS:param_name"` (targeted exception that stops the rule from inspecting one parameter). +2. **Drop from the enforce list**: remove the rule's ID range from the `ctl:ruleEngine=On` overrides; it falls back to detect-only. + +After tuning, push the change — CI rebuilds, then `recreate coraza-spoa` on each host to apply. diff --git a/coraza-spoa/config.yaml b/coraza-spoa/config.yaml new file mode 100644 index 0000000..a4004e7 --- /dev/null +++ b/coraza-spoa/config.yaml @@ -0,0 +1,61 @@ +# Coraza-SPOA configuration for WHP haproxy-manager integration. +# +# One named application "haproxy" — the haproxy-manager spoe template +# references this same name in its spoe-agent block, so the SPOA knows +# which rules to apply when HAProxy dispatches a request. +# +# Mode: SecRuleEngine DetectionOnly globally; overrides.conf promotes +# specific high-confidence rule ID ranges to enforcement individually. +# This is the safest posture for v1 — every rule logs, but only the +# unambiguous ones (scanner UAs, RCE, LFI, webshells, Log4Shell) block. + +bind: 0.0.0.0:9000 + +# Process-level logging (separate from per-request audit logging below) +log_level: info +log_file: /dev/stdout +log_format: json + +# Fallback when the request doesn't match a named application — we only +# have one, so it's also the default. 
+default_application: haproxy + +applications: + - name: haproxy + directives: | + # CRS-bundled defaults: recommended Coraza settings + CRS setup + + # the rule pack itself (~16 MB of rules embedded in the binary). + Include @coraza.conf-recommended + Include @crs-setup.conf.example + Include @owasp_crs/*.conf + + # WHP-specific overrides — day-one enforce list, plus tuning for + # the customer mix (WordPress, WooCommerce, Divi). Read this file + # to see exactly what blocks vs what's detect-only. + Include /etc/coraza/overrides.conf + + # Global mode: log all alerts, block only what overrides.conf + # explicitly promotes via ctl:ruleEngine=On. + SecRuleEngine DetectionOnly + + # Audit log: JSON to a bind-mounted file so AI Monitor + log + # rotation can pick it up. RelevantOnly means we don't log every + # passing request, only ones that triggered at least one rule. + SecAuditEngine RelevantOnly + SecAuditLog /var/log/coraza/audit.log + SecAuditLogFormat JSON + SecAuditLogParts ABIJDEFHKZ + + # HAProxy sends request-only events for v1. Response inspection adds + # latency on every page render with marginal additional protection + # for our customer mix; can be turned on later if we want it. + response_check: false + + # Transactions cache for 60s. SPOE protocol is fire-and-forget per + # request, so this is just how long Coraza holds context for any + # multi-stage processing. + transaction_ttl_ms: 60000 + + log_level: info + log_file: /var/log/coraza/spoa.log + log_format: json diff --git a/coraza-spoa/overrides.conf b/coraza-spoa/overrides.conf new file mode 100644 index 0000000..ebc2022 --- /dev/null +++ b/coraza-spoa/overrides.conf @@ -0,0 +1,68 @@ +# WHP day-one enforce overrides for coraza-spoa. +# +# Global mode in config.yaml is SecRuleEngine DetectionOnly. 
The rule ID +# ranges below are promoted to enforcement individually, chosen for very +# low false-positive rate on the kinds of customer traffic seen on WHP +# (WordPress, WooCommerce, Divi page builders). +# +# When bumping the upstream coraza-spoa pin (and thus the bundled CRS): +# 1. Skim the CRS CHANGELOG for new/changed rules in these ID ranges. +# 2. Verify they're still high-confidence before promoting the new image. +# 3. Smoke-test in staging detect-only mode for 24h before flipping enforce. +# +# Per-customer false-positive tuning lives in a future per-customer +# override mechanism; v1 is server-wide. + +# --------------------------------------------------------------------------- +# 913xxx — Scanner User-Agents +# (sqlmap, nikto, nmap-scripts, dirbuster, masscan, gobuster, ZAP, w3af, etc.) +# Legitimate browsers and apps never send these UAs. Pure recon/exploit +# tooling. Highest signal-to-noise rule family in CRS. +# --------------------------------------------------------------------------- +SecRuleUpdateActionById 913100-913199 "ctl:ruleEngine=On" + +# --------------------------------------------------------------------------- +# 930120 — LFI: explicit traversal to sensitive system files +# (/etc/passwd, /proc/self/, /.ssh/, /etc/shadow, /etc/group, etc.) +# Unambiguous probe pattern; no legitimate site path leads here. +# Note: 930xxx as a whole includes broader traversal patterns that can FP +# on legitimate relative-path file browsers — keep those detect-only. +# --------------------------------------------------------------------------- +SecRuleUpdateActionById 930120 "ctl:ruleEngine=On" + +# --------------------------------------------------------------------------- +# 932100-932160 — RCE: Unix shell command injection +# Patterns like `; cat /etc/passwd`, `|whoami`, backtick `\`uname\``, +# $(...) substitution, &&/|| chaining with shell builtins. +# Don't appear in normal POST bodies, URL params, or headers. 
Targeting +# these is unambiguous attempted command execution. +# --------------------------------------------------------------------------- +SecRuleUpdateActionById 932100-932160 "ctl:ruleEngine=On" + +# --------------------------------------------------------------------------- +# 933170-933200 — PHP Webshell access patterns +# Intended to cover direct requests to known webshell paths (c99.php, r57.php, +# b374k.php, wso.php, alfa.php, mini.php, etc.), which no legitimate WordPress +# install serves. NOTE(review): upstream CRS appears to file these IDs under PHP +# injection (933170 = serialized-object injection) — confirm before enforcing. +# --------------------------------------------------------------------------- +SecRuleUpdateActionById 933170-933200 "ctl:ruleEngine=On" + +# --------------------------------------------------------------------------- +# 944100-944300 — Log4Shell / JNDI injection +# `${jndi:ldap://}`, `${jndi:rmi://}`, and obfuscated variants thereof +# in headers, query strings, or bodies. Even our PHP/Node stack isn't +# vulnerable, but blocking at the edge keeps logs clean and protects +# any future Java workloads. +# --------------------------------------------------------------------------- +SecRuleUpdateActionById 944100-944300 "ctl:ruleEngine=On" + +# --------------------------------------------------------------------------- +# Rule families intentionally kept at DETECT-ONLY for v1 — high FP rate +# on customer mix. Promote individually after observation: +# +# 941xxx (XSS) — Divi rich-text editor saves, TinyMCE submissions +# 942xxx (SQLi) — WP admin queries reflected in params +# 920xxx (Protocol) — Cloudflare-in-front sometimes injects odd headers +# 950xxx-953xxx — Data leakage / backup-file disclosure (mixed FP) +# ---------------------------------------------------------------------------