A 10s postgresql restart took down transcribe.shadowdao.com-01 for ~17h because pm2 gave up after 5 fast retries, the entrypoint's trailing tail -f kept PID 1 alive, and the healthcheck (wget --spider on nginx port 80) succeeded on the 301-to-https redirect regardless of whether Node was alive.

Three coordinated fixes to the cnoc image:

- HEALTHCHECK: replace the redirect-passing wget probe with TCP-level checks on 127.0.0.1:3000 (Node) and :80 (nginx). Tenant-agnostic, no /ping dependency — catches the exact incident scenario (port 3000 closed when pm2 exits).
- entrypoint.sh: exec pm2 via tini so it becomes PID 1. When pm2 exhausts max_restarts and exits, the container exits and the unless-stopped restart policy brings it back. Logs are tailed in the background with -F (logrotate-safe).
- Dockerfile: install tini from EPEL for proper signal forwarding and zombie reaping of nginx/crond children that reparent to PID 1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
106 lines
3.3 KiB
Bash
Executable File
106 lines
3.3 KiB
Bash
Executable File
#!/usr/bin/env bash

# Defaults for optional settings taken from the environment.
# A value that is unset OR empty is replaced; anything else is left untouched.

# NODEVER falls back to "20" (presumably the Node.js major version; it is not
# read again in this script -- confirm against the image build).
NODEVER="${NODEVER:-20}"

# environment falls back to "PROD"; the value 'DEV' switches on the
# development-only setup further down in this script.
environment="${environment:-PROD}"
# Create user with proper error handling
#
# Reads:  user - account name (required)
#         uid  - numeric id handed to adduser when the account is created
# Exits 1 when the account cannot be created.

# With an empty name, `id -u` would report on the current user and succeed,
# and everything below would operate on /home//... -- refuse to continue.
if [ -z "${user:-}" ]; then
  echo "ERROR: 'user' is not set; cannot set up the application account" >&2
  exit 1
fi

if id -u "$user" >/dev/null 2>&1; then
  echo "User $user already exists"
else
  echo "Creating user $user with UID $uid"
  # Quoted so an empty uid reaches adduser as an (invalid) empty id instead of
  # letting the following flag be swallowed as the -u argument.
  adduser -u "$uid" -m -s /bin/bash "$user" || {
    echo "Failed to create user $user with UID $uid"
    exit 1
  }
fi
# Per-user application and log directories (no-op when they already exist).
mkdir -p "/home/$user/app" \
  "/home/$user/logs/nginx" \
  "/home/$user/logs/nodejs"

# Link log directories
# /var/log/nginx is removed first so the link takes its place instead of being
# created inside an existing directory.
rm -rf /var/log/nginx
ln -sfn "/home/$user/logs/nginx" /var/log/nginx

# -f/-n make this repeatable: if the script runs again on the same container
# filesystem, /var/log/nodejs is already a symlink to a directory, and a plain
# `ln -s` would follow it and drop a nested "nodejs" link inside the log dir.
ln -sfn "/home/$user/logs/nodejs" /var/log/nodejs
# Configure nginx for reverse proxy
/scripts/create-nginx-config.sh

# Set ownership and permissions
# Abort rather than recurse over /home/ itself if the account name is empty.
: "${user:?user must be set before fixing ownership of the home directory}"
chown -R "$user:$user" "/home/$user"
# 755 is applied recursively to every file and directory under the home.
chmod -R 755 "/home/$user"
# Start nginx
nginx

# DEV deployments additionally get a local memcached instance.
if [[ "$environment" == 'DEV' ]]; then
  echo "Starting Dev Deployment"

  # Ensure microdnf is available for installing additional packages in DEV mode
  if ! command -v microdnf >/dev/null 2>&1; then
    echo "microdnf not found, installing with dnf..."
    dnf install -y microdnf && dnf clean all
  fi

  # Install Memcached for session storage in DEV mode with memory limit
  microdnf install -y memcached

  # Start memcached with 32MB memory limit
  # (-d: daemonize, -u: run as the app user, -p: port, -m: memory in MB)
  nohup memcached -d -u "$user" -p 11211 -m 32
fi

# Start cron for log rotation and backups
/usr/sbin/crond
# Create app directory if it doesn't exist
if [ ! -d "/home/$user/app" ]; then
  echo "Creating app directory at /home/$user/app"
  mkdir -p "/home/$user/app"
  chown -R "$user:$user" "/home/$user/app"
fi

# If app directory is empty, copy the simple-website example
# (`ls -A` lists dotfiles too, so a directory holding only hidden files is
# not treated as empty).
if [ -z "$(ls -A "/home/$user/app")" ]; then
  echo "App directory is empty, copying simple-website example..."

  # Remember the copy result so the success message is only printed when the
  # copy actually worked; ownership is fixed either way, as before.
  copy_status=0
  cp -r /examples/simple-website/* "/home/$user/app/" || copy_status=$?
  chown -R "$user:$user" "/home/$user/app"

  if [ "$copy_status" -eq 0 ]; then
    echo "Copied simple-website example to provide a working application"
  else
    echo "ERROR: copying /examples/simple-website to /home/$user/app failed (status $copy_status)" >&2
  fi
fi
# Now there's always an app in the user directory (either user's or example)
# `su` without `-` keeps the current directory, so npm install below runs
# wherever this cd lands -- stop if it fails instead of installing elsewhere.
cd "/home/$user/app" || {
  echo "ERROR: cannot change directory to /home/$user/app" >&2
  exit 1
}

# Verify user exists and show info
echo "Verifying user setup:"
# Quoted: an empty name must fail here rather than fall back to the current user.
id "$user" || { echo "ERROR: User $user does not exist!"; exit 1; }

# Install dependencies as the user
echo "Installing npm dependencies as user $user..."
# A failed install is reported but, as before, does not stop the startup.
su -c "npm install" "$user" \
  || echo "WARNING: npm install exited with status $?" >&2
# Check if ecosystem.config.js exists, if not generate it
if [ ! -f "/home/$user/app/ecosystem.config.js" ]; then
  echo "No ecosystem.config.js found, generating from package.json..."
  # The generator receives the account name and the app directory; it is
  # expected to leave ecosystem.config.js in that directory (the chown below
  # relies on this -- confirm against the generator script).
  /scripts/generate-ecosystem-config.sh "$user" "/home/$user/app" \
    || echo "WARNING: generate-ecosystem-config.sh exited with status $?" >&2

  if [ -f "/home/$user/app/ecosystem.config.js" ]; then
    chown "$user:$user" "/home/$user/app/ecosystem.config.js"
  else
    # The PM2 start command at the end of this script names this file, so
    # there is nothing useful to start without it.
    echo "ERROR: /home/$user/app/ecosystem.config.js was not generated" >&2
    exit 1
  fi
fi
# Mirror logs to docker logs in the background.
# Use -F (capital) so logrotate-recreated files keep streaming.
# tail's own stderr is discarded (presumably to hide its notices about files
# that do not exist yet); the job is left running in the background.
tail -F \
  "/home/$user/logs/nginx/access.log" \
  "/home/$user/logs/nginx/error.log" \
  "/home/$user/logs/nodejs/app.log" \
  "/home/$user/logs/nodejs/out.log" \
  "/home/$user/logs/nodejs/error.log" 2>/dev/null &
# Start PM2 under tini so it becomes PID 1 (with proper signal forwarding
# and zombie reaping for nginx/crond/memcached children that reparent here).
# When pm2 exits (e.g. max_restarts exhausted), tini exits and Docker's
# restart policy brings the container back.
echo "Starting PM2 as user $user (under tini as PID 1)..."
cd "/home/$user/app" || {
  echo "ERROR: cannot change directory to /home/$user/app" >&2
  exit 1
}

# exec replaces this shell, so nothing after this line runs.
# `su -` starts a login shell for the user, hence the explicit cd inside the
# command string before pm2 is started in the foreground (--no-daemon).
exec tini -- su - "$user" -c "cd /home/$user/app && NODE_ENV=production pm2 start ecosystem.config.js --no-daemon"