Files
runaway-process-killer/install.sh
Ryan T. Murphy cc9c571c46 Fix install.sh hanging on apt-get install
Add DEBIAN_FRONTEND=noninteractive to prevent dpkg from waiting
for interactive prompts during package configuration.
2026-01-22 18:57:28 -05:00

184 lines
5.6 KiB
Bash
Executable File

#!/bin/bash
set -e

# Runaway Process Killer - Installation Script
# Sets up protection against processes that run away with CPU or RAM.

# Banner
printf '%s\n' \
  "============================================" \
  "Runaway Process Killer - Installation" \
  "============================================" \
  ""

# The installer must run as root; bail out early otherwise.
(( EUID == 0 )) || {
  echo "Error: Please run as root (sudo ./install.sh)"
  exit 1
}
# Detect script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Configuration - each value may be overridden from the environment.
CPU_CYCLES="${CPU_CYCLES:-5}"         # 5 minutes default (5 cycles * 60s)
RAM_THRESHOLD="${RAM_THRESHOLD:-5}"   # 5% free memory threshold (earlyoom -m)
SWAP_THRESHOLD="${SWAP_THRESHOLD:-5}" # 5% free swap threshold (earlyoom -s)

# is_positive_int VALUE
#   Succeeds when VALUE is a decimal integer greater than zero.
is_positive_int() {
  [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 > 0 ))
}

# is_earlyoom_percent VALUE
#   Succeeds when VALUE looks like PERCENT or PERCENT,KILL_PERCENT
#   (decimals tolerated). Anything else would corrupt EARLYOOM_ARGS.
is_earlyoom_percent() {
  local re='^[0-9]+(\.[0-9]+)?(,[0-9]+(\.[0-9]+)?)?$'
  [[ "$1" =~ $re ]]
}

# These values are written verbatim into root-owned config files, so reject
# anything malformed before touching the system.
if ! is_positive_int "$CPU_CYCLES"; then
  echo "Error: CPU_CYCLES must be a positive integer (got '${CPU_CYCLES}')" >&2
  exit 1
fi
for _cfg_name in RAM_THRESHOLD SWAP_THRESHOLD; do
  if ! is_earlyoom_percent "${!_cfg_name}"; then
    echo "Error: ${_cfg_name} must be a percentage such as 5 or 10,5 (got '${!_cfg_name}')" >&2
    exit 1
  fi
done
unset _cfg_name

echo "Configuration:"
echo " CPU kill threshold: ${CPU_CYCLES} minutes of 95%+ CPU"
echo " RAM kill threshold: <${RAM_THRESHOLD}% free memory"
echo " Swap kill threshold: <${SWAP_THRESHOLD}% free swap"
echo ""
# Step 1: install the two daemons this tool is built on.
# DEBIAN_FRONTEND=noninteractive keeps dpkg from blocking on configuration
# prompts during package installation.
echo "[1/8] Installing packages..."
DEBIAN_FRONTEND=noninteractive
export DEBIAN_FRONTEND
required_packages=(earlyoom monit)
apt-get update -qq
apt-get install -y -qq "${required_packages[@]}"
# Step 2: write the earlyoom defaults file (replaces any existing one).
# The --avoid regex names processes earlyoom should avoid killing.
echo "[2/8] Configuring earlyoom (RAM protection)..."
earlyoom_defaults=/etc/default/earlyoom
earlyoom_avoid='(^|/)(init|systemd|sshd)$'
printf '%s\n' \
  "# earlyoom configuration - managed by runaway-process-killer" \
  "EARLYOOM_ARGS=\"-m ${RAM_THRESHOLD} -s ${SWAP_THRESHOLD} --avoid '${earlyoom_avoid}' -r 60\"" \
  > "$earlyoom_defaults"
# Configure monit daemon interval
echo "[3/8] Configuring monit daemon..."

# configure_monitrc FILE
#   Forces monit's poll interval in FILE to 60 seconds and makes sure the
#   HTTP interface on port 2812 (required for "monit status") is enabled.
#   Directives are recognised even when indented, so an existing indented
#   "set daemon N" line is rewritten instead of being left in place next to
#   a newly inserted one.
#   Returns non-zero if FILE is missing or cannot be modified.
configure_monitrc() {
  local rc=$1

  if [[ ! -f "$rc" ]]; then
    echo "Error: monit control file not found: ${rc}" >&2
    return 1
  fi

  if grep -Eq '^[[:space:]]*set[[:space:]]+daemon([[:space:]]|$)' "$rc"; then
    # Replace the whole line so a stale trailing comment goes away too
    sed -i -E 's/^[[:space:]]*set[[:space:]]+daemon([[:space:]].*)?$/set daemon 60/' "$rc" || return 1
  elif [[ -s "$rc" ]]; then
    sed -i '1i set daemon 60' "$rc" || return 1
  else
    # sed's "1i" inserts nothing into an empty file
    echo 'set daemon 60' > "$rc" || return 1
  fi

  # Enable monit HTTP interface (required for monit status)
  # NOTE(review): admin:monit is a well-known default credential; consider
  # replacing it, even though the listener is bound to localhost.
  if ! grep -Eq '^[[:space:]]*set[[:space:]]+httpd[[:space:]]+port[[:space:]]+2812' "$rc"; then
    cat >> "$rc" << 'EOF' || return 1
# HTTP interface for monit status command
set httpd port 2812 and
use address localhost
allow localhost
allow admin:monit
EOF
  fi
}

configure_monitrc /etc/monit/monitrc
# Step 4: monit rule that runs the kill script once system CPU usage has
# stayed above 95% for CPU_CYCLES consecutive cycles.
# ($HOST is written literally into the rule file.)
echo "[4/8] Creating CPU monitoring rule..."
cpu_rule_file=/etc/monit/conf.d/cpu-killer
printf '%s\n' \
  'check system $HOST' \
  "if cpu usage > 95% for ${CPU_CYCLES} cycles then exec \"/usr/local/bin/kill-top-cpu.sh\"" \
  > "$cpu_rule_file"
# Create kill script
echo "[5/8] Installing CPU kill script..."
cat > /usr/local/bin/kill-top-cpu.sh << 'EOF'
#!/bin/bash
# Kill every process that shares the command name of the top CPU consumer,
# skipping protected and transient processes.
#
# NOTE: ps reports %cpu averaged over a process's lifetime (see ps(1)), so a
# long-running process that only recently started spinning may not sort first.

# is_protected NAME
#   Succeeds when NAME must never be killed. Names are matched exactly (plus
#   the systemd-*/sshd-* helper families) rather than as substrings, so that
#   unrelated commands such as "cupsd" or "gpsd" are not exempted by accident.
is_protected() {
  case "$1" in
    systemd | systemd-* | sshd | sshd-* | monit | earlyoom | bash)
      return 0 ;; # critical services
    ps | awk | grep | head | logger | kill-top-cpu.sh)
      return 0 ;; # transient helpers, including this script
  esac
  return 1
}

TARGET_PID=""
COMM=""
# comm is the last column so that names containing spaces survive intact.
while read -r pid comm; do
  # Never target init or this script itself
  if [ "$pid" = "1" ] || [ "$pid" = "$$" ]; then
    continue
  fi
  if is_protected "$comm"; then
    continue
  fi
  TARGET_PID=$pid
  COMM=$comm
  break
done < <(ps -eo pid=,comm= --sort=-%cpu)

if [ -n "$TARGET_PID" ] && [ -n "$COMM" ]; then
  logger "monit cpu-killer: Killing all '$COMM' processes (detected high CPU on PID $TARGET_PID)"
  # Kill all processes with this command name. The name is compared as a
  # literal string, so regex metacharacters in it cannot widen the match.
  ps -eo pid=,comm= | while read -r pid comm; do
    if [ "$comm" = "$COMM" ] && [ "$pid" != "1" ] && [ "$pid" != "$$" ]; then
      kill -9 "$pid" 2>/dev/null
    fi
  done
fi
EOF
chmod +x /usr/local/bin/kill-top-cpu.sh
# Create orphan Claude killer script
echo "[6/8] Installing orphan Claude killer script..."
cat > /usr/local/bin/kill-orphan-claude.sh << 'EOF'
#!/bin/bash
# Kill orphaned Claude processes.
# A claude process counts as orphaned when:
#   - its TTY is "?" (no terminal attached), OR
#   - its parent PID is 1 (adopted by init)
#
# The monit rule for this script alerts on a non-zero exit status, so the
# script exits 0 explicitly once the scan is done; otherwise a kill that
# races with the process exiting on its own would raise a false alert.

LOG_TAG="orphan-claude-killer"

# Find all processes named exactly "claude"
CLAUDE_PIDS=$(pgrep -x claude 2>/dev/null)
if [ -z "$CLAUDE_PIDS" ]; then
  exit 0
fi

# Unquoted on purpose: one PID per word
for PID in $CLAUDE_PIDS; do
  # Get TTY and PPID for this process
  PROC_INFO=$(ps -o tty=,ppid= -p "$PID" 2>/dev/null)
  if [ -z "$PROC_INFO" ]; then
    # Process already gone
    continue
  fi
  read -r PROC_TTY PARENT_PID <<< "$PROC_INFO"

  # Collect every reason that applies instead of keeping only the last one
  REASON=""
  if [ "$PROC_TTY" = "?" ]; then
    REASON="no TTY attached"
  fi
  if [ "$PARENT_PID" = "1" ]; then
    REASON="${REASON:+$REASON, }parent is init (PPID=1)"
  fi

  if [ -n "$REASON" ]; then
    # Gather details for the log entry
    START_TIME=$(ps -o lstart= -p "$PID" 2>/dev/null)
    CPU=$(ps -o %cpu= -p "$PID" 2>/dev/null)
    MEM=$(ps -o %mem= -p "$PID" 2>/dev/null)
    logger "$LOG_TAG: Killing orphaned claude process PID=$PID ($REASON) started='$START_TIME' cpu=$CPU% mem=$MEM%"
    # Kill the direct children first (claude may have child processes),
    # then the process itself. Either may already be gone; that is fine.
    pkill -9 -P "$PID" 2>/dev/null
    kill -9 "$PID" 2>/dev/null
  fi
done

exit 0
EOF
chmod +x /usr/local/bin/kill-orphan-claude.sh
# Step 7: monit rule that runs the orphan killer every 15 cycles and alerts
# when the script exits non-zero.
echo "[7/8] Creating orphan Claude monitoring rule..."
orphan_rule_lines=(
  'check program orphan-claude-killer with path "/usr/local/bin/kill-orphan-claude.sh"'
  'every 15 cycles'
  'if status != 0 then alert'
)
printf '%s\n' "${orphan_rule_lines[@]}" > /etc/monit/conf.d/orphan-claude-killer
# Step 8: enable both services and restart them so the configuration
# written above takes effect.
echo "[8/8] Enabling and starting services..."
managed_services=(earlyoom monit)
systemctl enable "${managed_services[@]}"
systemctl restart "${managed_services[@]}"
# Verify

# report_service NAME
#   Prints " NAME: running" when systemd reports the unit active, otherwise
#   " NAME: FAILED". --quiet stops systemctl from printing its own state line
#   into the report, and a real if/else avoids the "a && b || c" pitfall
#   where the failure branch also runs if the success branch fails.
report_service() {
  local svc=$1
  if systemctl is-active --quiet "$svc"; then
    echo " ${svc}: running"
  else
    echo " ${svc}: FAILED"
  fi
}

echo ""
echo "============================================"
echo "Installation Complete!"
echo "============================================"
echo ""
echo "Status:"
report_service earlyoom
report_service monit
echo ""
echo "Configuration:"
echo " CPU: Kill after ${CPU_CYCLES} minutes of 95%+ usage"
echo " RAM: Kill when free memory < ${RAM_THRESHOLD}%"
echo " Orphan Claude: Check every 15 minutes"
echo ""
echo "Commands:"
echo " sudo monit status # Check monit status"
echo " journalctl -u earlyoom # View earlyoom logs"
echo " tail -f /var/log/monit.log # View monit logs"
echo ""