#!/bin/bash
#
# Runaway Process Killer - Installation Script
# Protects against CPU and RAM runaway processes.
#
# What this script sets up:
#   - earlyoom, configured through /etc/default/earlyoom (RAM/swap protection)
#   - monit with a 60-second cycle, its localhost HTTP interface, and two rules:
#       * cpu-killer:           runs /usr/local/bin/kill-top-cpu.sh after
#                               CPU_CYCLES consecutive cycles above 95% CPU
#       * orphan-claude-killer: runs /usr/local/bin/kill-orphan-claude.sh
#                               every 15 cycles
#
# Usage: sudo ./install.sh
#
# Optional environment overrides:
#   CPU_CYCLES      monit cycles above 95% CPU before the kill script runs (default 5)
#   RAM_THRESHOLD   value passed to earlyoom -m (default 5)
#   SWAP_THRESHOLD  value passed to earlyoom -s (default 5)

set -euo pipefail

# Print an error to stderr and abort the installation.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Print a one-line running/FAILED status for a systemd unit.
# An explicit if/else is used so the FAILED branch can only be reached when
# the is-active check itself fails.
report_service() {
  local unit=$1
  if systemctl is-active "$unit"; then
    echo " ${unit}: running"
  else
    echo " ${unit}: FAILED"
  fi
}

echo "============================================"
echo "Runaway Process Killer - Installation"
echo "============================================"
echo ""

# Check root
if [ "$EUID" -ne 0 ]; then
  die "Please run as root (sudo ./install.sh)"
fi

# Detect script directory
# shellcheck disable=SC2034  # not referenced below; kept from the original script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Configuration
CPU_CYCLES="${CPU_CYCLES:-5}"         # 5 minutes default (5 cycles * 60s)
RAM_THRESHOLD="${RAM_THRESHOLD:-5}"   # 5% free memory threshold
SWAP_THRESHOLD="${SWAP_THRESHOLD:-5}" # 5% free swap threshold

# These values are interpolated verbatim into root-owned config files, so
# reject anything that could corrupt them before touching the system.
# The percent pattern also lets through earlyoom's PERCENT[,KILL_PERCENT] form
# (and decimals, for earlyoom versions that accept them).
readonly PERCENT_RE='^[0-9]+(\.[0-9]+)?(,[0-9]+(\.[0-9]+)?)?$'
[[ "$CPU_CYCLES" =~ ^[1-9][0-9]*$ ]] \
  || die "CPU_CYCLES must be a positive integer (got '${CPU_CYCLES}')"
[[ "$RAM_THRESHOLD" =~ $PERCENT_RE ]] \
  || die "RAM_THRESHOLD must be a percentage such as 5 or 10,5 (got '${RAM_THRESHOLD}')"
[[ "$SWAP_THRESHOLD" =~ $PERCENT_RE ]] \
  || die "SWAP_THRESHOLD must be a percentage such as 5 or 10,5 (got '${SWAP_THRESHOLD}')"

readonly MONITRC=/etc/monit/monitrc

echo "Configuration:"
echo " CPU kill threshold: ${CPU_CYCLES} minutes of 95%+ CPU"
echo " RAM kill threshold: <${RAM_THRESHOLD}% free memory"
echo ""

# Install packages (noninteractive to prevent hanging on prompts)
echo "[1/8] Installing packages..."
export DEBIAN_FRONTEND=noninteractive
dpkg --configure -a 2>/dev/null || true # Fix any interrupted dpkg state (best effort)
apt-get update -qq
apt-get install -y -qq earlyoom monit

# Configure earlyoom
echo "[2/8] Configuring earlyoom (RAM protection)..."
# Unquoted heredoc: the thresholds expand now; \$ keeps the regex anchor literal.
cat > /etc/default/earlyoom << EOF
# earlyoom configuration - managed by runaway-process-killer
EARLYOOM_ARGS="-m ${RAM_THRESHOLD} -s ${SWAP_THRESHOLD} --avoid '(^|/)(init|systemd|sshd)\$' -r 60"
EOF

# Configure monit daemon interval
echo "[3/8] Configuring monit daemon..."
# Leading whitespace is tolerated: a distribution monitrc may indent the
# directive, and matching only at column 0 would add a second "set daemon".
if grep -Eq '^[[:space:]]*set daemon' "$MONITRC"; then
  sed -i -E 's/^[[:space:]]*set daemon.*/set daemon 60/' "$MONITRC"
else
  sed -i '1i set daemon 60' "$MONITRC"
fi

# Enable monit HTTP interface (required for monit status)
# NOTE(review): admin:monit is a well-known credential pair; the listener is
# bound to localhost only, but consider changing it.
if ! grep -Eq '^[[:space:]]*set httpd port 2812' "$MONITRC"; then
  cat >> "$MONITRC" << 'EOF'

# HTTP interface for monit status command
set httpd port 2812 and
    use address localhost
    allow localhost
    allow admin:monit
EOF
fi

# Create CPU killer config
echo "[4/8] Creating CPU monitoring rule..."
# \$HOST is written literally so that monit, not this shell, resolves it.
cat > /etc/monit/conf.d/cpu-killer << EOF
check system \$HOST
    if cpu usage > 95% for ${CPU_CYCLES} cycles then exec "/usr/local/bin/kill-top-cpu.sh"
EOF

# Create kill script
echo "[5/8] Installing CPU kill script..."
cat > /usr/local/bin/kill-top-cpu.sh << 'EOF'
#!/bin/bash
# Kill every process that shares a command name with the top CPU consumer,
# skipping protected daemons and the helpers this script itself spawns.

# Command names (ps "comm" column) that must never be selected. The pattern is
# anchored to the whole name, so an unrelated process such as "cupsd" is not
# shielded merely because its name contains "ps".
PROTECTED_RE='^(systemd.*|sshd.*|init|monit|earlyoom|bash|ps|awk|grep|head|logger|pkill)$'

TARGET_PID=""
COMM=""

# comm is the last column so that names containing spaces are read whole;
# the trailing "=" on each column suppresses the header line.
while read -r pid _ comm; do
  [[ -n "$comm" ]] || continue
  [[ "$comm" =~ $PROTECTED_RE ]] && continue
  TARGET_PID="$pid"
  COMM="$comm"
  break
done < <(ps -eo pid=,%cpu=,comm= --sort=-%cpu)

if [ -n "$TARGET_PID" ] && [ -n "$COMM" ]; then
  logger "monit cpu-killer: Killing all '$COMM' processes (detected high CPU on PID $TARGET_PID)"
  # Kill all processes with this command name
  pkill -9 -x -- "$COMM"
fi
EOF
chmod +x /usr/local/bin/kill-top-cpu.sh

# Create orphan Claude killer script
echo "[6/8] Installing orphan Claude killer script..."
cat > /usr/local/bin/kill-orphan-claude.sh << 'EOF'
#!/bin/bash
# Kill orphaned Claude processes (no TTY or parent is init)
# An orphaned Claude process is one where:
#   - TTY is "?" (no terminal attached), OR
#   - Parent PID is 1 (adopted by init)

LOG_TAG="orphan-claude-killer"

# Find all claude processes
CLAUDE_PIDS=$(pgrep -x claude 2>/dev/null)

if [ -z "$CLAUDE_PIDS" ]; then
  exit 0
fi

for PID in $CLAUDE_PIDS; do
  # Get TTY and PPID for this process
  PROC_INFO=$(ps -o tty=,ppid= -p "$PID" 2>/dev/null)

  if [ -z "$PROC_INFO" ]; then
    # Process already gone
    continue
  fi

  read -r PROC_TTY PARENT_PID <<< "$PROC_INFO"

  # Collect every matching reason so the log line shows all of them.
  REASON=""

  # Check if TTY is "?" (no terminal)
  if [ "$PROC_TTY" = "?" ]; then
    REASON="no TTY attached"
  fi

  # Check if parent PID is 1 (adopted by init)
  if [ "$PARENT_PID" = "1" ]; then
    REASON="${REASON:+$REASON, }parent is init (PPID=1)"
  fi

  if [ -n "$REASON" ]; then
    # Gather CPU, memory and start time for logging; lstart is last because
    # it contains spaces.
    CPU="" MEM="" START_TIME=""
    read -r CPU MEM START_TIME < <(ps -o %cpu=,%mem=,lstart= -p "$PID" 2>/dev/null)

    logger "$LOG_TAG: Killing orphaned claude process PID=$PID ($REASON) started='$START_TIME' cpu=$CPU% mem=$MEM%"

    # Kill the direct children first, then the process itself
    pkill -9 -P "$PID" 2>/dev/null
    kill -9 "$PID" 2>/dev/null
  fi
done

# monit alerts on a non-zero status; a kill that loses the race with a
# process exiting on its own is not a failure worth alerting on.
exit 0
EOF
chmod +x /usr/local/bin/kill-orphan-claude.sh

# Create orphan Claude killer config
echo "[7/8] Creating orphan Claude monitoring rule..."
cat > /etc/monit/conf.d/orphan-claude-killer << 'EOF'
check program orphan-claude-killer with path "/usr/local/bin/kill-orphan-claude.sh"
    every 15 cycles
    if status != 0 then alert
EOF

# Enable and start services
echo "[8/8] Enabling and starting services..."
# Syntax-check the control file first so a bad rule is reported clearly
# instead of surfacing as a failed service restart.
monit -t > /dev/null || die "monit configuration check failed (run 'monit -t' for details)"
systemctl enable earlyoom monit
systemctl restart earlyoom monit

# Verify
echo ""
echo "============================================"
echo "Installation Complete!"
echo "============================================"
echo ""
echo "Status:"
report_service earlyoom
report_service monit
echo ""
echo "Configuration:"
echo " CPU: Kill after ${CPU_CYCLES} minutes of 95%+ usage"
echo " RAM: Kill when free memory < ${RAM_THRESHOLD}%"
echo " Orphan Claude: Check every 15 minutes"
echo ""
echo "Commands:"
echo " sudo monit status # Check monit status"
echo " journalctl -u earlyoom # View earlyoom logs"
echo " tail -f /var/log/monit.log # View monit logs"
echo ""