183 lines
5.5 KiB
Bash
Executable File
183 lines
5.5 KiB
Bash
Executable File
#!/bin/bash
set -e

# Runaway Process Killer - Installation Script
# Sets up protection against processes that run away with CPU or RAM.

# Banner: three lines followed by one empty line.
printf '%s\n' \
  "============================================" \
  "Runaway Process Killer - Installation" \
  "============================================" \
  ""

# The installer must run with an effective UID of 0; anything else stops here.
if (( EUID != 0 )); then
  printf '%s\n' "Error: Please run as root (sudo ./install.sh)"
  exit 1
fi
|
|
|
|
# Detect script directory (absolute path of the directory holding this script).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Configuration - every value can be overridden through the environment.
CPU_CYCLES="${CPU_CYCLES:-5}" # 5 minutes default (5 cycles * 60s)
RAM_THRESHOLD="${RAM_THRESHOLD:-5}" # 5% free memory threshold
SWAP_THRESHOLD="${SWAP_THRESHOLD:-5}" # 5% free swap threshold

# These values are pasted verbatim into the monit rule and the earlyoom
# argument string, so malformed input would silently corrupt those files.
# Reject anything that is not a plain number before touching the system.
uint_re='^[1-9][0-9]*$'
# earlyoom's -m/-s take PERCENT or PERCENT,KILL_PERCENT.
percent_re='^[0-9]+(\.[0-9]+)?(,[0-9]+(\.[0-9]+)?)?$'

# require_format NAME VALUE REGEX DESCRIPTION
# Exits with status 1 and a message on stderr unless VALUE matches REGEX.
require_format() {
  if [[ ! "$2" =~ $3 ]]; then
    printf 'Error: %s must be %s (got: %s)\n' "$1" "$4" "$2" >&2
    exit 1
  fi
}

require_format CPU_CYCLES "$CPU_CYCLES" "$uint_re" "a positive integer"
require_format RAM_THRESHOLD "$RAM_THRESHOLD" "$percent_re" "a percentage such as 5 or 10,5"
require_format SWAP_THRESHOLD "$SWAP_THRESHOLD" "$percent_re" "a percentage such as 5 or 10,5"

echo "Configuration:"
echo " CPU kill threshold: ${CPU_CYCLES} minutes of 95%+ CPU"
echo " RAM kill threshold: <${RAM_THRESHOLD}% free memory"
echo " Swap kill threshold: <${SWAP_THRESHOLD}% free swap"
echo ""
|
|
|
|
# Install packages
echo "[1/8] Installing packages..."

# Packages are installed through apt-get; fail early with a readable message
# instead of a bare "command not found" when it is unavailable.
if ! command -v apt-get > /dev/null 2>&1; then
  echo "Error: apt-get not found; this installer requires an apt-based system" >&2
  exit 1
fi

apt-get update -qq
# noninteractive keeps debconf from blocking an unattended install on a prompt.
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq earlyoom monit
|
|
|
|
# Configure earlyoom
echo "[2/8] Configuring earlyoom (RAM protection)..."

# Overwrite /etc/default/earlyoom with a two-line file: a marker comment and
# the EARLYOOM_ARGS assignment built from the RAM and swap thresholds. The
# --avoid regex is stored single-quoted inside the double-quoted value.
{
  printf '%s\n' '# earlyoom configuration - managed by runaway-process-killer'
  printf 'EARLYOOM_ARGS="-m %s -s %s --avoid %s -r 60"\n' \
    "${RAM_THRESHOLD}" "${SWAP_THRESHOLD}" "'(^|/)(init|systemd|sshd)\$'"
} > /etc/default/earlyoom
|
|
|
|
# Configure monit daemon interval
echo "[3/8] Configuring monit daemon..."

# configure_monitrc FILE INTERVAL
#   Sets monit's poll interval in FILE to INTERVAL seconds and makes sure an
#   HTTP interface is configured (the `monit status` command needs it).
#   Directives are matched with optional leading whitespace so that an
#   indented "  set daemon 120" is updated in place rather than shadowed by a
#   second directive inserted at the top of the file.
#   Returns non-zero if FILE is missing or cannot be edited.
configure_monitrc() {
  local rc=$1 interval=$2
  local daemon_re='^[[:space:]]*set[[:space:]]+daemon[[:space:]]+[0-9]+'

  if [[ ! -f "$rc" ]]; then
    echo "Error: monit control file not found: $rc" >&2
    return 1
  fi

  if grep -qE "$daemon_re" "$rc"; then
    # Swap only the number; indentation and anything after it on the line
    # (trailing comment, "with start delay ...") are preserved.
    sed -i -E "s/^([[:space:]]*)set[[:space:]]+daemon[[:space:]]+[0-9]+/\1set daemon ${interval}/" "$rc" || return 1
  elif [[ -s "$rc" ]]; then
    sed -i "1i set daemon ${interval}" "$rc" || return 1
  else
    # sed's "1i" does nothing on an empty file, so append instead.
    printf 'set daemon %s\n' "$interval" >> "$rc" || return 1
  fi

  # Enable monit HTTP interface (required for monit status) unless an active
  # "set httpd" directive is already present.
  # NOTE(review): admin:monit is a well-known credential pair; the interface
  # is bound to localhost only, but consider a site-specific password.
  if ! grep -qE '^[[:space:]]*set[[:space:]]+httpd' "$rc"; then
    cat >> "$rc" << 'EOF' || return 1

# HTTP interface for monit status command
set httpd port 2812 and
    use address localhost
    allow localhost
    allow admin:monit
EOF
  fi
}

# The installer runs under `set -e`, so a non-zero return aborts here.
configure_monitrc /etc/monit/monitrc 60
|
|
|
|
# Create CPU killer config
echo "[4/8] Creating CPU monitoring rule..."

# Write the monit rule file. "$HOST" is emitted literally (not expanded by
# this shell); only the cycle count is substituted here.
# shellcheck disable=SC2016 -- the single quotes around $HOST are intentional
{
  printf '%s\n' 'check system $HOST'
  printf 'if cpu usage > 95%% for %s cycles then exec "%s"\n' \
    "${CPU_CYCLES}" /usr/local/bin/kill-top-cpu.sh
} > /etc/monit/conf.d/cpu-killer
|
|
|
|
# Create kill script
echo "[5/8] Installing CPU kill script..."
# Quoted delimiter: the script below is written out verbatim, nothing is
# expanded at install time.
cat > /usr/local/bin/kill-top-cpu.sh << 'EOF'
#!/bin/bash
# Executed by the monit cpu-killer rule.
# Selects the process with the highest CPU usage that is not protected and
# kills every process that has the same command name.

# Succeeds when command name $1 must never be selected: core services, the
# monitoring tools themselves, and the helpers this script spawns. Names are
# compared as whole words (plus the systemd-* helpers), so unrelated commands
# that merely contain "ps", "head", "bash", ... are no longer skipped.
is_protected() {
  case "$1" in
    systemd | systemd-* | sshd | monit | earlyoom | bash | ps | awk | grep | head)
      return 0
      ;;
  esac
  return 1
}

target_pid=""
target_comm=""

# ps prints no header (all titles are empty). comm is the last column so a
# command name containing spaces is captured whole by the final read variable.
while read -r pid _ comm; do
  [ -n "$comm" ] || continue
  # Never pick PID 1 or this script itself.
  if [ "$pid" -eq 1 ] || [ "$pid" -eq "$$" ]; then
    continue
  fi
  if is_protected "$comm"; then
    continue
  fi
  target_pid=$pid
  target_comm=$comm
  break
done < <(ps -eo pid=,%cpu=,comm= --sort=-%cpu)

if [ -n "$target_pid" ] && [ -n "$target_comm" ]; then
  logger "monit cpu-killer: Killing all '$target_comm' processes (detected high CPU on PID $target_pid)"
  # Kill all processes with this command name. The name is compared as a
  # literal string, so characters such as "." or "+" are not treated as
  # regular-expression operators.
  while read -r pid comm; do
    if [ "$comm" = "$target_comm" ] && [ "$pid" -ne "$$" ]; then
      kill -9 "$pid" 2>/dev/null
    fi
  done < <(ps -eo pid=,comm=)
fi

exit 0
EOF
chmod +x /usr/local/bin/kill-top-cpu.sh
|
|
|
|
# Create orphan Claude killer script
echo "[6/8] Installing orphan Claude killer script..."
# Quoted delimiter: the script below is written out verbatim, nothing is
# expanded at install time.
cat > /usr/local/bin/kill-orphan-claude.sh << 'EOF'
#!/bin/bash
# Kill orphaned Claude processes.
# A process named exactly "claude" is treated as orphaned when:
#   - its TTY is "?" (no terminal attached), OR
#   - its parent PID is 1 (adopted by init)
#
# Always exits 0: a kill that fails because the process already went away is
# not an error, and a non-zero status would trigger the monit alert.

LOG_TAG="orphan-claude-killer"

# kill_tree PID - SIGKILL PID and all of its descendants, deepest first.
kill_tree() {
  local child
  for child in $(pgrep -P "$1" 2>/dev/null); do
    kill_tree "$child"
  done
  kill -9 "$1" 2>/dev/null
}

for pid in $(pgrep -x claude 2>/dev/null); do
  # One ps call for everything that is needed. lstart comes last because it
  # contains spaces and is captured whole by the final read variable.
  read -r tty ppid cpu mem started \
    < <(ps -o tty=,ppid=,%cpu=,%mem=,lstart= -p "$pid" 2>/dev/null)

  # Empty result: the process exited between pgrep and ps.
  [ -n "$tty" ] || continue

  # Collect every reason that applies so the log shows both when both hold.
  reason=""
  if [ "$tty" = "?" ]; then
    reason="no TTY attached"
  fi
  if [ "$ppid" = "1" ]; then
    reason="${reason:+$reason, }parent is init (PPID=1)"
  fi
  [ -n "$reason" ] || continue

  logger "$LOG_TAG: Killing orphaned claude process PID=$pid ($reason) started='$started' cpu=$cpu% mem=$mem%"

  # Kill the process tree (claude may have child processes)
  kill_tree "$pid"
done

exit 0
EOF
chmod +x /usr/local/bin/kill-orphan-claude.sh
|
|
|
|
# Create orphan Claude killer config
echo "[7/8] Creating orphan Claude monitoring rule..."

# Three-line monit rule, written verbatim: run the orphan killer script every
# 15 cycles and alert when it exits with a non-zero status.
printf '%s\n' \
  'check program orphan-claude-killer with path "/usr/local/bin/kill-orphan-claude.sh"' \
  'every 15 cycles' \
  'if status != 0 then alert' \
  > /etc/monit/conf.d/orphan-claude-killer
|
|
|
|
# Enable and start services
echo "[8/8] Enabling and starting services..."

# Syntax-check the monit control file (including the rules written above)
# before restarting, so a bad configuration is reported clearly instead of
# surfacing as a failed service restart. monit's own diagnostics go to stderr.
if ! monit -t > /dev/null; then
  echo "Error: monit configuration check failed (run 'monit -t' for details)" >&2
  exit 1
fi

systemctl enable earlyoom monit
systemctl restart earlyoom monit
|
|
|
|
# Verify

# report_service UNIT
#   Prints " UNIT: running" when systemd reports the unit as active and
#   " UNIT: FAILED" otherwise. --quiet suppresses systemctl's own
#   "active"/"inactive" line so it does not clutter the report, and the
#   explicit if/else avoids the `a && b || c` pitfall.
report_service() {
  if systemctl is-active --quiet "$1"; then
    echo " $1: running"
  else
    echo " $1: FAILED"
  fi
}

echo ""
echo "============================================"
echo "Installation Complete!"
echo "============================================"
echo ""
echo "Status:"
report_service earlyoom
report_service monit
echo ""
echo "Configuration:"
echo " CPU: Kill after ${CPU_CYCLES} minutes of 95%+ usage"
echo " RAM: Kill when free memory < ${RAM_THRESHOLD}%"
echo " Orphan Claude: Check every 15 minutes"
echo ""
echo "Commands:"
echo " sudo monit status # Check monit status"
echo " journalctl -u earlyoom # View earlyoom logs"
echo " tail -f /var/log/monit.log # View monit logs"
echo ""
|