Files
Work-configuration-file/sh/weblessie-server-01/check_memory_and_restart.sh

106 lines
2.5 KiB
Bash
Raw Normal View History

2025-11-26 14:11:03 +08:00
#!/bin/bash
# Restart threshold: 10 GB, expressed in MB.
THRESHOLD_MB=10240

# Per-service settings for the two monitored services, keyed by listening port.
declare -A SERVICE_DIRS=() SERVICE_ENVS=()

# Application directory of each service.
SERVICE_DIRS[8000]="/data/webapps/lessie_sourcing_agents"
SERVICE_DIRS[8001]="/data/webapps/lessie_sourcing_agents_s4"

# Environment label of each service (the main loop reads it into APP_ENV
# for the alert text).
SERVICE_ENVS[8000]="s1"
SERVICE_ENVS[8001]="s4"
# Print the current local time as "YYYY-MM-DD HH:MM:SS" (used as a log prefix).
now() {
  # %F and %T are date(1) shorthands for %Y-%m-%d and %H:%M:%S.
  date '+%F %T'
}
#######################################
# Sum the memory of every process listening on a TCP port. Either RSS or VMS
# would do; this uses RSS (resident, i.e. actually occupied, memory).
# Arguments: $1 - TCP port number
# Outputs:   total RSS in whole MB on stdout; 0 when nothing is listening
# Returns:   0
#######################################
get_memory_usage_mb() {
  # Everything is local so a direct (non-subshell) call cannot clobber the
  # caller's PORT or other globals.
  local port=$1
  local pids pid rss_kb
  local total_kb=0

  # Collect the unique PIDs whose 9th lsof field ends in ":<port>"; the
  # leading colon and the end anchor keep 8000 from matching e.g. 18000.
  pids=$(lsof -iTCP -sTCP:LISTEN -nP \
    | awk -v port=":$port" '$9 ~ port"$" {print $2}' \
    | sort -u)

  if [[ -z "$pids" ]]; then
    echo 0
    return
  fi

  while IFS= read -r pid; do
    [[ -n "$pid" ]] || continue
    # RSS in KB; empty when the process exited between lsof and ps.
    rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null)
    rss_kb=${rss_kb//[[:space:]]/}
    # Only add clean decimal values so stray output cannot break the sum;
    # 10# forces base 10 even if the value has leading zeros.
    if [[ "$rss_kb" =~ ^[0-9]+$ ]]; then
      total_kb=$((total_kb + 10#$rss_kb))
    fi
  done <<< "$pids"

  echo $((total_kb / 1024))
}
#######################################
# Start the gunicorn service for the given port in the background.
# Globals:   SERVICE_DIRS (read), SERVICE_ENVS (read)
# Arguments: $1 - listening port; must have an entry in SERVICE_DIRS
# Outputs:   progress lines on stdout, errors on stderr; gunicorn's own
#            output goes to a timestamped file under <app dir>/logs, and
#            lessie_sourcing_agents_latest.log is re-pointed at that file
# Side effects: changes the working directory to the app directory and
#            activates its virtualenv in the current shell
# Exits:     1 (whole script) when the port has no configured directory, or
#            the app/log directory cannot be entered/created
#######################################
start_service() {
  # Locals keep the caller's PORT / APP_ENV and other globals untouched.
  local port=${1:-}
  local app_dir="" app_env=""
  local log_dir stamp log_file

  if [[ -n "$port" ]]; then
    app_dir=${SERVICE_DIRS[$port]:-}
    app_env=${SERVICE_ENVS[$port]:-}
  fi

  # Without a directory the launch would run from the current directory and
  # log to /logs, so refuse early.
  if [[ -z "$app_dir" ]]; then
    echo "$(now) 端口 ${port} 未配置应用目录,无法启动" >&2
    exit 1
  fi

  # SERVICE_ENVS is the single source of truth (it is also what the alert
  # reports); the former hard-coded rule remains only as a fallback.
  if [[ -z "$app_env" ]]; then
    if [[ "$port" == "8000" ]]; then
      app_env="s1"
    else
      app_env="s4"
    fi
  fi

  echo "$(now) 重启服务(port=${port}, dir=${app_dir})..."

  cd "$app_dir" || exit 1
  # Best effort, as before: a failed activation is reported by `source`
  # itself and the launch is still attempted.
  source "$app_dir/.venv/bin/activate"

  # The output redirection below fails if the log directory is missing.
  log_dir="$app_dir/logs"
  if ! mkdir -p -- "$log_dir"; then
    echo "$(now) 无法创建日志目录 ${log_dir}" >&2
    exit 1
  fi
  stamp=$(date +"%Y%m%d_%H%M%S")
  log_file="$log_dir/lessie_sourcing_agents_${stamp}.log"

  nohup env "APP_ENV=${app_env}" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
    -b "0.0.0.0:${port}" --timeout 300 dialogue.app:app \
    --max-requests 200 --max-requests-jitter 50 \
    > "$log_file" 2>&1 &

  # Keep a stable name pointing at the newest log.
  ln -sf "$log_file" "$log_dir/lessie_sourcing_agents_latest.log"
  echo "$(now) 服务 ${port} 已重新启动"
}
# Main pass over both services: measure each one and restart it when its
# memory use is above THRESHOLD_MB.
for PORT in 8000 8001; do
  echo "---------------------------"
  echo "$(now) 检查端口 $PORT 的服务"

  usage=$(get_memory_usage_mb "$PORT")
  echo "$(now) 当前内存占用: ${usage}MB"

  # Guard clause: anything not strictly above the threshold is left alone.
  # Negating -gt (rather than using -le) keeps a failed comparison on the
  # "no action" path.
  if ! [ "$usage" -gt "$THRESHOLD_MB" ]; then
    echo "$(now) 内存正常,无需处理。"
    continue
  fi

  echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"

  # Run the external kill script for this port, pause 2 seconds, then start
  # the service again.
  sh /data/sh/kill_lessie_sourcing_agents.sh "$PORT"
  sleep 2
  start_service "$PORT"

  # Feishu alert describing what was done.
  APP_ENV=${SERVICE_ENVS[$PORT]}
  sh /data/sh/feishu_notify.sh "Python 内存告警" "$(hostname)" \
    "(${APP_ENV})lessie_sourcing_agents(${PORT})" "warning" \
    "**内存占用**: ${usage}MB\n已自动 kill 并重启。"
done

echo "$(now) 检查结束"