Files
Work-configuration-file/sh/weblessie-server-01/check_memory_and_restart.sh
2025-11-26 14:11:03 +08:00

106 lines
2.5 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Restart threshold for a service's total memory, in MB (10 GiB).
THRESHOLD_MB=$((10 * 1024))

# Settings for the two monitored services, keyed by listening port.
# SERVICE_DIRS maps a port to the application directory of that service.
declare -A SERVICE_DIRS
SERVICE_DIRS["8000"]="/data/webapps/lessie_sourcing_agents"
SERVICE_DIRS["8001"]="/data/webapps/lessie_sourcing_agents_s4"

# SERVICE_ENVS maps a port to the environment label of that service.
declare -A SERVICE_ENVS
SERVICE_ENVS["8000"]="s1"
SERVICE_ENVS["8001"]="s4"
# Print the current local time as "YYYY-MM-DD HH:MM:SS"; used as a log prefix.
now() {
  local stamp_format='+%Y-%m-%d %H:%M:%S'
  date "$stamp_format"
}
#######################################
# Report the combined resident memory (RSS) of every process that holds a
# listening TCP socket on the given port.
# Arguments: $1 - TCP port number to inspect
# Outputs:   total RSS in whole MB on stdout; 0 when nothing is listening
#######################################
get_memory_usage_mb() {
  # Everything is local so a direct (non-subshell) call cannot overwrite the
  # caller's PORT or other globals.
  local port=$1
  local pids pid rss_kb
  local total_kb=0

  # Field 9 of the lsof output is compared against ":<port>" anchored at the
  # end of the field, so e.g. "*:18000" does not match port 8000. sort -u
  # collapses a PID that appears on several lines.
  pids=$(lsof -iTCP -sTCP:LISTEN -nP \
    | awk -v port=":$port" '$9 ~ port"$" {print $2}' \
    | sort -u)

  if [[ -z "$pids" ]]; then
    echo 0
    return
  fi

  while IFS= read -r pid; do
    # RSS is reported in KB. A PID that exited after lsof ran yields no
    # output and is skipped.
    rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null)
    if [[ -n "$rss_kb" ]]; then
      total_kb=$((total_kb + rss_kb))
    fi
  done <<< "$pids"

  echo $((total_kb / 1024))
}
#######################################
# Start the gunicorn service for one port in the background.
# Globals:   SERVICE_DIRS (read), SERVICE_ENVS (read)
# Arguments: $1 - listening port; must be a key of SERVICE_DIRS and
#                 SERVICE_ENVS
# Outputs:   progress lines on stdout, errors on stderr. The service's own
#            output goes to a timestamped file under <app dir>/logs, and
#            lessie_sourcing_agents_latest.log is re-pointed at that file.
# Returns:   0 once the background launch has been issued; 1 when the port
#            is not configured or the app/log directory is unusable.
#            Side effect: the calling shell's working directory becomes the
#            app directory and its virtualenv is activated.
#######################################
start_service() {
  local port=${1:-}
  local app_dir='' app_env=''
  local log_dir timestamp logfile

  if [[ -n "$port" ]]; then
    app_dir=${SERVICE_DIRS[$port]:-}
    # Same table the alerting code reads, so the two cannot drift apart.
    app_env=${SERVICE_ENVS[$port]:-}
  fi
  if [[ -z "$app_dir" || -z "$app_env" ]]; then
    echo "$(now) 端口 ${port} 未配置,无法启动服务" >&2
    return 1
  fi

  echo "$(now) 重启服务(port=$port, dir=$app_dir)..."

  # Return instead of exiting: one broken service must not stop the caller
  # from checking the remaining ports.
  cd "$app_dir" || {
    echo "$(now) 无法进入目录 $app_dir" >&2
    return 1
  }

  # Best effort, as before: a failed activation is reported but does not
  # abort the launch.
  # shellcheck disable=SC1091
  source "$app_dir/.venv/bin/activate" \
    || echo "$(now) 虚拟环境激活失败: $app_dir/.venv/bin/activate" >&2

  # Without the directory the redirect below fails and nothing is started.
  log_dir="$app_dir/logs"
  mkdir -p -- "$log_dir" || {
    echo "$(now) 无法创建日志目录 $log_dir" >&2
    return 1
  }

  timestamp=$(date +"%Y%m%d_%H%M%S")
  logfile="$log_dir/lessie_sourcing_agents_${timestamp}.log"

  nohup env APP_ENV="$app_env" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
    -b "0.0.0.0:$port" --timeout 300 dialogue.app:app \
    --max-requests 200 --max-requests-jitter 50 \
    > "$logfile" 2>&1 &

  ln -sf "$logfile" "$log_dir/lessie_sourcing_agents_latest.log"
  echo "$(now) 服务 $port 已重新启动"
}
# Main pass over both services: measure the memory held by the processes
# listening on each port and, when it exceeds THRESHOLD_MB, kill the service,
# start it again and send a Feishu alert that states whether the restart
# succeeded.
for PORT in 8000 8001; do
  echo "---------------------------"
  echo "$(now) 检查端口 $PORT 的服务"

  usage=$(get_memory_usage_mb "$PORT")
  echo "$(now) 当前内存占用: ${usage}MB"

  if ! [ "$usage" -gt "$THRESHOLD_MB" ]; then
    echo "$(now) 内存正常,无需处理。"
    continue
  fi

  echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"

  # Stop the running service through the dedicated kill script, then pause
  # for two seconds before starting it again.
  sh /data/sh/kill_lessie_sourcing_agents.sh "$PORT"
  sleep 2

  # The alert text depends on the restart outcome, so a failed start is not
  # reported as a successful restart.
  if start_service "$PORT"; then
    restart_note="已自动 kill 并重启。"
  else
    echo "$(now) 服务 $PORT 重启失败" >&2
    restart_note="已自动 kill,但重启失败,请人工处理。"
  fi

  # Feishu alert.
  APP_ENV=${SERVICE_ENVS[$PORT]}
  sh /data/sh/feishu_notify.sh \
    "Python 内存告警" \
    "$(hostname)" \
    "(${APP_ENV})lessie_sourcing_agents(${PORT})" \
    "warning" \
    "**内存占用**: ${usage}MB\n${restart_note}"
done
echo "$(now) 检查结束"