#!/bin/bash
#
# Memory watchdog for the lessie_sourcing_agents services.
# For each managed port it sums the resident memory of the processes listening
# on that port; when the total exceeds THRESHOLD_MB the service is killed,
# restarted, and a Feishu notification is sent.

# Restart threshold in MB (10240 MB = 10 GiB).
readonly THRESHOLD_MB=10240

# Listening port -> application directory of each managed service.
declare -rA SERVICE_DIRS=(
  ["8000"]="/data/webapps/lessie_sourcing_agents_s5"
  ["8001"]="/data/webapps/lessie_sourcing_agents_s6"
)

# Listening port -> environment name (used as APP_ENV and in alert titles).
declare -rA SERVICE_ENVS=(
  ["8000"]="s5"
  ["8001"]="s6"
)

# Print the current local time as "YYYY-MM-DD HH:MM:SS".
# Used as the prefix of every log line this script writes.
# Outputs: the timestamp on stdout
now() {
  date +"%Y-%m-%d %H:%M:%S"
}

# Print the combined resident memory (RSS), in whole MB, of every process that
# holds a listening TCP socket on the given port.
# Arguments: $1 - TCP port number
# Outputs:   an integer number of MB on stdout; 0 when nothing is listening
get_memory_usage_mb() {
  local port=$1
  local pids pid rss_kb
  local total_kb=0

  # From lsof's listing of listening TCP sockets, keep the rows whose 9th
  # column (the socket name, e.g. "*:8000") ends in ":<port>" and take the
  # 2nd column (the PID). sort -u drops PIDs listed more than once.
  pids=$(lsof -iTCP -sTCP:LISTEN -nP \
    | awk -v port=":$port" '$9 ~ port"$" {print $2}' \
    | sort -u)

  if [[ -z "$pids" ]]; then
    echo 0
    return
  fi

  # Word-splitting of $pids is intentional: one PID per word.
  for pid in $pids; do
    # ps reports RSS in KB; a process that exited meanwhile yields no output.
    rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null)
    if [[ -n "$rss_kb" ]]; then
      total_kb=$((total_kb + rss_kb))
    fi
  done

  echo $((total_kb / 1024))
}

# Start the gunicorn service for one port in the background.
# Globals:   SERVICE_DIRS, SERVICE_ENVS (read)
# Arguments: $1 - listening port; must be a key of both maps above
# Outputs:   progress lines on stdout, error lines on stderr
# Side effects: changes the script's working directory to the app directory,
#   sources the app's virtualenv activate script into the current shell,
#   writes a new timestamped log file under <app dir>/logs and repoints the
#   lessie_sourcing_agents_latest.log symlink at it.
# Exits the whole script with status 1 when the port is not configured, the
# log directory cannot be created, or the app directory cannot be entered.
start_service() {
  local port=${1:-}
  local app_dir="" app_env=""
  local timestamp logfile

  # Look the port up only when non-empty: an empty associative-array
  # subscript is a bash error.
  if [[ -n "$port" ]]; then
    app_dir=${SERVICE_DIRS[$port]:-}
    app_env=${SERVICE_ENVS[$port]:-}
  fi
  if [[ -z "$app_dir" || -z "$app_env" ]]; then
    echo "$(now) 未配置的端口: ${port}" >&2
    exit 1
  fi

  echo "$(now) 重启服务(port=$port, dir=$app_dir)..."

  cd "$app_dir" || exit 1

  # Without the logs directory the output redirection below fails and
  # gunicorn is never launched.
  if ! mkdir -p "$app_dir/logs"; then
    echo "$(now) 无法创建日志目录: $app_dir/logs" >&2
    exit 1
  fi

  # Best effort, as before: a failing activate does not abort the start.
  # shellcheck disable=SC1091
  source "$app_dir/.venv/bin/activate"

  timestamp=$(date +"%Y%m%d_%H%M%S")
  logfile="$app_dir/logs/lessie_sourcing_agents_${timestamp}.log"

  # APP_ENV is taken from SERVICE_ENVS so the port -> environment mapping
  # lives in one place.
  nohup env APP_ENV="$app_env" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
    -b "0.0.0.0:$port" --timeout 300 dialogue.app:app \
    --max-requests 200 --max-requests-jitter 50 \
    > "$logfile" 2>&1 &

  ln -sf "$logfile" "$app_dir/logs/lessie_sourcing_agents_latest.log"

  echo "$(now) 服务 $port 已重新启动"
}

# Main loop over both services: measure the memory used by the processes
# listening on each port and, above THRESHOLD_MB, kill the service, start it
# again and send a Feishu alert.
for PORT in 8000 8001; do
  echo "---------------------------"
  echo "$(now) 检查端口 $PORT 的服务"

  usage=$(get_memory_usage_mb "$PORT")
  echo "$(now) 当前内存占用: ${usage}MB"

  # An empty or non-numeric reading would make the integer comparison below
  # error out and fall through to the "memory is fine" branch; skip instead.
  if ! [[ "$usage" =~ ^[0-9]+$ ]]; then
    echo "$(now) 无法获取端口 $PORT 的内存占用,跳过" >&2
    continue
  fi

  if [ "$usage" -gt "$THRESHOLD_MB" ]; then
    echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"

    # Run the external kill script for this port; a failure is logged but the
    # restart is still attempted, as before.
    if ! sh /data/sh/kill_lessie_sourcing_agents.sh "$PORT"; then
      echo "$(now) kill 脚本执行失败(port=$PORT),仍尝试重启" >&2
    fi

    # Pause before starting the replacement.
    # NOTE(review): presumably to let the old processes exit — confirm 2s is enough.
    sleep 2

    # Restart the service.
    start_service "$PORT"

    # Feishu alert; arguments are passed positionally in the order shown.
    APP_ENV=${SERVICE_ENVS[$PORT]}
    sh /data/sh/feishu_notify.sh \
      "Python 内存告警" \
      "$(hostname)" \
      "(${APP_ENV})lessie_sourcing_agents(${PORT})" \
      "warning" \
      "**内存占用**: ${usage}MB\n已自动 kill 并重启。"
  else
    echo "$(now) 内存正常,无需处理。"
  fi
done

echo "$(now) 检查结束"