2025-11-26 14:11:03 +08:00
|
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
|
|
|
|
# Restart threshold, in MB: 12288 MB = 12 GiB of resident memory per service.
THRESHOLD_MB=12288
|
2025-11-26 14:11:03 +08:00
|
|
|
|
|
|
|
|
|
|
# Configuration of the two monitored services.
# Application directory of each service, keyed by its listening port.
declare -A SERVICE_DIRS
SERVICE_DIRS["8000"]="/data/webapps/lessie_sourcing_agents"
SERVICE_DIRS["8001"]="/data/webapps/lessie_sourcing_agents_s4"
|
|
|
|
|
|
|
|
|
|
|
|
# APP_ENV value of each service, keyed by its listening port.
declare -A SERVICE_ENVS
SERVICE_ENVS["8000"]="s1"
SERVICE_ENVS["8001"]="s4"
|
|
|
|
|
|
|
|
|
|
|
|
# Print the current local time as "YYYY-MM-DD HH:MM:SS"; used as a log prefix.
now() {
  # %F expands to %Y-%m-%d and %T to %H:%M:%S.
  date '+%F %T'
}
|
|
|
|
|
|
|
|
|
|
|
|
#######################################
# Sum the resident memory (RSS) of every process listening on a TCP port.
# Arguments: $1 - TCP port number
# Outputs:   total RSS in whole MB on stdout; 0 when nothing listens on the
#            port or when the port argument is not a number
# Returns:   0
#######################################
get_memory_usage_mb() {
  local port=${1:-}
  local pids pid rss_kb
  local total_kb=0

  # A non-numeric port would make the lsof selector invalid; report "no usage"
  # so callers that compare the result numerically keep working.
  if ! [[ "$port" =~ ^[0-9]+$ ]]; then
    echo "get_memory_usage_mb: invalid port '${port}'" >&2
    echo 0
    return
  fi

  # -t prints bare PIDs; -iTCP:<port> with -sTCP:LISTEN restricts the match to
  # sockets listening on exactly this port, so no column parsing is needed.
  pids=$(lsof -t -nP -iTCP:"$port" -sTCP:LISTEN | sort -u)

  if [[ -z "$pids" ]]; then
    echo 0
    return
  fi

  # Word-splitting of $pids is intentional: one PID per word.
  for pid in $pids; do
    rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null) # KB; empty if the PID is gone
    rss_kb=${rss_kb//[[:space:]]/}
    if [[ "$rss_kb" =~ ^[0-9]+$ ]]; then
      total_kb=$((total_kb + rss_kb))
    fi
  done

  echo $((total_kb / 1024))
}
|
|
|
|
|
|
|
|
|
|
|
|
#######################################
# Start the gunicorn service configured for a port.
# Globals:   SERVICE_DIRS, SERVICE_ENVS (read)
# Arguments: $1 - listening port; must be a key of both SERVICE_DIRS and
#                 SERVICE_ENVS
# Outputs:   progress lines on stdout, errors on stderr; gunicorn output goes
#            to a timestamped file under <app dir>/logs, and the
#            lessie_sourcing_agents_latest.log symlink is pointed at it
# Returns:   0 once gunicorn has been launched in the background, 1 when the
#            port is not configured or the environment could not be prepared.
#            The function returns instead of exiting so the caller can go on
#            with the remaining services.
#######################################
start_service() {
  local port=${1:-}
  local app_dir='' app_env=''
  local log_dir log_file timestamp

  # Look the port up only when it is non-empty: an empty associative-array
  # subscript is an error in bash.
  if [[ -n "$port" ]]; then
    app_dir=${SERVICE_DIRS[$port]:-}
    app_env=${SERVICE_ENVS[$port]:-}
  fi
  if [[ -z "$app_dir" || -z "$app_env" ]]; then
    echo "$(now) 未配置的端口: ${port}" >&2
    return 1
  fi

  echo "$(now) 重启服务(port=$port, dir=$app_dir)..."

  log_dir="$app_dir/logs"
  if ! mkdir -p -- "$log_dir"; then
    echo "$(now) 无法创建日志目录: $log_dir" >&2
    return 1
  fi
  timestamp=$(date +"%Y%m%d_%H%M%S")
  log_file="$log_dir/lessie_sourcing_agents_${timestamp}.log"

  # Run cd/activate in a subshell so the working directory and the virtualenv
  # PATH do not leak into the rest of the script.
  if ! (
    cd -- "$app_dir" || exit 1
    # shellcheck disable=SC1091
    source "$app_dir/.venv/bin/activate" || exit 1
    nohup env APP_ENV="$app_env" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
      -b "0.0.0.0:$port" --timeout 300 dialogue.app:app \
      --max-requests 200 --max-requests-jitter 50 \
      >"$log_file" 2>&1 &
  ); then
    echo "$(now) 服务 $port 启动失败(dir=$app_dir)" >&2
    return 1
  fi

  ln -sf "$log_file" "$log_dir/lessie_sourcing_agents_latest.log"

  # Only the launch is confirmed here; gunicorn itself runs in the background.
  echo "$(now) 服务 $port 已重新启动"
}
|
|
|
|
|
|
|
|
|
|
|
|
# Main loop over both services: measure the memory of the processes listening
# on each port and, when it exceeds THRESHOLD_MB, kill and restart the service
# and send a Feishu alert.
for PORT in 8000 8001; do
  echo "---------------------------"
  echo "$(now) 检查端口 $PORT 的服务"

  usage=$(get_memory_usage_mb "$PORT")

  # The comparison below needs an integer; skip the port rather than let a
  # malformed reading be treated as "memory is fine".
  if ! [[ "$usage" =~ ^[0-9]+$ ]]; then
    echo "$(now) 无法获取端口 $PORT 的内存占用,跳过。" >&2
    continue
  fi
  echo "$(now) 当前内存占用: ${usage}MB"

  if (( usage > THRESHOLD_MB )); then
    echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"

    # Kill the running processes via the external kill script.
    sh /data/sh/kill_lessie_sourcing_agents.sh "$PORT"

    # Short pause between the kill script and the restart.
    sleep 2

    # Restart the service and report what actually happened in the alert.
    if start_service "$PORT"; then
      result="已自动 kill 并重启。"
    else
      result="已 kill,但自动重启失败,请人工处理。"
    fi

    # Feishu alert. "\n" stays a literal backslash-n inside double quotes;
    # presumably feishu_notify.sh turns it into a line break (not visible here).
    APP_ENV=${SERVICE_ENVS[$PORT]}
    sh /data/sh/feishu_notify.sh \
      "Python 内存告警" \
      "$(hostname)" \
      "(${APP_ENV})lessie_sourcing_agents(${PORT})" \
      "warning" \
      "**内存占用**: ${usage}MB\n${result}"
  else
    echo "$(now) 内存正常,无需处理。"
  fi
done

echo "$(now) 检查结束"
|