From c0b6381910bac351ebd52ad2d3250d932fb94ced Mon Sep 17 00:00:00 2001
From: dxin
Date: Wed, 26 Nov 2025 14:11:03 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0sh=E5=86=85=E5=AD=98=E6=A3=80?=
 =?UTF-8?q?=E6=9F=A5python=E7=9A=84=E8=84=9A=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the file contents below were revised during review. The blob ids
on the "index" lines are those of the originally submitted files and were not
recomputed.

 nginx/nexus.deeplink.media.conf               |  37 +++++++++
 sh/crontab -e                                 |   3 +
 sh/kill_lessie_sourcing_agents.sh             |  67 ++++++++++++++++
 sh/start_lessie_sourcing_agents.sh            |  43 ++++++++++
 .../check_memory_and_restart.sh               | 134 ++++++++++++++++++++++++++++++++
 sh/weblessie-server-01/feishu_notify.sh       |  86 ++++++++++++++++++++
 .../kill_lessie_sourcing_agents.sh            |  67 ++++++++++++++++
 .../check_memory_and_restart.sh               | 134 ++++++++++++++++++++++++++++++++
 sh/weblessie-server-02/feishu_notify.sh       |  86 ++++++++++++++++++++
 .../kill_lessie_sourcing_agents.sh            |  67 ++++++++++++++++
 10 files changed, 724 insertions(+)
 create mode 100644 nginx/nexus.deeplink.media.conf
 create mode 100644 sh/crontab -e
 create mode 100644 sh/kill_lessie_sourcing_agents.sh
 create mode 100644 sh/start_lessie_sourcing_agents.sh
 create mode 100644 sh/weblessie-server-01/check_memory_and_restart.sh
 create mode 100644 sh/weblessie-server-01/feishu_notify.sh
 create mode 100644 sh/weblessie-server-01/kill_lessie_sourcing_agents.sh
 create mode 100644 sh/weblessie-server-02/check_memory_and_restart.sh
 create mode 100644 sh/weblessie-server-02/feishu_notify.sh
 create mode 100644 sh/weblessie-server-02/kill_lessie_sourcing_agents.sh

diff --git a/nginx/nexus.deeplink.media.conf b/nginx/nexus.deeplink.media.conf
new file mode 100644
index 0000000..5623db7
--- /dev/null
+++ b/nginx/nexus.deeplink.media.conf
@@ -0,0 +1,37 @@
+# Reverse proxy for nexus.deeplink.media in front of the service on 127.0.0.1:8081.
+
+# Plain HTTP: redirect everything to HTTPS.
+server {
+    listen 80;
+    server_name nexus.deeplink.media;
+    return 301 https://$host$request_uri;
+}
+
+# HTTPS: terminate TLS and proxy every request to the local upstream.
+server {
+    listen 443 ssl;
+    server_name nexus.deeplink.media;
+
+    ssl_certificate /data/tengine/conf/certificate/nexus.deeplink.media_bundle.crt;
+    ssl_certificate_key /data/tengine/conf/certificate/nexus.deeplink.media.key;
+
+    ssl_protocols TLSv1.2 TLSv1.3;
+
+    access_log /data/tengine/logs/nexus_access.log;
+    error_log /data/tengine/logs/nexus_error.log;
+
+    location / {
+        proxy_pass http://127.0.0.1:8081;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        proxy_http_version 1.1;
+        # Pass request bodies on to the upstream without buffering them first.
+        proxy_request_buffering off;
+
+        # Accept request bodies of up to 2G.
+        client_max_body_size 2G;
+    }
+}
diff --git a/sh/crontab -e b/sh/crontab -e
new file mode 100644
index 0000000..34e072b
--- /dev/null
+++ b/sh/crontab -e
@@ -0,0 +1,3 @@
+# Crontab entry for the weblessie servers; install it with `crontab -e`.
+# Runs the memory check every minute and appends its output to the log file.
+* * * * * /bin/bash /data/sh/check_memory_and_restart.sh >> /data/sh/logs/agents_memcheck.log 2>&1
diff --git a/sh/kill_lessie_sourcing_agents.sh b/sh/kill_lessie_sourcing_agents.sh
new file mode 100644
index 0000000..dfbed37
--- /dev/null
+++ b/sh/kill_lessie_sourcing_agents.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Stop every process that listens on the given TCP port.
+#
+# Usage: sh kill_lessie_sourcing_agents.sh <port>
+#
+# SIGTERM is sent first so the server can shut down cleanly; processes that
+# are still listening after GRACE_SECONDS (environment, default 10) get SIGKILL.
+# Exit status: 0 when nothing listens on the port afterwards, 1 otherwise.
+#
+# NOTE: callers run this script with `sh`, so it sticks to POSIX sh syntax.
+
+# Port number, taken from the first argument.
+PORT=$1
+GRACE_SECONDS=${GRACE_SECONDS:-10}
+
+# Refuse to run without a numeric port.
+case "$PORT" in
+  '' | *[!0-9]*)
+    echo "❌ 错误:请在执行时指定端口号,例如: sh kill_lessie_sourcing_agents.sh 8000"
+    exit 1
+    ;;
+esac
+
+# Print the PIDs of all processes listening on $PORT, separated by spaces.
+listening_pids() {
+  lsof -iTCP -sTCP:LISTEN -nP \
+    | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' \
+    | sort -u \
+    | paste -s -d ' ' -
+}
+
+PID=$(listening_pids)
+
+if [ -z "$PID" ]; then
+  echo "端口 $PORT 没有正在运行的进程。"
+  exit 0
+fi
+
+echo "发现端口 $PORT 的进程,PID=$PID"
+echo "正在关闭进程..."
+
+# $PID is a space-separated list, so it is left unquoted on purpose.
+# shellcheck disable=SC2086
+kill $PID 2>/dev/null
+
+# Give the processes up to GRACE_SECONDS to release the port.
+waited=0
+while [ "$waited" -lt "$GRACE_SECONDS" ] && [ -n "$(listening_pids)" ]; do
+  sleep 1
+  waited=$((waited + 1))
+done
+
+REMAINING=$(listening_pids)
+if [ -n "$REMAINING" ]; then
+  echo "进程 $REMAINING 未在 ${GRACE_SECONDS} 秒内退出,执行 kill -9"
+  # shellcheck disable=SC2086
+  kill -9 $REMAINING 2>/dev/null
+  sleep 1
+fi
+
+if [ -n "$(listening_pids)" ]; then
+  echo "❌ 端口 $PORT 仍被占用,进程关闭失败。"
+  exit 1
+fi
+
+echo "进程 $PID 已经被杀掉,端口 $PORT 已释放。"
diff --git a/sh/start_lessie_sourcing_agents.sh b/sh/start_lessie_sourcing_agents.sh
new file mode 100644
index 0000000..279515b
--- /dev/null
+++ b/sh/start_lessie_sourcing_agents.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Start the lessie_sourcing_agents services: s5 on port 8000, s6 on port 8001.
+#
+# For each service: sync dependencies with uv, activate the service's
+# virtualenv, start gunicorn in the background with its output in a
+# timestamped log file, and point logs/lessie_sourcing_agents_latest.log at
+# that file. The script's status is 0 only when both services were launched.
+
+# start_agent <app_dir> <app_env> <port>
+# The body runs in a subshell so that the working directory and the activated
+# virtualenv of one service cannot leak into the next one, and so that a
+# failed `cd` skips the service instead of starting it from the wrong place.
+start_agent() (
+  app_dir=$1
+  app_env=$2
+  port=$3
+
+  cd "$app_dir" || exit 1
+  # Best effort: when the sync fails the existing environment is still started.
+  uv sync || echo "uv sync failed in $app_dir" >&2
+  # shellcheck disable=SC1091
+  . "$app_dir/.venv/bin/activate" || exit 1
+  mkdir -p "$app_dir/logs" || exit 1
+
+  timestamp=$(date +"%Y%m%d_%H%M%S")
+  logfile="$app_dir/logs/lessie_sourcing_agents_${timestamp}.log"
+
+  nohup env APP_ENV="$app_env" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
+    -b "0.0.0.0:$port" --timeout 300 dialogue.app:app \
+    --max-requests 200 --max-requests-jitter 50 \
+    > "$logfile" 2>&1 &
+
+  ln -sf "$logfile" "$app_dir/logs/lessie_sourcing_agents_latest.log"
+)
+
+status=0
+start_agent /data/webapps/lessie_sourcing_agents_s5 s5 8000 || status=1
+start_agent /data/webapps/lessie_sourcing_agents_s6 s6 8001 || status=1
+
+# Report the result through the status of the last command rather than `exit`,
+# so that sourcing this file does not close the calling shell.
+[ "$status" -eq 0 ]
diff --git a/sh/weblessie-server-01/check_memory_and_restart.sh b/sh/weblessie-server-01/check_memory_and_restart.sh
new file mode 100644
index 0000000..594b15f
--- /dev/null
+++ b/sh/weblessie-server-01/check_memory_and_restart.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Memory watchdog for the lessie_sourcing_agents services on this host.
+#
+# For each configured port: add up the resident memory (RSS) of every process
+# listening on it and, when the total exceeds THRESHOLD_MB, kill the service,
+# start it again and send a Feishu alert. Must be run with bash (associative
+# arrays are used); the crontab entry does so.
+
+# Threshold: 10G, in MB.
+THRESHOLD_MB=10240
+
+# Directory that holds the helper scripts (kill / notify).
+SCRIPT_DIR=/data/sh
+
+# Configuration of the two services, keyed by port: application directory ...
+declare -A SERVICE_DIRS=(
+  ["8000"]="/data/webapps/lessie_sourcing_agents"
+  ["8001"]="/data/webapps/lessie_sourcing_agents_s4"
+)
+
+# ... and the APP_ENV value the application is started with.
+declare -A SERVICE_ENVS=(
+  ["8000"]="s1"
+  ["8001"]="s4"
+)
+
+# Print the current time; used as the prefix of the log lines.
+now() {
+  date +"%Y-%m-%d %H:%M:%S"
+}
+
+# get_memory_usage_mb <port>
+# Print the summed RSS, in MB, of all processes listening on <port>;
+# prints 0 when nothing is listening.
+get_memory_usage_mb() {
+  local port=$1
+  local pids pid mem
+  local total=0
+
+  pids=$(lsof -iTCP -sTCP:LISTEN -nP \
+    | awk -v port=":$port" '$9 ~ port"$" {print $2}' \
+    | sort -u)
+
+  for pid in $pids; do
+    mem=$(ps -o rss= -p "$pid" 2>/dev/null) # KB
+    if [ -n "$mem" ]; then
+      total=$((total + mem))
+    fi
+  done
+
+  echo $((total / 1024))
+}
+
+# start_service <port>
+# Launch gunicorn for the service configured on <port>, logging to a new
+# timestamped file, and refresh the lessie_sourcing_agents_latest.log symlink.
+# The body runs in a subshell: a failure makes the function return non-zero
+# instead of ending the whole script (the other service is still checked), and
+# the activated virtualenv and working directory do not leak to the caller.
+# Success only means gunicorn was launched, not that it is already serving.
+start_service() (
+  port=$1
+  app_dir=${SERVICE_DIRS[$port]}
+  app_env=${SERVICE_ENVS[$port]}
+
+  if [ -z "$app_dir" ] || [ -z "$app_env" ]; then
+    echo "$(now) 端口 $port 没有对应的服务配置,无法启动"
+    exit 1
+  fi
+
+  echo "$(now) 重启服务(port=$port, dir=$app_dir)..."
+
+  cd "$app_dir" || exit 1
+  # shellcheck disable=SC1091
+  source "$app_dir/.venv/bin/activate" || exit 1
+  mkdir -p "$app_dir/logs" || exit 1
+
+  timestamp=$(date +"%Y%m%d_%H%M%S")
+  logfile="$app_dir/logs/lessie_sourcing_agents_${timestamp}.log"
+
+  nohup env APP_ENV="$app_env" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
+    -b "0.0.0.0:$port" --timeout 300 dialogue.app:app \
+    --max-requests 200 --max-requests-jitter 50 \
+    > "$logfile" 2>&1 &
+
+  ln -sf "$logfile" "$app_dir/logs/lessie_sourcing_agents_latest.log"
+
+  echo "$(now) 服务 $port 已重新启动"
+)
+
+# Main loop over the two services.
+for PORT in 8000 8001; do
+  echo "---------------------------"
+  echo "$(now) 检查端口 $PORT 的服务"
+
+  usage=$(get_memory_usage_mb "$PORT")
+  echo "$(now) 当前内存占用: ${usage}MB"
+
+  if [ "$usage" -le "$THRESHOLD_MB" ]; then
+    echo "$(now) 内存正常,无需处理。"
+    continue
+  fi
+
+  echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"
+
+  # Kill the service, then start it again; the alert reports what actually
+  # happened. The "\n" is passed on literally as the line-break marker.
+  level="warning"
+  detail="**内存占用**: ${usage}MB\n已自动 kill 并重启。"
+  if sh "$SCRIPT_DIR/kill_lessie_sourcing_agents.sh" "$PORT"; then
+    sleep 2
+    if ! start_service "$PORT"; then
+      echo "$(now) ❌ 服务 $PORT 启动失败"
+      level="danger"
+      detail="**内存占用**: ${usage}MB\n已自动 kill,但重启失败,请人工处理。"
+    fi
+  else
+    echo "$(now) ❌ 服务 $PORT 关闭失败,跳过重启"
+    level="danger"
+    detail="**内存占用**: ${usage}MB\nkill 失败,未重启,请人工处理。"
+  fi
+
+  # Feishu alert.
+  sh "$SCRIPT_DIR/feishu_notify.sh" \
+    "Python 内存告警" \
+    "$(hostname)" \
+    "(${SERVICE_ENVS[$PORT]})lessie_sourcing_agents(${PORT})" \
+    "$level" \
+    "$detail" \
+    || echo "$(now) 飞书告警发送失败"
+done
+
+echo "$(now) 检查结束"
diff --git a/sh/weblessie-server-01/feishu_notify.sh b/sh/weblessie-server-01/feishu_notify.sh
new file mode 100644
index 0000000..4fa9781
--- /dev/null
+++ b/sh/weblessie-server-01/feishu_notify.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Send an interactive card to a Feishu group through a custom-bot webhook.
+#
+# Usage:
+#   sh feishu_notify.sh "<title>" "<host>" "<program>" "<level>" "<detail>"
+#
+#   <level>   info / warning / danger: picks the colour of the card header and
+#             is shown in the card body. Any other value is used as the header
+#             template as is.
+#   <detail>  Markdown. A literal "\n" (backslash + n) becomes a line break.
+#
+# Environment:
+#   FEISHU_WEBHOOK  webhook URL to use instead of the built-in one.
+#
+# Exit status: the exit status of curl.
+#
+# NOTE: callers run this script with `sh`, so it sticks to POSIX sh syntax.
+
+# SECURITY(review): the webhook URL is a credential (whoever knows it can post
+# to the group). Prefer passing it via FEISHU_WEBHOOK and rotate this one.
+WEBHOOK=${FEISHU_WEBHOOK:-"https://open.feishu.cn/open-apis/bot/v2/hook/c14d9964-3b5e-402a-866e-42768aa45e5e"}
+
+TITLE="$1"   # title
+HOST="$2"    # host
+PROGRAM="$3" # program
+LEVEL="$4"   # level: info / warning / danger
+DETAIL="$5"  # detail text (Markdown)
+
+TIME=$(date +"%Y-%m-%d %H:%M:%S")
+
+# Map the level to a header colour of the Feishu card.
+case "$LEVEL" in
+  info) TEMPLATE="blue" ;;
+  warning) TEMPLATE="orange" ;;
+  danger) TEMPLATE="red" ;;
+  *) TEMPLATE="$LEVEL" ;;
+esac
+
+# json_escape <string>
+# Print <string> so that it can be placed inside a JSON string literal:
+# backslashes and double quotes are escaped, tabs and carriage returns become
+# spaces, real line breaks become \n. A literal "\n" written by the caller is
+# kept as is, i.e. it still reaches Feishu as a line break.
+json_escape() {
+  printf '%s\n' "$1" \
+    | tr '\t\r' '  ' \
+    | sed -e 's/\\/\\\\/g' -e 's/\\\\n/\\n/g' -e 's/"/\\"/g' \
+    | awk 'NR > 1 { printf "%s", "\\n" } { printf "%s", $0 }'
+}
+
+# The payload is fed to curl on stdin (-d @-).
+curl -sS -f --max-time 10 -X POST \
+  -H "Content-Type: application/json" \
+  -d @- \
+  "$WEBHOOK" >/dev/null <<EOF
+{
+  "msg_type": "interactive",
+  "card": {
+    "header": {
+      "template": "$(json_escape "$TEMPLATE")",
+      "title": {
+        "content": "$(json_escape "$TITLE")",
+        "tag": "plain_text"
+      }
+    },
+    "elements": [
+      {
+        "tag": "div",
+        "text": {
+          "tag": "lark_md",
+          "content": "**主机:** $(json_escape "$HOST")\n**程序:** $(json_escape "$PROGRAM")\n**级别:** $(json_escape "$LEVEL")\n**时间:** ${TIME}\n\n$(json_escape "$DETAIL")"
+        }
+      }
+    ]
+  }
+}
+EOF
+
+# Example call:
+# sh /data/sh/feishu_notify.sh \
+#   "⚠️ Python 内存告警" \
+#   "$(hostname)" \
+#   "lessie_sourcing_agents_s5(8000)" \
+#   "danger" \
+#   "**内存占用**: ${usage}MB\n已自动 kill 并重启。"
diff --git a/sh/weblessie-server-01/kill_lessie_sourcing_agents.sh b/sh/weblessie-server-01/kill_lessie_sourcing_agents.sh
new file mode 100644
index 0000000..dfbed37
--- /dev/null
+++ b/sh/weblessie-server-01/kill_lessie_sourcing_agents.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Stop every process that listens on the given TCP port.
+#
+# Usage: sh kill_lessie_sourcing_agents.sh <port>
+#
+# SIGTERM is sent first so the server can shut down cleanly; processes that
+# are still listening after GRACE_SECONDS (environment, default 10) get SIGKILL.
+# Exit status: 0 when nothing listens on the port afterwards, 1 otherwise.
+#
+# NOTE: callers run this script with `sh`, so it sticks to POSIX sh syntax.
+
+# Port number, taken from the first argument.
+PORT=$1
+GRACE_SECONDS=${GRACE_SECONDS:-10}
+
+# Refuse to run without a numeric port.
+case "$PORT" in
+  '' | *[!0-9]*)
+    echo "❌ 错误:请在执行时指定端口号,例如: sh kill_lessie_sourcing_agents.sh 8000"
+    exit 1
+    ;;
+esac
+
+# Print the PIDs of all processes listening on $PORT, separated by spaces.
+listening_pids() {
+  lsof -iTCP -sTCP:LISTEN -nP \
+    | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' \
+    | sort -u \
+    | paste -s -d ' ' -
+}
+
+PID=$(listening_pids)
+
+if [ -z "$PID" ]; then
+  echo "端口 $PORT 没有正在运行的进程。"
+  exit 0
+fi
+
+echo "发现端口 $PORT 的进程,PID=$PID"
+echo "正在关闭进程..."
+
+# $PID is a space-separated list, so it is left unquoted on purpose.
+# shellcheck disable=SC2086
+kill $PID 2>/dev/null
+
+# Give the processes up to GRACE_SECONDS to release the port.
+waited=0
+while [ "$waited" -lt "$GRACE_SECONDS" ] && [ -n "$(listening_pids)" ]; do
+  sleep 1
+  waited=$((waited + 1))
+done
+
+REMAINING=$(listening_pids)
+if [ -n "$REMAINING" ]; then
+  echo "进程 $REMAINING 未在 ${GRACE_SECONDS} 秒内退出,执行 kill -9"
+  # shellcheck disable=SC2086
+  kill -9 $REMAINING 2>/dev/null
+  sleep 1
+fi
+
+if [ -n "$(listening_pids)" ]; then
+  echo "❌ 端口 $PORT 仍被占用,进程关闭失败。"
+  exit 1
+fi
+
+echo "进程 $PID 已经被杀掉,端口 $PORT 已释放。"
diff --git a/sh/weblessie-server-02/check_memory_and_restart.sh b/sh/weblessie-server-02/check_memory_and_restart.sh
new file mode 100644
index 0000000..c2dd030
--- /dev/null
+++ b/sh/weblessie-server-02/check_memory_and_restart.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Memory watchdog for the lessie_sourcing_agents services on this host.
+#
+# For each configured port: add up the resident memory (RSS) of every process
+# listening on it and, when the total exceeds THRESHOLD_MB, kill the service,
+# start it again and send a Feishu alert. Must be run with bash (associative
+# arrays are used); the crontab entry does so.
+
+# Threshold: 10G, in MB.
+THRESHOLD_MB=10240
+
+# Directory that holds the helper scripts (kill / notify).
+SCRIPT_DIR=/data/sh
+
+# Configuration of the two services, keyed by port: application directory ...
+declare -A SERVICE_DIRS=(
+  ["8000"]="/data/webapps/lessie_sourcing_agents_s5"
+  ["8001"]="/data/webapps/lessie_sourcing_agents_s6"
+)
+
+# ... and the APP_ENV value the application is started with.
+declare -A SERVICE_ENVS=(
+  ["8000"]="s5"
+  ["8001"]="s6"
+)
+
+# Print the current time; used as the prefix of the log lines.
+now() {
+  date +"%Y-%m-%d %H:%M:%S"
+}
+
+# get_memory_usage_mb <port>
+# Print the summed RSS, in MB, of all processes listening on <port>;
+# prints 0 when nothing is listening.
+get_memory_usage_mb() {
+  local port=$1
+  local pids pid mem
+  local total=0
+
+  pids=$(lsof -iTCP -sTCP:LISTEN -nP \
+    | awk -v port=":$port" '$9 ~ port"$" {print $2}' \
+    | sort -u)
+
+  for pid in $pids; do
+    mem=$(ps -o rss= -p "$pid" 2>/dev/null) # KB
+    if [ -n "$mem" ]; then
+      total=$((total + mem))
+    fi
+  done
+
+  echo $((total / 1024))
+}
+
+# start_service <port>
+# Launch gunicorn for the service configured on <port>, logging to a new
+# timestamped file, and refresh the lessie_sourcing_agents_latest.log symlink.
+# The body runs in a subshell: a failure makes the function return non-zero
+# instead of ending the whole script (the other service is still checked), and
+# the activated virtualenv and working directory do not leak to the caller.
+# Success only means gunicorn was launched, not that it is already serving.
+start_service() (
+  port=$1
+  app_dir=${SERVICE_DIRS[$port]}
+  app_env=${SERVICE_ENVS[$port]}
+
+  if [ -z "$app_dir" ] || [ -z "$app_env" ]; then
+    echo "$(now) 端口 $port 没有对应的服务配置,无法启动"
+    exit 1
+  fi
+
+  echo "$(now) 重启服务(port=$port, dir=$app_dir)..."
+
+  cd "$app_dir" || exit 1
+  # shellcheck disable=SC1091
+  source "$app_dir/.venv/bin/activate" || exit 1
+  mkdir -p "$app_dir/logs" || exit 1
+
+  timestamp=$(date +"%Y%m%d_%H%M%S")
+  logfile="$app_dir/logs/lessie_sourcing_agents_${timestamp}.log"
+
+  nohup env APP_ENV="$app_env" gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
+    -b "0.0.0.0:$port" --timeout 300 dialogue.app:app \
+    --max-requests 200 --max-requests-jitter 50 \
+    > "$logfile" 2>&1 &
+
+  ln -sf "$logfile" "$app_dir/logs/lessie_sourcing_agents_latest.log"
+
+  echo "$(now) 服务 $port 已重新启动"
+)
+
+# Main loop over the two services.
+for PORT in 8000 8001; do
+  echo "---------------------------"
+  echo "$(now) 检查端口 $PORT 的服务"
+
+  usage=$(get_memory_usage_mb "$PORT")
+  echo "$(now) 当前内存占用: ${usage}MB"
+
+  if [ "$usage" -le "$THRESHOLD_MB" ]; then
+    echo "$(now) 内存正常,无需处理。"
+    continue
+  fi
+
+  echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"
+
+  # Kill the service, then start it again; the alert reports what actually
+  # happened. The "\n" is passed on literally as the line-break marker.
+  level="warning"
+  detail="**内存占用**: ${usage}MB\n已自动 kill 并重启。"
+  if sh "$SCRIPT_DIR/kill_lessie_sourcing_agents.sh" "$PORT"; then
+    sleep 2
+    if ! start_service "$PORT"; then
+      echo "$(now) ❌ 服务 $PORT 启动失败"
+      level="danger"
+      detail="**内存占用**: ${usage}MB\n已自动 kill,但重启失败,请人工处理。"
+    fi
+  else
+    echo "$(now) ❌ 服务 $PORT 关闭失败,跳过重启"
+    level="danger"
+    detail="**内存占用**: ${usage}MB\nkill 失败,未重启,请人工处理。"
+  fi
+
+  # Feishu alert.
+  sh "$SCRIPT_DIR/feishu_notify.sh" \
+    "Python 内存告警" \
+    "$(hostname)" \
+    "(${SERVICE_ENVS[$PORT]})lessie_sourcing_agents(${PORT})" \
+    "$level" \
+    "$detail" \
+    || echo "$(now) 飞书告警发送失败"
+done
+
+echo "$(now) 检查结束"
diff --git a/sh/weblessie-server-02/feishu_notify.sh b/sh/weblessie-server-02/feishu_notify.sh
new file mode 100644
index 0000000..4fa9781
--- /dev/null
+++ b/sh/weblessie-server-02/feishu_notify.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Send an interactive card to a Feishu group through a custom-bot webhook.
+#
+# Usage:
+#   sh feishu_notify.sh "<title>" "<host>" "<program>" "<level>" "<detail>"
+#
+#   <level>   info / warning / danger: picks the colour of the card header and
+#             is shown in the card body. Any other value is used as the header
+#             template as is.
+#   <detail>  Markdown. A literal "\n" (backslash + n) becomes a line break.
+#
+# Environment:
+#   FEISHU_WEBHOOK  webhook URL to use instead of the built-in one.
+#
+# Exit status: the exit status of curl.
+#
+# NOTE: callers run this script with `sh`, so it sticks to POSIX sh syntax.
+
+# SECURITY(review): the webhook URL is a credential (whoever knows it can post
+# to the group). Prefer passing it via FEISHU_WEBHOOK and rotate this one.
+WEBHOOK=${FEISHU_WEBHOOK:-"https://open.feishu.cn/open-apis/bot/v2/hook/c14d9964-3b5e-402a-866e-42768aa45e5e"}
+
+TITLE="$1"   # title
+HOST="$2"    # host
+PROGRAM="$3" # program
+LEVEL="$4"   # level: info / warning / danger
+DETAIL="$5"  # detail text (Markdown)
+
+TIME=$(date +"%Y-%m-%d %H:%M:%S")
+
+# Map the level to a header colour of the Feishu card.
+case "$LEVEL" in
+  info) TEMPLATE="blue" ;;
+  warning) TEMPLATE="orange" ;;
+  danger) TEMPLATE="red" ;;
+  *) TEMPLATE="$LEVEL" ;;
+esac
+
+# json_escape <string>
+# Print <string> so that it can be placed inside a JSON string literal:
+# backslashes and double quotes are escaped, tabs and carriage returns become
+# spaces, real line breaks become \n. A literal "\n" written by the caller is
+# kept as is, i.e. it still reaches Feishu as a line break.
+json_escape() {
+  printf '%s\n' "$1" \
+    | tr '\t\r' '  ' \
+    | sed -e 's/\\/\\\\/g' -e 's/\\\\n/\\n/g' -e 's/"/\\"/g' \
+    | awk 'NR > 1 { printf "%s", "\\n" } { printf "%s", $0 }'
+}
+
+# The payload is fed to curl on stdin (-d @-).
+curl -sS -f --max-time 10 -X POST \
+  -H "Content-Type: application/json" \
+  -d @- \
+  "$WEBHOOK" >/dev/null <<EOF
+{
+  "msg_type": "interactive",
+  "card": {
+    "header": {
+      "template": "$(json_escape "$TEMPLATE")",
+      "title": {
+        "content": "$(json_escape "$TITLE")",
+        "tag": "plain_text"
+      }
+    },
+    "elements": [
+      {
+        "tag": "div",
+        "text": {
+          "tag": "lark_md",
+          "content": "**主机:** $(json_escape "$HOST")\n**程序:** $(json_escape "$PROGRAM")\n**级别:** $(json_escape "$LEVEL")\n**时间:** ${TIME}\n\n$(json_escape "$DETAIL")"
+        }
+      }
+    ]
+  }
+}
+EOF
+
+# Example call:
+# sh /data/sh/feishu_notify.sh \
+#   "⚠️ Python 内存告警" \
+#   "$(hostname)" \
+#   "lessie_sourcing_agents_s5(8000)" \
+#   "danger" \
+#   "**内存占用**: ${usage}MB\n已自动 kill 并重启。"
diff --git a/sh/weblessie-server-02/kill_lessie_sourcing_agents.sh b/sh/weblessie-server-02/kill_lessie_sourcing_agents.sh
new file mode 100644
index 0000000..dfbed37
--- /dev/null
+++ b/sh/weblessie-server-02/kill_lessie_sourcing_agents.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Stop every process that listens on the given TCP port.
+#
+# Usage: sh kill_lessie_sourcing_agents.sh <port>
+#
+# SIGTERM is sent first so the server can shut down cleanly; processes that
+# are still listening after GRACE_SECONDS (environment, default 10) get SIGKILL.
+# Exit status: 0 when nothing listens on the port afterwards, 1 otherwise.
+#
+# NOTE: callers run this script with `sh`, so it sticks to POSIX sh syntax.
+
+# Port number, taken from the first argument.
+PORT=$1
+GRACE_SECONDS=${GRACE_SECONDS:-10}
+
+# Refuse to run without a numeric port.
+case "$PORT" in
+  '' | *[!0-9]*)
+    echo "❌ 错误:请在执行时指定端口号,例如: sh kill_lessie_sourcing_agents.sh 8000"
+    exit 1
+    ;;
+esac
+
+# Print the PIDs of all processes listening on $PORT, separated by spaces.
+listening_pids() {
+  lsof -iTCP -sTCP:LISTEN -nP \
+    | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' \
+    | sort -u \
+    | paste -s -d ' ' -
+}
+
+PID=$(listening_pids)
+
+if [ -z "$PID" ]; then
+  echo "端口 $PORT 没有正在运行的进程。"
+  exit 0
+fi
+
+echo "发现端口 $PORT 的进程,PID=$PID"
+echo "正在关闭进程..."
+
+# $PID is a space-separated list, so it is left unquoted on purpose.
+# shellcheck disable=SC2086
+kill $PID 2>/dev/null
+
+# Give the processes up to GRACE_SECONDS to release the port.
+waited=0
+while [ "$waited" -lt "$GRACE_SECONDS" ] && [ -n "$(listening_pids)" ]; do
+  sleep 1
+  waited=$((waited + 1))
+done
+
+REMAINING=$(listening_pids)
+if [ -n "$REMAINING" ]; then
+  echo "进程 $REMAINING 未在 ${GRACE_SECONDS} 秒内退出,执行 kill -9"
+  # shellcheck disable=SC2086
+  kill -9 $REMAINING 2>/dev/null
+  sleep 1
+fi
+
+if [ -n "$(listening_pids)" ]; then
+  echo "❌ 端口 $PORT 仍被占用,进程关闭失败。"
+  exit 1
+fi
+
+echo "进程 $PID 已经被杀掉,端口 $PORT 已释放。"