Compare commits

...

11 Commits

Author  SHA1        Message                                                                       Date
dxin    bd2656037b  Add                                                                           2025-12-12 00:20:13 +08:00
dxin    6f7b24926d  Update filebeat config                                                        2025-12-11 15:17:03 +08:00
        7ad9956c5d  Update filebeat                                                               2025-12-11 11:11:16 +08:00
dxin    8288aad918  Add lessie-react                                                              2025-12-02 23:16:38 +08:00
dxin    d42ab824bc  Merge branch 'main' of http://8.134.11.35:3000/dxin/Work-configuration-file  2025-12-02 14:22:19 +08:00
        a6aa4c6b72  Add nodejs-email pipeline script                                              2025-12-02 14:22:15 +08:00
dxin    b07b715392  Add                                                                           2025-12-01 22:11:40 +08:00
dxin    dab9078477  +                                                                             2025-11-29 19:11:29 +08:00
dxin    ff383ea31e  Add Python memory alerts sent to Feishu                                       2025-11-26 18:01:58 +08:00
dxin    313c1a27df  Merge branch 'main' of http://8.134.11.35:3000/dxin/Work-configuration-file  2025-11-26 14:11:35 +08:00
        c0b6381910  Add a shell script that checks Python process memory                          2025-11-26 14:11:03 +08:00
37 changed files with 1486 additions and 120 deletions

1.yml

@@ -240,36 +240,6 @@ docker run -d -p 8080:8080 --network 1panel-network --name praeco \
-e ELASTALERT_HOST=http://192.168.60.21:3030 \
johnsusek/praeco
echo "slack_webhook_url: 'https://open.feishu.cn/open-apis/bot/v2/hook/8bd6a15d-90f0-4f4f-a1b1-bd105f31ea06'" | sudo tee -a rules/BaseRule.config >/dev/null
export PRAECO_ELASTICSEARCH=192.168.1.7
{
"msg_type": "interactive",
"card": {
"header": {
"title": {
"content": "[ INFINI Platform Alerting ]",
"tag": "plain_text"
},
"template":"{{if eq .priority "critical"}}red{{else if eq .priority "high"}}orange{{else if eq .priority "medium"}}yellow{{else if eq .priority "low"}}grey{{else}}blue{{end}}"
},
"elements": [
{
"tag": "markdown",
"content": "🔥 告警事件 [#{{.event_id}}]({{$.env.INFINI_CONSOLE_ENDPOINT}}/#/alerting/message/{{.event_id}}) 正在进行中\n **{{.title}}**\n 优先级: {{.priority}}\n 事件ID: {{.event_id}}\n 目标: {{.resource_name}}-{{.objects}}\n 触发时间: {{.trigger_at | datetime}}"
},
{
"tag": "hr"
},
{
"tag": "markdown",
"content": "**具体错误行内容**: {{ if. hits.hits.0._source.message }}{{.hits.hits.0._source.message }}{{ else }}{{.message | str_replace \"\\n\" \"\\\\n\" }}{{ end }}\n **触发 error 的时间**: {{.trigger_at | datetime }}"
}
]
}
}
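A quick way to sanity-check the Feishu bot webhook configured above (a minimal sketch; the hook URL is the one written into rules/BaseRule.config):

```bash
# Post a plain-text test message; a healthy webhook replies with {"code":0,...}
curl -s -X POST -H "Content-Type: application/json" \
  -d '{"msg_type": "text", "content": {"text": "praeco webhook test"}}' \
  "https://open.feishu.cn/open-apis/bot/v2/hook/8bd6a15d-90f0-4f4f-a1b1-bd105f31ea06"
```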

ES/单节点/安装es.conf

@@ -0,0 +1,143 @@
# Prerequisites & preparation
sudo dnf update -y
sudo dnf install -y nano wget curl unzip
# Open ports 9200 and 5601 in the security group / firewall
# Install Elasticsearch 9.2.2
# Import the official GPG key
sudo rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch
# Create the yum repo file
sudo tee /etc/yum.repos.d/elasticsearch.repo <<-'EOF'
[elasticsearch]
name=Elasticsearch repository for 9.x packages
baseurl=https://artifacts.elastic.co/packages/9.x/yum
gpgcheck=1
gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch
enabled=1
autorefresh=1
type=rpm-md
EOF
# Install Elasticsearch
sudo dnf install elasticsearch --enablerepo=elasticsearch
# Just start it; if it errors, check the logs (often a permissions issue)
sudo systemctl daemon-reload
sudo systemctl enable elasticsearch
sudo systemctl start elasticsearch
sudo systemctl status elasticsearch
sudo journalctl -u elasticsearch -f
# Manually create the log directory and set permissions
sudo mkdir -p /usr/share/elasticsearch/logs
sudo chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/logs
sudo chmod 750 /usr/share/elasticsearch/logs
# Set the elastic superuser password (recommended: do it right away)
sudo /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
# Check the self-signed certificates; if they exist, all is well
ll /etc/elasticsearch/certs/
# Show the HTTP CA certificate fingerprint (used when configuring other clients)
sudo openssl x509 -fingerprint -sha256 -in /etc/elasticsearch/certs/http_ca.crt -noout
# Set an environment variable (replace with your actual password)
export ELASTIC_PASSWORD='MyElastic123!'
# Test an HTTPS request (--cacert is required because TLS is enabled)
curl --cacert /etc/elasticsearch/certs/http_ca.crt \
-u elastic:$ELASTIC_PASSWORD \
https://localhost:9200
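As a follow-up check (a sketch reusing the same credentials), the cluster health API should come back green, or yellow if replica shards are still unassigned on a single node:

```bash
curl -s --cacert /etc/elasticsearch/certs/http_ca.crt \
  -u elastic:$ELASTIC_PASSWORD \
  "https://localhost:9200/_cluster/health?pretty"
```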
# Show the effective (non-comment) configuration
grep -v '^\s*#\|^\s*$' /etc/elasticsearch/elasticsearch.yml
# Adjust the config as needed (cluster name, non-local access, etc.)
cluster.name: my-test-es
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
network.host: 0.0.0.0
xpack.security.enabled: true
xpack.security.enrollment.enabled: true
xpack.security.http.ssl:
enabled: true
keystore.path: certs/http.p12
xpack.security.transport.ssl:
enabled: true
verification_mode: certificate
keystore.path: certs/transport.p12
truststore.path: certs/transport.p12
cluster.initial_master_nodes: ["weblessie-server-02"]
http.host: 0.0.0.0
# Adjust the ES JVM heap size
vim /etc/elasticsearch/jvm.options
-Xms4g
-Xmx4g
# Restart
sudo systemctl restart elasticsearch
# Generate an enrollment token for Kibana to use later
sudo /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
# Prepare to install Kibana 9.2.2
# Create the repo /etc/yum.repos.d/kibana.repo
sudo tee /etc/yum.repos.d/kibana.repo <<-'EOF'
[kibana]
name=Kibana repository for 9.x packages
baseurl=https://artifacts.elastic.co/packages/9.x/yum
gpgcheck=1
gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch
enabled=1
autorefresh=1
type=rpm-md
EOF
# Install Kibana
sudo dnf install kibana --enablerepo=kibana
# Start it
sudo systemctl daemon-reload
sudo systemctl enable --now kibana
# Open Kibana and paste the enrollment token generated above
http://ip:5601
# Get the verification code
/usr/share/kibana/bin/kibana-verification-code
# Generate encryption keys with the official tool (the cleanest way)
sudo /usr/share/kibana/bin/kibana-encryption-keys generate --force
# Output should look like:
# ✔ Encryption keys generated and written to /etc/kibana/kibana.yml:
# xpack.encryptedSavedObjects.encryptionKey
# xpack.reporting.encryptionKey
# xpack.security.encryptionKey
# Edit the config file
grep -v '^\s*#\|^\s*$' /etc/kibana/kibana.yml
server.host: "0.0.0.0"
logging:
appenders:
file:
type: file
fileName: /var/log/kibana/kibana.log
layout:
type: json
root:
appenders:
- default
- file
pid.file: /run/kibana/kibana.pid
i18n.locale: "zh-CN"
elasticsearch.hosts: [https://10.0.0.38:9200]
elasticsearch.serviceAccountToken: AAEAAWVsYXN0aWMva2liYW5hL2Vucm9sbC1wcm9jZXNzLXRva2VuLTE3NjUzNDE4OTI3MjY6Um9KdUo2N1hSZVNPeGNzOXFDaUh2dw
elasticsearch.ssl.certificateAuthorities: [/var/lib/kibana/ca_1765341893683.crt]
xpack.fleet.outputs: [{id: fleet-default-output, name: default, is_default: true, is_default_monitoring: true, type: elasticsearch, hosts: [https://10.0.0.38:9200], ca_trusted_fingerprint: 80af64db043e12ebda11c10f70042af91306a705fdcb6285814a84b420c734a5}]
xpack.encryptedSavedObjects.encryptionKey: f10166c761265d5ca61e7fa2c1acac73
xpack.reporting.encryptionKey: 1772a5152522675d5a38470e905b2817
xpack.security.encryptionKey: d4b30e82e47f530a998e29cb0b8e5295
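Once Kibana is up, its status API gives a quick health probe (a sketch; adjust the host to your setup): "available" means Kibana reached ES.

```bash
curl -s http://10.0.0.38:5601/api/status | grep -o '"level":"[^"]*"' | head -1
```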


@@ -0,0 +1,41 @@
# Get the ES certificate fingerprint
sudo openssl x509 -fingerprint -sha256 -in /etc/elasticsearch/certs/http_ca.crt -noout
sha256 Fingerprint=80:AF:64:DB:04:3E:12:EB:DA:11:C1:0F:70:04:2A:F9:13:06:A7:05:FD:CB:62:85:81:4A:84:B4:20:C7:34:A5
# User created in the Kibana web UI
admin
G7ZSKFM4AQwHQpwA
# Filebeat
output.elasticsearch:
hosts: ["https://49.51.33.153:9200"]
username: "elastic"
password: "-0NiIBOJGn2CATuPWzNc"
# Verify by fingerprint (instead of a CA file)
ssl.verification_mode: "certificate"
ssl.certificate_authorities: [] # leave empty (do not validate the full chain)
ssl.supported_protocols: [TLSv1.2, TLSv1.3]
# Key point: the CA fingerprint must be bare hex (no 0x prefix, no colons)
ssl.ca_trusted_fingerprint: "80AF64DB043E12EBDA11C10F70042AF91306A705FDCB6285814A84B420C734A5"
# python
from elasticsearch import Elasticsearch
es = Elasticsearch(
hosts=["https://49.51.33.153:9200"],
basic_auth=("elastic", "-0NiIBOJGn2CATuPWzNc"),
# The fingerprint must have its colons removed
ssl_assert_fingerprint="80AF64DB043E12EBDA11C10F70042AF91306A705FDCB6285814A84B420C734A5",
verify_certs=True # must be True
)
print(es.info())
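To derive the colon-free form that ca_trusted_fingerprint and ssl_assert_fingerprint expect, one option is to strip it straight out of the openssl output:

```bash
# Prints the SHA-256 fingerprint as bare hex, ready for Filebeat or the Python client
sudo openssl x509 -fingerprint -sha256 -in /etc/elasticsearch/certs/http_ca.crt -noout \
  | cut -d= -f2 | tr -d ':'
```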


@@ -37,6 +37,7 @@ processors:
ignore_missing: true
overwrite_keys: true
- dissect:
when:
equals:
@@ -47,6 +48,7 @@ processors:
ignore_missing: true
overwrite_keys: true
- dissect:
when:
equals:
@@ -60,7 +62,7 @@ processors:
# Output
output.elasticsearch:
hosts: ["http://192.168.60.21:9200"] → hosts: ["http://192.168.70.16:9200"]
username: "admin"
password: "123456"
index: "%{[environment]}-%{[application]}-%{+yyyy.MM.dd}" # split the index by day


@@ -1,13 +1,13 @@
- type: log
id: sit_flymoon-payment → id: sit_lymoon-payment
enabled: true
paths:
- /root/logs/flymoon-payment/sys-info.log → - /root/logs/flymoon-payment/app.log
fields:
application: flymoon-payment
log_type: payment.log
environment: sit
instance: sit-server → instance: sit
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}'
multiline.negate: true
@@ -18,4 +18,3 @@
close_inactive: 5m # close files with no updates for 5 minutes
close_renamed: true # handle renamed files
start_position: beginning # read from the beginning of the file


@@ -2,12 +2,12 @@
id: sit_flymoon-admin
enabled: true
paths:
- /root/logs/flymoon-admin/sys-info.log → - /root/logs/flymoon-admin/app.log
fields:
application: flymoon-admin # custom field: application name
log_type: admin.log # custom field: log type
environment: sit # custom field: environment name
instance: sit-server → instance: sit # custom field: machine name
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}' # matches the info log format
multiline.negate: true


@@ -2,12 +2,12 @@
id: sit_flymoon-agent
enabled: true
paths:
- /root/logs/flymoon-agent/sys-info.log → - /root/logs/flymoon-agent/app.log
fields:
application: flymoon-agent # custom field: application name
log_type: agent.log # custom field: log type
environment: sit # custom field: environment name
instance: sit-server → instance: sit # custom field: machine name
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}' # multiline pattern for the email sys-info.log format
multiline.negate: true
@@ -18,6 +18,3 @@
close_inactive: 5m # close files with no updates for 5 minutes
close_renamed: true # handle renamed files
start_position: beginning # read from the beginning of the file


@@ -18,11 +18,11 @@ processors:
when:
equals:
log_type: admin.log
tokenizer: '%{timestamp} [%{thread}] %{log_level} %{log_message}' → tokenizer: '%{timestamp} %{level} %{pid} --- [%{thread}] %{class} : [%{app_name->}] %{message}'
field: "message"
target_prefix: "parsed_sys_info" → target_prefix: "mylog"
ignore_missing: true
overwrite_keys: false → overwrite_keys: true
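For reference, the new tokenizer lines up with a Spring-Boot-style log line; a hedged illustration (the exact timestamp shape depends on the app's logging pattern):

```yaml
# Given a line such as:
#   2025-12-11T10:00:00.123 INFO 12345 --- [http-nio-8080-exec-1] c.f.admin.UserController : [flymoon-admin] user login ok
# the dissect tokenizer above yields, under target_prefix "mylog":
#   mylog.timestamp: 2025-12-11T10:00:00.123
#   mylog.level:     INFO
#   mylog.pid:       12345
#   mylog.thread:    http-nio-8080-exec-1
#   mylog.class:     c.f.admin.UserController
#   mylog.app_name:  flymoon-admin   # the -> modifier skips trailing padding
#   mylog.message:   user login ok
```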


@@ -2,14 +2,14 @@
id: us_pord_01_flymoon-admin
enabled: true
paths:
- /root/logs/flymoon-admin/sys-info.log → - /root/logs/flymoon-admin/app.log
fields:
application: flymoon-admin # custom field: application name
log_type: admin.log # custom field: log type
environment: us-pord # custom field: environment name
instance: us-prod-01 # custom field: machine name
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}' → multiline.pattern: '^\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2}\.\d{3}'
multiline.negate: true
multiline.match: after
ignore_older: 24h # ignore old log files (avoid processing already-archived logs)


@@ -27,7 +27,7 @@ processors:
when:
equals:
log_type: email.log
tokenizer: '%{timestamp} [%{thread}] %{level} %{class} - [%{method_line}] - %{message}' → tokenizer: '%{timestamp} %{level} %{pid} --- \\[%{thread}\\] %{message}'
field: "message"
target_prefix: "mylog"
ignore_missing: true
@@ -37,7 +37,7 @@ processors:
when:
equals:
log_type: agent.log
tokenizer: '%{timestamp} %{level} - [%{method},%{line}] - %{message}' → tokenizer: '%{timestamp} %{level} %{pid} --- [%{thread}] %{class} : [%{app_name->}] %{message}'
field: "message"
target_prefix: "mylog"
ignore_missing: true
@@ -45,6 +45,7 @@ processors:
# Output
output.elasticsearch:
hosts: ["http://106.53.194.199:9200"]


@@ -9,7 +9,7 @@
environment: us-pord # custom field: environment name
instance: us-prod-02 # custom field: machine name
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}' → multiline.pattern: '^\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2}\.\d{3}'
multiline.negate: true
multiline.match: after
ignore_older: 24h # ignore old log files (avoid processing already-archived logs)


@@ -20,4 +20,3 @@
start_position: beginning # read from the beginning of the file


@@ -25,18 +25,20 @@ processors:
- dissect:
when:
equals:
log_type: payment.log → log_type: agent.log
tokenizer: '%{timestamp} [%{thread}] %{level} %{class} - [%{method},%{line}] - %{message}' → tokenizer: '%{timestamp} %{level} %{pid} --- [%{thread}] %{class} : [%{app_name->}] %{message}'
field: "message"
target_prefix: "mylog"
ignore_missing: true
overwrite_keys: true
- dissect:
when:
equals:
log_type: agent.log → log_type: payment.log
tokenizer: '%{timestamp} %{level} - [%{method},%{line}] - %{message}' → tokenizer: '%{timestamp} %{level} %{pid} --- [%{thread}] %{class} : [%{app_name->}] %{message}'
field: "message"
target_prefix: "mylog"
ignore_missing: true
@@ -44,6 +46,7 @@ processors:
# Output
output.elasticsearch:
hosts: ["http://106.53.194.199:9200"]


@@ -9,7 +9,7 @@
environment: us-pord # custom field: environment name
instance: us-prod-03 # custom field: machine name
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}' → multiline.pattern: '^\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2}\.\d{3}'
multiline.negate: true
multiline.match: after
ignore_older: 24h # ignore old log files (avoid processing already-archived logs)


@@ -2,14 +2,14 @@
id: us_pord_03_flymoon-payment
enabled: true
paths:
- /root/logs/flymoon-payment/sys-info.log → - /root/logs/flymoon-payment/app.log
fields:
application: flymoon-payment
log_type: payment.log
environment: us-pord
instance: us-prod-03
fields_under_root: true
multiline.pattern: '^\d{2}:\d{2}:\d{2}\.\d{3}' → multiline.pattern: '^\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2}\.\d{3}'
multiline.negate: true
multiline.match: after
ignore_older: 24h # ignore old log files (avoid processing already-archived logs)


@@ -0,0 +1,66 @@
pipeline {
agent any
parameters {
gitParameter(
branchFilter: 'origin/(.*)',
defaultValue: 'release',
name: 'GIT_BRANCH',
type: 'PT_BRANCH_TAG',
selectedValue: 'DEFAULT',
sortMode: 'NONE',
description: 'Select a code branch: ',
quickFilterEnabled: true,
tagFilter: '*',
listSize: "5"
)
}
environment {
REMOTE_HOST = '192.168.70.15'
REMOTE_PROJECT_PATH = '/data/webapps/lessie-react'
}
stages {
stage('Checkout code') {
steps {
git branch: "${params.GIT_BRANCH}", credentialsId: 'fly_gitlab_auth', url: 'http://172.24.16.20/web/lessie-react.git'
}
}
stage('Sync') {
steps {
sh """
ssh ${REMOTE_HOST} 'mkdir -p ${REMOTE_PROJECT_PATH}'
rsync -avz --delete --exclude='node_modules' ${WORKSPACE}/ ${REMOTE_HOST}:${REMOTE_PROJECT_PATH}/
"""
}
}
stage('Install & start') {
steps {
sh """
ssh ${REMOTE_HOST} '
cd ${REMOTE_PROJECT_PATH} &&
pm2 delete lessie-react || true &&
pm2 list &&
nvm use 22.21.1 &&
npm install &&
npm run build &&
pm2 start ecosystem.config.cjs &&
pm2 save
'
"""
}
}
}
post {
success {
echo 'Deploy succeeded'
}
failure {
echo 'Deploy failed; check the logs'
}
}
}
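One caveat in the 'Install & start' stage: nvm is a shell function loaded from ~/.nvm/nvm.sh, so a bare `nvm use` in a non-interactive ssh session typically fails. A hedged sketch of the safer form (assuming nvm lives under ~/.nvm on the remote host):

```bash
# Source nvm explicitly before using it over non-interactive ssh
ssh 192.168.70.15 'export NVM_DIR="$HOME/.nvm"; . "$NVM_DIR/nvm.sh"; nvm use 22.21.1 && node -v'
```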


@@ -0,0 +1,93 @@
pipeline {
agent any
parameters {
gitParameter(
branchFilter: 'origin/(.*)',
defaultValue: 'release',
name: 'GIT_BRANCH',
type: 'PT_BRANCH_TAG',
selectedValue: 'DEFAULT',
sortMode: 'NONE',
description: 'Select a code branch: ',
quickFilterEnabled: true,
tagFilter: '*',
listSize: "5"
)
}
environment {
REMOTE_HOST = '43.130.56.138'
REMOTE_HOST_B = '43.153.21.64'
REMOTE_PROJECT_PATH = '/data/webapps/lessie-react'
}
stages {
stage('Checkout code') {
steps {
git branch: "${params.GIT_BRANCH}", credentialsId: 'fly_gitlab_auth', url: 'http://172.24.16.20/web/lessie-react.git'
}
}
stage('Sync A') {
steps {
sh """
ssh ${REMOTE_HOST} 'mkdir -p ${REMOTE_PROJECT_PATH}'
rsync -avz --delete --exclude='node_modules' ${WORKSPACE}/ ${REMOTE_HOST}:${REMOTE_PROJECT_PATH}/
"""
}
}
stage('Install & start A') {
steps {
sh """
ssh ${REMOTE_HOST} '
cd ${REMOTE_PROJECT_PATH} &&
pm2 delete lessie-react || true &&
pm2 list &&
nvm use 22.21.1 &&
npm install &&
npm run build &&
pm2 start ecosystem.config.cjs &&
pm2 save
'
"""
}
}
stage('Sync B') {
steps {
sh """
ssh ${REMOTE_HOST_B} 'mkdir -p ${REMOTE_PROJECT_PATH}'
rsync -avz --delete --exclude='node_modules' ${WORKSPACE}/ ${REMOTE_HOST_B}:${REMOTE_PROJECT_PATH}/
"""
}
}
stage('Install & start B') {
steps {
sh """
ssh ${REMOTE_HOST_B} '
cd ${REMOTE_PROJECT_PATH} &&
pm2 delete lessie-react || true &&
pm2 list &&
nvm use 22.21.1 &&
npm install &&
npm run build &&
pm2 start ecosystem.config.cjs &&
pm2 save
'
"""
}
}
}
post {
success {
echo 'Deploy succeeded'
}
failure {
echo 'Deploy failed; check the logs'
}
}
}


@@ -0,0 +1,86 @@
pipeline {
agent any
parameters {
gitParameter(
branchFilter: 'origin/(.*)',
defaultValue: 'main',
name: 'GIT_BRANCH',
type: 'PT_BRANCH_TAG',
selectedValue: 'DEFAULT',
sortMode: 'NONE',
description: 'Select a code branch: ',
quickFilterEnabled: true,
tagFilter: '*',
listSize: "5"
)
}
environment {
REMOTE_HOST = '43.130.56.138'
REMOTE_PROJECT_PATH = '/data/webapps/nodejs-email'
}
stages {
stage('Checkout code') {
steps {
git branch: "${params.GIT_BRANCH}", credentialsId: 'fly_gitlab_auth', url: 'http://172.24.16.20/nodejs/nodejs-email.git'
}
}
stage('Generate .env file') {
steps {
sh """
cd ${WORKSPACE}
# Truncate or create the file
cat << 'EOF' > .env
GOOGLE_CLIENT_ID=184829050868-h21pf2mj6c51e1hveop30arbt5mqqr0v.apps.googleusercontent.com
GOOGLE_SECRET_ID=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
PORT=3005
DATABASE_URL="mysql://system:System8888!@10.0.0.10:3306/creator_contact_record?charset=utf8mb4&parseTime=true&loc=Local"
EOF
echo 'Showing the .env file contents'
cat .env
"""
}
}
stage('Sync files') {
steps {
sh """
ssh ${REMOTE_HOST} 'mkdir -p ${REMOTE_PROJECT_PATH}'
rsync -avz --delete --exclude='node_modules' ${WORKSPACE}/ ${REMOTE_HOST}:${REMOTE_PROJECT_PATH}/
"""
}
}
stage('Install & start the service') {
steps {
sh """
ssh ${REMOTE_HOST} '
cd ${REMOTE_PROJECT_PATH} &&
pm2 delete index || true &&
pm2 list &&
nvm use 20.19.6 &&
npm install &&
npx prisma generate &&
pm2 start index.js &&
pm2 save
'
"""
}
}
}
post {
success {
echo 'Deploy succeeded'
}
failure {
echo 'Deploy failed; check the logs'
}
}
}
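A possible post-deploy smoke test (an assumption-laden sketch: the service binds port 3005 as set in the .env above):

```bash
# Expect HTTP 200 (or at least a response) from the freshly started index.js
ssh 43.130.56.138 'curl -s -o /dev/null -w "%{http_code}\n" http://127.0.0.1:3005/'
```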


@@ -1,64 +1,90 @@
-pipeline {
-    agent any
-    environment {
-        DEPLOY_HOST = '49.51.46.148' // target host IP or domain
-        DEPLOY_DIR = '/data/tengine/html/lessie_official' // deploy directory on the target host
-    }
-    stages {
-        stage('Checkout code') {
-            steps {
-                git branch: 'dev', credentialsId: 'fly_gitlab_auth', url: 'http://172.24.16.20/web/scalelink-frontend.git'
-            }
-        }
-        stage('Install & Build') {
-            steps {
-                sh """
-                cd ${WORKSPACE}/projects/lessie' &&
-                npm install --frozen-lockfile &&
-                npm run build
-                """
-            }
-        }
-        stage('Sync files') {
-            steps {
-                // package the required files: .output, package.json, pnpm-lock.yaml
-                # sh '''
-                # rm -rf deploy.tar.gz
-                # tar czf deploy.tar.gz \
-                # .output package.json pnpm-lock.yaml
-                # '''
-                # archiveArtifacts artifacts: 'deploy.tar.gz'
-            }
-        }
-        stage('Deploy to Target') {
-            steps {
-                sshagent([SSH_CRED]) {
-                    // 1) transfer the tarball
-                    sh """
-                    scp deploy.tar.gz ${DEPLOY_USER}@${DEPLOY_HOST}:/tmp/
-                    """
-                    // 2) unpack, install dependencies, restart
-                    sh """
-                    ssh ${DEPLOY_USER}@${DEPLOY_HOST} '
-                    mkdir -p ${DEPLOY_DIR} &&
-                    tar xzf /tmp/deploy.tar.gz -C ${DEPLOY_DIR} &&
-                    cd ${DEPLOY_DIR} &&
-                    # install production dependencies
-                    npm install --production &&
-                    # restart the pm2 service
-                    pm2 reload nuxt-app || pm2 start .output/server/index.mjs --name nuxt-app
-                    '
-                    """
-                }
-            }
-        }
-    }
-}
+pipeline {
+    agent any
+    environment {
+        // CODE_BRANCH = "dev"
+        LOCKHOST_IP = "192.168.70.15"
+        LOCKHOST_PROJECT_PATH = "/root/cdx/scalelink-frontend"
+        WEB_HOST_IP_1 = "43.130.56.138"
+        WEB_HOST_IP_2 = "43.153.21.64"
+        WEB_HOST_PROJECT_PATH = "/data/webapps/lessie_official_web"
+    }
+    stages {
+        stage('Pull code') {
+            steps {
+                git branch: "${params.Code_branch}",
+                    credentialsId: 'fly_gitlab_auth',
+                    url: 'http://172.24.16.20/web/scalelink-frontend.git'
+            }
+        }
+        stage('Sync to build machine') {
+            steps {
+                sh """
+                ssh ${LOCKHOST_IP} 'rm -rf ${LOCKHOST_PROJECT_PATH}/*' &&
+                rsync -az ${WORKSPACE}/ ${LOCKHOST_IP}:${LOCKHOST_PROJECT_PATH}/
+                """
+            }
+        }
+        stage('Install deps & build') {
+            steps {
+                sh """
+                ssh ${LOCKHOST_IP} "bash -lc \\"
+                cd ${LOCKHOST_PROJECT_PATH}/projects/lessie &&
+                pnpm install --force && pnpm build &&
+                tar -czf output.tar.gz .output
+                \\""
+                """
+            }
+        }
+        stage('Upload artifact to machine 1') {
+            steps {
+                sh """
+                ssh ${LOCKHOST_IP} "bash -lc \\"
+                scp -r ${LOCKHOST_PROJECT_PATH}/projects/lessie/output.tar.gz ${WEB_HOST_IP_2}:${WEB_HOST_PROJECT_PATH}/
+                \\""
+                """
+            }
+        }
+        stage('Start machine 1') {
+            steps {
+                sh """
+                ssh ${WEB_HOST_IP_2} '
+                cd ${WEB_HOST_PROJECT_PATH} && pm2 list &&
+                pm2 delete lessie-official-web --silent || true &&
+                tar -zxf output.tar.gz &&
+                pm2 start .output/server/index.mjs --name lessie-official-web --output ./nuxt-out.log --error ./nuxt-error.log
+                '
+                """
+            }
+        }
+        stage('Upload artifact to machine 2') {
+            steps {
+                sh """
+                ssh ${LOCKHOST_IP} "bash -lc \\"
+                scp -r ${LOCKHOST_PROJECT_PATH}/projects/lessie/output.tar.gz ${WEB_HOST_IP_1}:${WEB_HOST_PROJECT_PATH}/
+                \\""
+                """
+            }
+        }
+        stage('Start machine 2') {
+            steps {
+                sh """
+                ssh ${WEB_HOST_IP_1} '
+                bash &&
+                cd ${WEB_HOST_PROJECT_PATH} && pm2 list &&
+                pm2 delete lessie-official-web --silent || true &&
+                tar -zxf output.tar.gz &&
+                pm2 start .output/server/index.mjs --name lessie-official-web --output ./nuxt-out.log --error ./nuxt-error.log
+                '
+                """
+            }
+        }
+    }
+    post {
+        success {
+            echo 'Deploy succeeded 🎉'
+        }
+        failure {
+            echo 'Deploy failed; check the logs ❌'
+        }
+    }
+}

nginx/es.jennie.im.conf

@@ -0,0 +1,43 @@
server {
listen 80;
server_name es.jennie.im;
# Force redirect to HTTPS
return 301 https://$host$request_uri;
}
server {
listen 443 ssl;
server_name es.jennie.im;
# Certificates
ssl_certificate /data/tengine/conf/certificate/jennie.im.crt;
ssl_certificate_key /data/tengine/conf/certificate/jennie.im.key;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
# Recommended security settings
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
access_log /data/tengine/logs/es_jennie_im_access.log;
error_log /data/tengine/logs/es_jennie_im_error.log;
location / {
proxy_pass https://10.0.0.38:9200; # internal ES address (HTTPS)
# Disable upstream cert verification (required; Nginx won't trust the ES self-signed cert otherwise)
proxy_ssl_verify off;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto https;
# Bump the buffers for large ES responses
proxy_buffer_size 16k;
proxy_buffers 4 32k;
proxy_busy_buffers_size 64k;
}
}
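To verify the proxy end to end (a sketch reusing the elastic credentials from the ES install notes), hit the public hostname; the public certificate terminates at Nginx, so no --cacert is needed:

```bash
curl -u elastic:$ELASTIC_PASSWORD https://es.jennie.im
```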


@@ -0,0 +1,71 @@
# Define the Jenkins backend group (optional; handy for load balancing or health checks)
upstream jenkins_backend {
server 127.0.0.1:8080; # main Jenkins service address
keepalive 32; # connection pool for better performance
}
# HTTP → HTTPS redirect server
server {
listen 80;
server_name jenkins.deeplink.media;
# Optional: HSTS security header
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
return 301 https://$server_name$request_uri;
}
# HTTPS server (core configuration)
server {
listen 443 ssl;
server_name jenkins.deeplink.media;
# SSL certificates
ssl_certificate /data/tengine/conf/certificate/jenkins.deeplink.media_bundle.crt;
ssl_certificate_key /data/tengine/conf/certificate/jenkins.deeplink.media.key;
# Recommended SSL protocols and cipher suites
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
ssl_prefer_server_ciphers off;
# SSL session cache (optional; improves performance)
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
# Access and error logs
access_log /data/tengine/logs/jenkins_access.log;
error_log /data/tengine/logs/jenkins_error.log;
# Core proxy block
location / {
proxy_pass http://jenkins_backend; # points at the upstream
# --- Key: pass request headers so Jenkins sees the original request ---
proxy_set_header Host $host:$server_port; # original Host; keeps Jenkins from redirecting to 8080
proxy_set_header X-Real-IP $remote_addr; # real client IP
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; # proxy chain IPs
proxy_set_header X-Forwarded-Proto $scheme; # original scheme (https)
proxy_set_header X-Forwarded-Port $server_port; # original port (443)
# --- Key: WebSocket support (needed for Jenkins console output, live logs, etc.) ---
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade; # protocol upgrade header
proxy_set_header Connection "upgrade"; # connection type header
# --- Performance & safety ---
proxy_request_buffering off; # disable request buffering; friendlier to large uploads
client_max_body_size 2G; # keep the large-upload limit
# --- Timeouts ---
proxy_connect_timeout 60s; # connect timeout
proxy_send_timeout 60s; # send timeout
proxy_read_timeout 60s; # read timeout
# --- Optional: security headers ---
# add_header X-Frame-Options "SAMEORIGIN" always; # clickjacking protection
# add_header X-Content-Type-Options "nosniff" always; # MIME-sniffing protection
# add_header Referrer-Policy "strict-origin-when-cross-origin" always; # referrer policy
}
# Optional: if Jenkins uses an AJP connector you may need to uncomment the next line (usually not needed)
# proxy_pass_header Server;
}


@@ -0,0 +1,43 @@
server {
listen 80;
server_name kibana.jennie.im;
# Force redirect to HTTPS
return 301 https://$host$request_uri;
}
server {
listen 443 ssl;
server_name kibana.jennie.im;
# Public HTTPS certificate
ssl_certificate /data/tengine/conf/certificate/jennie.im.crt;
ssl_certificate_key /data/tengine/conf/certificate/jennie.im.key;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
access_log /data/tengine/logs/kibana_jennie_im_access.log;
error_log /data/tengine/logs/kibana_jennie_im_error.log;
# Reverse proxy for Kibana
location / {
proxy_pass http://10.0.0.38:5601;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto https;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection 'upgrade';
# Keep WebSockets from dropping (the Kibana console needs this)
proxy_read_timeout 300s;
proxy_send_timeout 300s;
}
}


@@ -0,0 +1,69 @@
upstream profile_backend {
server 10.0.0.5:3001; # internal address of machine A
server 10.0.0.15:3001; # internal address of machine B
}
log_format profile_log '$remote_addr - $remote_user [$time_local] '
'"$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent" '
'upstream_addr=$upstream_addr '
'upstream_status=$upstream_status '
'upstream_response_time=$upstream_response_time '
'request_time=$request_time';
# 1. Force HTTP to HTTPS; everything redirects to https://profile.lessie.ai
server {
listen 80;
server_name profile.lessie.ai;
return 301 https://profile.lessie.ai$request_uri;
}
# 2. Production site: https://profile.lessie.ai
server {
listen 443 ssl;
server_name profile.lessie.ai;
ssl_certificate /data/tengine/certificate/lessie.ai.pem;
ssl_certificate_key /data/tengine/certificate/lessie.ai.key;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
access_log /data/tengine/logs/lessie_profile_log.access.log profile_log;
error_log /data/tengine/logs/lessie_profile_log.error.log;
# Reverse proxy to the server-side-rendered Nuxt app on port 3001
location / {
proxy_pass http://profile_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
# Keep the logo out of caches
location = /favicon.svg {
proxy_pass http://official_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
add_header Cache-Control "no-cache, no-store, must-revalidate" always;
add_header Pragma "no-cache" always;
add_header Expires 0 always;
}
# Keep the logo out of caches
location = /favicon.ico {
proxy_pass http://official_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
add_header Cache-Control "no-cache, no-store, must-revalidate" always;
add_header Pragma "no-cache" always;
add_header Expires 0 always;
}
}


@@ -0,0 +1,31 @@
server {
listen 80;
server_name nexus.deeplink.media;
return 301 https://$host$request_uri;
}
server {
listen 443 ssl;
server_name nexus.deeplink.media;
ssl_certificate /data/tengine/conf/certificate/nexus.deeplink.media_bundle.crt;
ssl_certificate_key /data/tengine/conf/certificate/nexus.deeplink.media.key;
ssl_protocols TLSv1.2 TLSv1.3;
access_log /data/tengine/logs/nexus_access.log;
error_log /data/tengine/logs/nexus_error.log;
location / {
proxy_pass http://127.0.0.1:8081;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_http_version 1.1;
proxy_request_buffering off;
client_max_body_size 2G;
}
}


@@ -883,6 +883,14 @@ server {
}
}
+ location /api/gmail {
+ proxy_pass http://10.0.0.10:3005;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ }
# Error pages
error_page 500 502 503 504 /50x.html;
location = /50x.html {

nginx/安装.md

@@ -0,0 +1,196 @@
1. Download the package from `https://tengine.taobao.org/`
2. Upload it; the install location is `/data/tengine`
3. Unpack it: `tar -zxvf tengine-3.1.0.tar.gz`
4. Install the compiler toolchain: `yum -y install gcc-c++`
5. Install the dependencies: `yum -y install pcre-devel zlib zlib-devel openssl openssl-devel`
6. Create the install directory: `mkdir /data/tengine`
7. Enter the unpacked directory: `cd tengine-3.1.0`
8. Configure with the install path, then build: `./configure --prefix=/data/tengine`, `make`, `make install`
```bash
./configure --prefix=/data/tengine \
--conf-path=/data/tengine/conf/nginx.conf \
--error-log-path=/data/tengine/logs/error.log \
--http-log-path=/data/tengine/logs/access.log \
--pid-path=/data/tengine/logs/nginx.pid \
--lock-path=/data/tengine/logs/nginx.lock \
--with-http_ssl_module \
--with-http_gzip_static_module \
--with-pcre \
--with-http_stub_status_module
# Explanation:
--prefix=/data/tengine → install under /data/tengine/
--conf-path=/data/tengine/conf/nginx.conf → location of the nginx.conf file
--error-log-path=/data/tengine/logs/error.log → error log path
--http-log-path=/data/tengine/logs/access.log → access log path
--pid-path=/data/tengine/logs/nginx.pid → where the nginx PID file is stored
--lock-path=/data/tengine/logs/nginx.lock → lock file path
--with-http_ssl_module → enable HTTPS support
--with-http_gzip_static_module → enable gzip compression
--with-pcre → regular-expression support (for rewrites)
--with-http_stub_status_module → enable the Nginx status endpoint
# Install
make -j$(nproc)
make install
# ============ Same, plus the layer-4 (stream) proxy ============
./configure --prefix=/data/tengine \
--conf-path=/data/tengine/conf/nginx.conf \
--error-log-path=/data/tengine/logs/error.log \
--http-log-path=/data/tengine/logs/access.log \
--pid-path=/data/tengine/logs/nginx.pid \
--lock-path=/data/tengine/logs/nginx.lock \
--with-http_ssl_module \
--with-http_gzip_static_module \
--with-pcre \
--with-http_stub_status_module \
--with-stream
```
9. Check that the directory was installed: `ls /data/tengine`
10. Start Tengine: `cd /data/tengine/sbin`, `./nginx`
11. Create the remaining directories:
mkdir -p /data/tengine/conf/vhosts
mkdir -p /data/tengine/conf/certificate
Inside the `http {}` block of `/data/tengine/conf/nginx.conf`, include the virtual hosts:
http {
include mime.types;
default_type application/octet-stream;
# include the virtual host configs
include /data/tengine/conf/vhosts/*.conf;
# other settings...
}
---
/data/tengine/sbin/nginx
/data/tengine/sbin/nginx -s reload
1. start command  2. reload-config command
Using nginx globally
Option 1
1. Create a symlink:
```bash
ln -s /data/tengine/sbin/nginx /usr/local/bin/nginx
```
(`/usr/local/bin` is usually already on the system `$PATH`; prefer this directory)
2. Verify that it works:
```bash
nginx -v  # print the version
```
Option 2
1. Edit the environment file (`bash` as the example):
```bash
vi /etc/profile # global (all users); or edit ~/.bashrc (current user only)
```
2. Append a line adding the Nginx directory to `PATH`:
```bash
export PATH=$PATH:/data/tengine/sbin
```
3. Apply the change immediately:
```bash
source /etc/profile # for the global file; or source ~/.bashrc
```
4. Verify:
```bash
nginx -v # run the command directly to test
```
---
Configure nginx under systemd:
```ini
vim /etc/systemd/system/tengine.service
[Unit]
Description=Tengine Web Server
After=network.target
[Service]
Type=forking
PIDFile=/data/tengine/logs/nginx.pid
ExecStart=/data/tengine/sbin/nginx
ExecReload=/data/tengine/sbin/nginx -s reload
ExecStop=/data/tengine/sbin/nginx -s stop
# Prevent the process from being killed by killall/nginx
KillMode=process
# Auto-restart (if you want Nginx pulled back up after an unexpected exit)
Restart=on-failure
RestartSec=2s
[Install]
WantedBy=multi-user.target
```
```text
Check config:     nginx -t
Hot reload:       systemctl reload tengine
Start service:    systemctl start tengine
Stop service:     systemctl stop tengine
Restart service:  systemctl restart tengine
Emergency ops:    nginx -s reload / nginx -s stop
```
For containers:
docker exec -it my-nginx nginx -t # check config syntax
docker exec -it my-nginx nginx -s reload # reload the config

sh/crontab -e

@@ -0,0 +1,4 @@
crontab -e
* * * * * /bin/bash /data/sh/check_memory_and_restart.sh >> /data/sh/logs/agents_memcheck.log 2>&1
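The entry fires every minute; one pre-flight detail worth checking (a small sketch, since the >> redirect fails if the directory is missing):

```bash
mkdir -p /data/sh/logs   # create the log directory the cron redirect writes into
```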


@@ -0,0 +1,23 @@
#!/bin/bash
# Read the port number from the first argument
PORT=$1
# Make sure an argument was provided
if [ -z "$PORT" ]; then
echo "❌ Error: pass a port number, e.g.: sh kill_lessie_sourcing_agents.sh 8000"
exit 1
fi
# Find the PID(s) listening on the port
# PID=$(lsof -t -i:$PORT)
PID=$(lsof -iTCP -sTCP:LISTEN -nP | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' | sort -u)
if [ -n "$PID" ]; then
echo "Found process(es) on port $PORT, PID=$PID"
echo "Killing..."
kill -9 $PID
echo "Process $PID killed; port $PORT is free."
else
echo "No process is running on port $PORT."
fi


@@ -0,0 +1,16 @@
# s5 instance on port 8000
cd /data/webapps/lessie_sourcing_agents_s5
uv sync
source /data/webapps/lessie_sourcing_agents_s5/.venv/bin/activate
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOGFILE="/data/webapps/lessie_sourcing_agents_s5/logs/lessie_sourcing_agents_${TIMESTAMP}.log"
nohup env APP_ENV=s5 gunicorn -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000 --timeout 300 dialogue.app:app --max-requests 200 --max-requests-jitter 50 > "$LOGFILE" 2>&1 &
# stable symlink to the newest log
ln -sf "$LOGFILE" /data/webapps/lessie_sourcing_agents_s5/logs/lessie_sourcing_agents_latest.log
# s6 instance on port 8001
cd /data/webapps/lessie_sourcing_agents_s6
uv sync
source /data/webapps/lessie_sourcing_agents_s6/.venv/bin/activate
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOGFILE="/data/webapps/lessie_sourcing_agents_s6/logs/lessie_sourcing_agents_${TIMESTAMP}.log"
nohup env APP_ENV=s6 gunicorn -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8001 --timeout 300 dialogue.app:app --max-requests 200 --max-requests-jitter 50 > "$LOGFILE" 2>&1 &
ln -sf "$LOGFILE" /data/webapps/lessie_sourcing_agents_s6/logs/lessie_sourcing_agents_latest.log


@@ -0,0 +1,105 @@
#!/bin/bash
# Threshold in MB (12288 MB = 12 GB)
THRESHOLD_MB=12288
# Configuration for the two services
declare -A SERVICE_DIRS=(
["8000"]="/data/webapps/lessie_sourcing_agents"
["8001"]="/data/webapps/lessie_sourcing_agents_s4"
)
declare -A SERVICE_ENVS=(
["8000"]="s1"
["8001"]="s4"
)
# Current timestamp
now() {
date +"%Y-%m-%d %H:%M:%S"
}
# Sum the memory (RSS, i.e. resident memory) of every process listening on a port, in MB
get_memory_usage_mb() {
PORT=$1
# Collect all the PIDs
PIDS=$(lsof -iTCP -sTCP:LISTEN -nP | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' | sort -u)
if [ -z "$PIDS" ]; then
echo 0
return
fi
# Total memory in MB
total=0
for pid in $PIDS; do
mem=$(ps -o rss= -p "$pid" 2>/dev/null) # in KB
[ -n "$mem" ] && total=$((total + mem))
done
echo $(( total / 1024 ))
}
# Start the service
start_service() {
PORT=$1
APP_DIR=${SERVICE_DIRS[$PORT]}
echo "$(now) 重启服务(port=$PORT, dir=$APP_DIR)..."
cd "$APP_DIR" || exit 1
source "$APP_DIR/.venv/bin/activate"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOGFILE="$APP_DIR/logs/lessie_sourcing_agents_${TIMESTAMP}.log"
if [ "$PORT" = "8000" ]; then
APP_ENV="s1"
else
APP_ENV="s4"
fi
nohup env APP_ENV=$APP_ENV gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
-b 0.0.0.0:$PORT --timeout 300 dialogue.app:app \
--max-requests 200 --max-requests-jitter 50 \
> "$LOGFILE" 2>&1 &
ln -sf "$LOGFILE" "$APP_DIR/logs/lessie_sourcing_agents_latest.log"
echo "$(now) 服务 $PORT 已重新启动"
}
# 主循环(两个服务)
for PORT in 8000 8001; do
echo "---------------------------"
echo "$(now) 检查端口 $PORT 的服务"
usage=$(get_memory_usage_mb "$PORT")
echo "$(now) 当前内存占用: ${usage}MB"
if [ "$usage" -gt "$THRESHOLD_MB" ]; then
echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"
# 调用杀进程脚本
sh /data/sh/kill_lessie_sourcing_agents.sh "$PORT"
sleep 2
# Restart the service
start_service "$PORT"
# Feishu alert
APP_ENV=${SERVICE_ENVS[$PORT]}
sh /data/sh/feishu_notify.sh \
"Python 内存告警" \
"$(hostname)" \
"(${APP_ENV})lessie_sourcing_agents(${PORT})" \
"warning" \
"**内存占用**: ${usage}MB\n已自动 kill 并重启。"
else
echo "$(now) 内存正常,无需处理。"
fi
done
echo "$(now) 检查结束"


@@ -0,0 +1,50 @@
#!/bin/bash
# Usage:
# sh feishu_notify.sh "<title>" "<host>" "<program>" "<level>" "<detail>"
WEBHOOK="https://open.feishu.cn/open-apis/bot/v2/hook/c14d9964-3b5e-402a-866e-42768aa45e5e"
TITLE="$1" # 标题
HOST="$2" # 主机
PROGRAM="$3" # 程序
LEVEL="$4" # 级别,飞书卡片 header 颜色info / warning / danger
DETAIL="$5" # 详情内容Markdown
TIME=$(date +"%Y-%m-%d %H:%M:%S")
curl -s -X POST \
-H "Content-Type: application/json" \
-d "{
\"msg_type\": \"interactive\",
\"card\": {
\"header\": {
\"template\": \"${LEVEL}\",
\"title\": {
\"content\": \"${TITLE}\",
\"tag\": \"plain_text\"
}
},
\"elements\": [
{
\"tag\": \"div\",
\"text\": {
\"tag\": \"lark_md\",
\"content\": \"**主机:** ${HOST}\n**程序:** ${PROGRAM}\n**级别:** ${LEVEL}\n**时间:** ${TIME}\n\n${DETAIL}\"
}
}
]
}
}" \
"$WEBHOOK" >/dev/null 2>&1
# Example invocation
# sh /data/sh/feishu_notify.sh \
# "⚠️ Python memory alert" \
# "$(hostname)" \
# "lessie_sourcing_agents_s5(8000)" \
# "danger" \
# "**Memory usage**: ${usage}MB\nAutomatically killed and restarted."


@@ -0,0 +1,23 @@
#!/bin/bash
# Read the port number from the first argument
PORT=$1
# Make sure an argument was provided
if [ -z "$PORT" ]; then
echo "❌ Error: pass a port number, e.g.: sh kill_lessie_sourcing_agents.sh 8000"
exit 1
fi
# Find the PID(s) listening on the port
# PID=$(lsof -t -i:$PORT)
PID=$(lsof -iTCP -sTCP:LISTEN -nP | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' | sort -u)
if [ -n "$PID" ]; then
echo "Found process(es) on port $PORT, PID=$PID"
echo "Killing..."
kill -9 $PID
echo "Process $PID killed; port $PORT is free."
else
echo "No process is running on port $PORT."
fi


@@ -0,0 +1,105 @@
#!/bin/bash
# Threshold in MB (12288 MB = 12 GB)
THRESHOLD_MB=12288
# Configuration for the two services
declare -A SERVICE_DIRS=(
["8000"]="/data/webapps/lessie_sourcing_agents_s5"
["8001"]="/data/webapps/lessie_sourcing_agents_s6"
)
declare -A SERVICE_ENVS=(
["8000"]="s5"
["8001"]="s6"
)
# Current timestamp
now() {
date +"%Y-%m-%d %H:%M:%S"
}
# Sum the memory (RSS, i.e. resident memory) of every process listening on a port, in MB
get_memory_usage_mb() {
PORT=$1
# Collect all the PIDs
PIDS=$(lsof -iTCP -sTCP:LISTEN -nP | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' | sort -u)
if [ -z "$PIDS" ]; then
echo 0
return
fi
# Total memory in MB
total=0
for pid in $PIDS; do
mem=$(ps -o rss= -p "$pid" 2>/dev/null) # in KB
[ -n "$mem" ] && total=$((total + mem))
done
echo $(( total / 1024 ))
}
# Start the service
start_service() {
PORT=$1
APP_DIR=${SERVICE_DIRS[$PORT]}
echo "$(now) 重启服务(port=$PORT, dir=$APP_DIR)..."
cd "$APP_DIR" || exit 1
source "$APP_DIR/.venv/bin/activate"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOGFILE="$APP_DIR/logs/lessie_sourcing_agents_${TIMESTAMP}.log"
if [ "$PORT" = "8000" ]; then
APP_ENV="s5"
else
APP_ENV="s6"
fi
nohup env APP_ENV=$APP_ENV gunicorn -w 4 -k uvicorn.workers.UvicornWorker \
-b 0.0.0.0:$PORT --timeout 300 dialogue.app:app \
--max-requests 200 --max-requests-jitter 50 \
> "$LOGFILE" 2>&1 &
ln -sf "$LOGFILE" "$APP_DIR/logs/lessie_sourcing_agents_latest.log"
echo "$(now) 服务 $PORT 已重新启动"
}
# 主循环(两个服务)
for PORT in 8000 8001; do
echo "---------------------------"
echo "$(now) 检查端口 $PORT 的服务"
usage=$(get_memory_usage_mb "$PORT")
echo "$(now) 当前内存占用: ${usage}MB"
if [ "$usage" -gt "$THRESHOLD_MB" ]; then
echo "$(now) ⚠️ 占用超过阈值(${THRESHOLD_MB}MB),执行重启"
# 调用杀进程脚本
sh /data/sh/kill_lessie_sourcing_agents.sh "$PORT"
sleep 2
# Restart the service
start_service "$PORT"
# Feishu alert
APP_ENV=${SERVICE_ENVS[$PORT]}
sh /data/sh/feishu_notify.sh \
"Python 内存告警" \
"$(hostname)" \
"(${APP_ENV})lessie_sourcing_agents(${PORT})" \
"warning" \
"**内存占用**: ${usage}MB\n已自动 kill 并重启。"
else
echo "$(now) 内存正常,无需处理。"
fi
done
echo "$(now) 检查结束"


@@ -0,0 +1,50 @@
#!/bin/bash
# Usage:
# sh feishu_notify.sh "<title>" "<host>" "<program>" "<level>" "<detail>"
WEBHOOK="https://open.feishu.cn/open-apis/bot/v2/hook/c14d9964-3b5e-402a-866e-42768aa45e5e"
TITLE="$1" # 标题
HOST="$2" # 主机
PROGRAM="$3" # 程序
LEVEL="$4" # 级别,飞书卡片 header 颜色info / warning / danger
DETAIL="$5" # 详情内容Markdown
TIME=$(date +"%Y-%m-%d %H:%M:%S")
curl -s -X POST \
-H "Content-Type: application/json" \
-d "{
\"msg_type\": \"interactive\",
\"card\": {
\"header\": {
\"template\": \"${LEVEL}\",
\"title\": {
\"content\": \"${TITLE}\",
\"tag\": \"plain_text\"
}
},
\"elements\": [
{
\"tag\": \"div\",
\"text\": {
\"tag\": \"lark_md\",
\"content\": \"**主机:** ${HOST}\n**程序:** ${PROGRAM}\n**级别:** ${LEVEL}\n**时间:** ${TIME}\n\n${DETAIL}\"
}
}
]
}
}" \
"$WEBHOOK" >/dev/null 2>&1
# Example invocation
# sh /data/sh/feishu_notify.sh \
# "⚠️ Python memory alert" \
# "$(hostname)" \
# "lessie_sourcing_agents_s5(8000)" \
# "danger" \
# "**Memory usage**: ${usage}MB\nAutomatically killed and restarted."


@@ -0,0 +1,23 @@
#!/bin/bash
# Read the port number from the first argument
PORT=$1
# Make sure an argument was provided
if [ -z "$PORT" ]; then
echo "❌ Error: pass a port number, e.g.: sh kill_lessie_sourcing_agents.sh 8000"
exit 1
fi
# Find the PID(s) listening on the port
# PID=$(lsof -t -i:$PORT)
PID=$(lsof -iTCP -sTCP:LISTEN -nP | awk -v port=":$PORT" '$9 ~ port"$" {print $2}' | sort -u)
if [ -n "$PID" ]; then
echo "Found process(es) on port $PORT, PID=$PID"
echo "Killing..."
kill -9 $PID
echo "Process $PID killed; port $PORT is free."
else
echo "No process is running on port $PORT."
fi

系统硬盘扩容.md

@@ -0,0 +1,53 @@
- **Confirm the system sees the new capacity**
```bash
lsblk
```
> Check that the disk (e.g. `/dev/vda`) has grown to the target size (e.g. 200G) while the partition `/dev/vda1` still shows the old size (e.g. 100G)
>
- **Grow the partition**
- If `growpart` is not installed, install it first:
```bash
# CentOS/Alma/Rocky
yum install -y cloud-utils-growpart
# Ubuntu/Debian
apt-get update && apt-get install -y cloud-guest-utils
```
- Run the resize:
```bash
growpart /dev/vda 1
```
> Grows `/dev/vda1` from its old size to fill the whole disk.
>
- **Confirm the root filesystem type**
```bash
df -T /
```
Note the `Type` column (`ext4` or `xfs`).
- **Grow the filesystem**
- **If ext4:**
```bash
resize2fs /dev/vda1
```
- **If XFS:**
```bash
xfs_growfs /
```
> Online resize; no unmount or reboot needed.
>
- **Verify the result**
```bash
df -h
```
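For an ext4 root on /dev/vda1, the whole procedure collapses into one line (a sketch; swap in your device, partition number, and `xfs_growfs /` for XFS):

```bash
growpart /dev/vda 1 && resize2fs /dev/vda1 && df -h /
```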


@@ -341,3 +341,30 @@ Swap: 8.0Gi 3.9Gi 4.1Gi Analysis from the worker processes; from mem
3. Compared with the previous check: which child processes disappeared and which appeared
====2025-10-01 10:01:00 ====
k8s log collection
Background: the project has a complete set of frontend and backend pods, but several test environments are deployed.
For example: environments s1, s2, s3, s4, s5, s6...
Taking s1 as the example, it runs the following pods, all deployed with Deployments:
s1-flymoon-admin-7cf5fcf447-t7p7n
s1-flymoon-admin-web-756b79567d-whllw
s1-flymoon-agent-66485d7b4-mrnqq
s1-flymoon-email-868c885b79-dvsjc
s1-flymoon-payment-84f7fdbfcb-94bhl
s1-lessie-agents-59797c5464-4vwfr
s1-lessie-ai-web-5c86b8d944-vmv72
s1-lessie-go-api-774ddc644c-m4cqc
Then in s2, the other flymoon base services reuse the s1 pods, reached through their Services (svc):
s2-lessie-agents-69798c5414-1hvfr
s2-lessie-ai-web-9c8988d914-mrv72
s2-lessie-go-api-47498c641c-4cpqc
Environments s3 through s6 look like s2: each has only its own lessie-agents, lessie-ai-web, and lessie-go-api.
Now I need to collect the logs into ES, which is deployed outside the k8s cluster. How should that be done?
DaemonSet approach: run one collector per node that picks up that node's pod logs. Which collector should it be, and how is it configured so that it auto-discovers pod logs and can process them (field splitting, JSON parsing)?
And how does each node's collector pod collect accurately? The s1-lessie-agents pod in s1 has several replicas scheduled on different nodes; how do the collectors get the logs of s1-lessie-agents on node A and node B into the same ES index?
Also, how should the ES indexes be named? Lifecycle policies, index templates, and splitting indexes by day, week, or month all need deciding.
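One common way to answer all three questions is Filebeat running as a DaemonSet with autodiscover; a minimal sketch (host names, labels, and the index scheme are illustrative, not a settled config). Replicas on different nodes land in the same index because the index name is built from pod metadata, not from the node:

```yaml
filebeat.autodiscover:
  providers:
    - type: kubernetes
      node: ${NODE_NAME}        # each DaemonSet pod watches only its own node
      hints.enabled: true       # per-pod annotations can tune parsing
      hints.default_config:
        type: container
        paths:
          - /var/log/containers/*${data.kubernetes.container.id}.log

processors:
  - decode_json_fields:         # JSON log lines -> structured fields
      fields: ["message"]
      target: ""
      overwrite_keys: true

output.elasticsearch:
  hosts: ["https://es.outside.example:9200"]   # the ES cluster outside k8s (placeholder)
  # namespace + app label + day => s1-lessie-agents logs from every node share one index
  index: "%{[kubernetes.namespace]}-%{[kubernetes.labels.app]}-%{+yyyy.MM.dd}"

setup.template.name: "k8s-logs"      # required when overriding the index name
setup.template.pattern: "*-*"
setup.ilm.enabled: false             # with a custom index name; attach an ILM policy via the index template instead
```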