初始化提交
This commit is contained in:
24
prometheus/alertmanager/alertmanager.yml
Normal file
24
prometheus/alertmanager/alertmanager.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
---
# Alertmanager configuration: a single route sends every alert, grouped by
# its `instance` label, to the `web.hook.prometheusalert` receiver.
# Both webhooks call the relay at 172.24.16.20:9094 with type=fs,
# tpl=prometheus-fs and a Feishu bot hook passed in `fsurl`.
# NOTE(review): the Feishu hook IDs embedded in the URLs act as credentials
# and are committed in plain text - consider rotating them and loading the
# URLs from a file that is not tracked in version control.
global:
  resolve_timeout: 5m

route:
  receiver: "web.hook.prometheusalert"
  group_by:
    - instance
  group_wait: 30s
  group_interval: 60s
  repeat_interval: 5m

receivers:
  - name: "web.hook.prometheusalert"
    webhook_configs:
      - url: "http://172.24.16.20:9094/prometheusalert?type=fs&tpl=prometheus-fs&fsurl=https://open.feishu.cn/open-apis/bot/v2/hook/8bd6a15d-90f0-4f4f-a1b1-bd105f31ea06"

      # Test bot
      - url: "http://172.24.16.20:9094/prometheusalert?type=fs&tpl=prometheus-fs&fsurl=https://open.feishu.cn/open-apis/bot/v2/hook/c14d9964-3b5e-402a-866e-42768aa45e5e"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
28
prometheus/alertmanager/muban.conf
Normal file
28
prometheus/alertmanager/muban.conf
Normal file
@@ -0,0 +1,28 @@
|
||||
{{- /*
  Alert message template: loops over .alerts and renders one message per
  alert - a "resolved" layout when status is "resolved", otherwise a
  "firing" layout. Label `level` 0-4 is mapped to a severity word; any other
  value is printed as-is.
  The previous version ended with a third, unmatched end action after the
  range had already been closed, which is a template parse error; it has
  been removed.
  NOTE(review): the resolved branch prints annotations.description1 while the
  firing branch prints annotations.description - confirm the alert rules
  really define both annotations.
*/ -}}
{{ $var := .externalURL}}{{ range $k,$v:=.alerts }}
{{if eq $v.status "resolved"}}
Prometheus恢复信息

【恢复名称】{{$v.labels.alertname}}✅{{if $v.labels.level}}
【恢复级别】{{if eq $v.labels.level "0"}}提示{{else if eq $v.labels.level "1"}}警告{{else if eq $v.labels.level "2"}}一般严重{{else if eq $v.labels.level "3"}}严重{{else if eq $v.labels.level "4"}}灾难{{else}}{{$v.labels.level}}{{end}}{{end}}
【开始时间】{{GetCSTtime $v.startsAt}}
【结束时间】{{GetCSTtime $v.endsAt}}
【恢复实例】{{$v.labels.instance}}

{{$v.annotations.description1}}
{{else}}
Prometheus告警信息

【告警名称】{{$v.labels.alertname}}🔥{{if $v.labels.level}}
【告警级别】{{if eq $v.labels.level "0"}}提示{{else if eq $v.labels.level "1"}}警告🟡{{else if eq $v.labels.level "2"}}一般严重🔥{{else if eq $v.labels.level "3"}}严重🔥🔥{{else if eq $v.labels.level "4"}}灾难🔥🔥❌{{else}}{{$v.labels.level}}{{end}}{{end}}
【开始时间】{{GetCSTtime $v.startsAt}}
【告警实例】{{$v.labels.instance}}

{{$v.annotations.description}}

{{end}}
{{end}}
|
||||
|
||||
|
||||
|
||||
https://open.feishu.cn/open-apis/bot/v2/hook/8bd6a15d-90f0-4f4f-a1b1-bd105f31ea06
|
||||
85
prometheus/docker-compose.yml
Normal file
85
prometheus/docker-compose.yml
Normal file
@@ -0,0 +1,85 @@
|
||||
---
# Compose stack: Prometheus, Grafana with a remote image renderer,
# Alertmanager and the PrometheusAlert relay.
# NOTE(review): every image uses the floating `latest` tag; pinning versions
# would make restarts reproducible.
version: '3'

services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - /root/prometheus/prometheus/config:/etc/prometheus
      - /root/prometheus/prometheus/data:/prometheus
      - /etc/localtime:/etc/localtime:ro
    environment:
      - TZ=Asia/Shanghai
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--storage.tsdb.retention.size=10GB'
      - '--web.enable-lifecycle'

  renderer:
    image: grafana/grafana-image-renderer:latest
    container_name: grafana-renderer
    restart: always
    ports:
      - "8081:8081"
    environment:
      # Shared secret of the render service; it must equal the value of
      # GF_RENDERING_RENDERER_TOKEN in the grafana service.
      # NOTE(review): committed in plain text - consider an env file.
      - AUTH_TOKEN=4cd108857bdd30fbd4991bb146622f9d

  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"
    volumes:
      - /root/prometheus/grafana/data:/var/lib/grafana
    environment:
      # Grafana root URL
      - GF_SERVER_ROOT_URL=http://172.24.16.20:3000/
      # Anonymous access
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
      # Embedding and cross-site cookies
      - GF_SECURITY_ALLOW_EMBEDDING=true
      - GF_SECURITY_COOKIE_SECURE=false
      - GF_SECURITY_COOKIE_SAMESITE=lax
      # Remote image renderer endpoint
      - GF_RENDERING_SERVER_URL=http://renderer:8081/render
      # Callback URL, kept identical to the root URL
      - GF_RENDERING_CALLBACK_URL=http://172.24.16.20:3000/
      # Token sent to the render service. The Grafana setting is
      # `[rendering] renderer_token`, so the variable must be
      # GF_RENDERING_RENDERER_TOKEN; the former GF_RENDERING_TOKEN matched no
      # setting and left Grafana on its default token.
      - GF_RENDERING_RENDERER_TOKEN=4cd108857bdd30fbd4991bb146622f9d
      # Debug logging for rendering, to ease troubleshooting
      - GF_LOG_FILTERS=rendering:debug

  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: always
    ports:
      - "9093:9093"
    volumes:
      - /root/prometheus/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - /etc/localtime:/etc/localtime:ro
    environment:
      - TZ=Asia/Shanghai

  prometheus-alert:
    image: feiyu563/prometheus-alert:latest
    container_name: prometheus-alert
    restart: always
    ports:
      - "9094:8080"
    volumes:
      - /root/prometheus/prometheus-alert/db:/app/db
    environment:
      # NOTE(review): admin/admin login on a published port - change the
      # password before exposing this service.
      - PA_LOGIN_USER=admin
      - PA_LOGIN_PASSWORD=admin
      - PA_TITLE=prometheusAlert
      - PA_OPEN_FEISHU=1
      - PA_OPEN_DINGDING=1
      - PA_OPEN_WEIXIN=1
|
||||
@@ -0,0 +1,45 @@
|
||||
---
# process-exporter groups for the crawler server.
# Each `cmdline` item is a regex; all items of an entry must match a process
# for it to join that group, and a process joins only the first entry that
# matches it, so the order of entries matters.
# A second, identical `crawler-server_yt_search_crawler` entry that followed
# the daily yt_data_update entry was removed: it could never claim a process.
process_names:
  # Jennie's Python service
  - name: "crawler-server_dialogue.influencer_search"
    cmdline:
      - "/root/miniconda3/envs/search/bin/python"
      - ".*dialogue.influencer_search.*"

  # yt_search_crawler
  - name: "crawler-server_yt_search_crawler"
    cmdline:
      - ".*async_yt.*"

  - name: "crawler-server_check_tiktok_account"
    cmdline:
      - ".*check_account.*"

  # yt_data_update, weekly run (update_yt_week.py)
  - name: "crawler-server_yt_data_update_week"
    cmdline:
      - ".*update_yt_week.*"

  # yt_data_update, daily run (update_yt.py). This pattern also matches
  # update_yt_week, so the weekly entry has to stay above this one.
  - name: "crawler-server_yt_data_update_day"
    cmdline:
      - ".*update_yt.*"

  # tk_search.py
  - name: "crawler-server_tk_search.py"
    cmdline:
      - ".*tk_search.*"

  # NOTE(review): the original note on this entry read "tt_shop.jar" although
  # the pattern targets tiktok_sign_server - confirm which one is meant.
  - name: "crawler-server_tiktok_sign_server.jar"
    cmdline:
      - ".*tiktok_sign_server.*"

  # tt_shop.py
  - name: "crawler-server_tt_shop.py"
    cmdline:
      - ".*tt_shop.*"

  # nginx
  - name: "crawler-server_nginx"
    cmdline:
      - "/data/tengine/sbin/nginx"  # nginx executable
      - ".*"  # any further nginx arguments
|
||||
@@ -0,0 +1,6 @@
|
||||
---
# process-exporter group for gunicorn processes whose command line contains
# a 0.0.0.0:7001 bind address (the dots are unescaped regex wildcards).
process_names:
  - name: 'prod_lessie_sourcing_01_7001'
    cmdline:
      - 'gunicorn'
      - '.*0.0.0.0:7001.*'
|
||||
@@ -0,0 +1,6 @@
|
||||
---
# process-exporter group for gunicorn processes whose command line contains
# a 0.0.0.0:7001 bind address (the dots are unescaped regex wildcards).
process_names:
  - name: 'prod_lessie_sourcing_02_7001'
    cmdline:
      - 'gunicorn'
      - '.*0.0.0.0:7001.*'
|
||||
@@ -0,0 +1,6 @@
|
||||
---
# process-exporter group for gunicorn processes whose command line contains
# a 0.0.0.0:7001 bind address (the dots are unescaped regex wildcards).
process_names:
  - name: 'prod_lessie_sourcing_03_7001'
    cmdline:
      - 'gunicorn'
      - '.*0.0.0.0:7001.*'
|
||||
@@ -0,0 +1,6 @@
|
||||
---
# process-exporter group for gunicorn processes whose command line contains
# a 0.0.0.0:7001 bind address (the dots are unescaped regex wildcards).
process_names:
  - name: 'prod_lessie_sourcing_04_7001'
    cmdline:
      - 'gunicorn'
      - '.*0.0.0.0:7001.*'
|
||||
@@ -0,0 +1,6 @@
|
||||
---
# process-exporter group for gunicorn processes whose command line contains
# a 0.0.0.0:7001 bind address (the dots are unescaped regex wildcards).
process_names:
  - name: 'prod_lessie_sourcing_05_7001'
    cmdline:
      - 'gunicorn'
      - '.*0.0.0.0:7001.*'
|
||||
66
prometheus/process-exporter/prod01-process-exporter.yml
Normal file
66
prometheus/process-exporter/prod01-process-exporter.yml
Normal file
@@ -0,0 +1,66 @@
|
||||
---
# process-exporter groups for production server 01.
# Each `cmdline` item is a regex searched for in the process command line;
# all items of an entry must match, and the first matching entry wins.
process_names:
  - name: "prod-flymoon_task"  # custom group name
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"  # Java executable path
      - ".*flymoon-task.jar.*"  # regex for the JAR path and arguments

  - name: "prod-flymoon_sse"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*flymoon_sse.jar.*"

  - name: "prod-flymoon_monitor"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*monitor-0.0.1-SNAPSHOT.jar.*"

  - name: "prod-flymoon_partner"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*flymoon-partner.jar.*"

  - name: "prod-flymoon_email_prod"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*fly-moon-email.jar.*"

  - name: "prod-flymoon_admin"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*flymoon-admin.jar.*"

  # The JDK path here used to read /data/data/jdk-21.0.7/bin/java, unlike the
  # admin and nacos entries of this file. The pattern is searched for
  # anywhere in the command line, so the shorter path below still matches a
  # process started from /data/data/... as well.
  - name: "prod-flymoon_agent"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*flymoon-agent.jar.*"

  # Same JDK path correction as for prod-flymoon_agent.
  - name: "prod-flymoon-payment"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*flymoon-payment.jar.*"

  - name: "prod-flymoon_crawlSpider-0.0.1-SNAPSHOT"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*fly_moon_crawlSpider-0.0.1-SNAPSHOT.jar.*"

  - name: "prod-flymoon_crawlSpider_shop"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*fly_moon_crawlSpider_shop.jar.*"

  - name: "prod-nacos"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*nacos-server.jar.*"

  - name: "prod-redis_server"
    cmdline:
      - "/data/redis/src/redis-server"  # Redis executable
      - ".*"  # any further Redis arguments

  - name: "prod-nginx"
    cmdline:
      - "/data/tengine/sbin/nginx"  # nginx executable
      - ".*"  # any further nginx arguments
|
||||
54
prometheus/process-exporter/prod02-process-exporter.yml
Normal file
54
prometheus/process-exporter/prod02-process-exporter.yml
Normal file
@@ -0,0 +1,54 @@
|
||||
---
# process-exporter groups for production server 02.
# Each `cmdline` item is a regex; all items of an entry must match, and a
# process joins only the first entry that matches it.
# A second, identical `prod02-nacos` entry at the end of the list was
# removed: the first one always claims the process, so it was dead.
process_names:
  - name: "prod02-flymoon-partner"
    cmdline:
      - ".*flymoon-partner.jar.*"

  - name: "prod02-flymoon-admin"
    cmdline:
      - ".*flymoon-admin.jar.*"

  - name: "prod02-flymoon_agent"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*flymoon-agent.jar.*"

  - name: "prod02-flymoon-payment"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*flymoon-payment.jar.*"

  - name: "prod02-flymoon_task"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*flymoon-task.jar.*"

  - name: "prod02-ycloud-0.0.1-SNAPSHOT.jar"
    cmdline:
      - ".*fly_moon_ycloud-0.0.1-SNAPSHOT.jar.*"

  - name: "prod02-ycloud-task"
    cmdline:
      - ".*fly_moon_ycloud-task.jar.*"

  - name: "prod02-nacos"
    cmdline:
      - "/data/jdk-21.0.7/bin/java"
      - ".*nacos-server.jar.*"

  - name: "prod02-redis_server"
    cmdline:
      - "/data/redis/src/redis-server"  # Redis executable
      - ".*"  # any further Redis arguments

  - name: "prod02-nginx"
    cmdline:
      - "/data/tengine/sbin/nginx"  # nginx executable
      - ".*"  # any further nginx arguments
|
||||
52
prometheus/process-exporter/test-process-exporter.yml
Normal file
52
prometheus/process-exporter/test-process-exporter.yml
Normal file
@@ -0,0 +1,52 @@
|
||||
---
# process-exporter groups for the test server: one group per Java service,
# plus nacos, redis-server and nginx. Every `cmdline` item is a regex and
# all items of an entry must match.
process_names:
  # `name` is the custom group name; the first pattern is the Java
  # executable path, the second matches the JAR path and its arguments.
  - name: 'test-flymoon_task'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*flymoon-task.jar.*'

  - name: 'test-flymoon_sse'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*flymoon_sse.jar.*'

  - name: 'test-flymoon_monitor'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*monitor-0.0.1-SNAPSHOT.jar.*'

  - name: 'test-flymoon_partner'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*flymoon-partner.jar.*'

  - name: 'test-flymoon_email_test'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*fly-moon-email-test.jar.*'

  - name: 'test-flymoon_admin'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*flymoon-admin.jar.*'

  - name: 'test-flymoon_jenniefy'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*flymoon-jenniefy.jar.*'

  - name: 'test-nacos'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*nacos-server.jar.*'

  # redis-server: executable path, then any further arguments
  - name: 'test-redis_server'
    cmdline:
      - '/data/redis/src/redis-server'
      - '.*'

  # nginx: executable path, then any further arguments
  - name: 'test-nginx'
    cmdline:
      - '/usr/local/nginx/sbin/nginx'
      - '.*'
|
||||
29
prometheus/process-exporter/us-prod-01-process-exporter.yml
Normal file
29
prometheus/process-exporter/us-prod-01-process-exporter.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
---
# process-exporter groups for us-prod-01. Every `cmdline` item is a regex
# and all items of an entry must match.
process_names:
  - name: 'us-prod-01-GO-lessie-sourcing-api'
    cmdline:
      - '.*lessie-sourcing-api.*'

  - name: 'us-prod-01-nacos'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*nacos-server.jar.*'

  - name: 'us-prod-01-flymoon_admin'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*flymoon-admin.jar.*'

  - name: 'us-prod-01-xxl-job-admin'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*xxl-job-admin.jar.*'

  - name: 'us-prod-01_lessie_official_web'
    cmdline:
      - '.*index.mjs.*'

  # nginx: executable path, then any further arguments.
  # NOTE(review): unlike the other entries this group name has no
  # `us-prod-01` prefix - confirm before renaming, since the name is the
  # `groupname` label used by queries.
  - name: 'nginx'
    cmdline:
      - '/data/tengine/sbin/nginx'
      - '.*'
|
||||
20
prometheus/process-exporter/us-prod-02-process-exporter.yml
Normal file
20
prometheus/process-exporter/us-prod-02-process-exporter.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
---
# process-exporter groups for us-prod-02. Every `cmdline` item is a regex
# and all items of an entry must match.
process_names:
  - name: 'us-prod-02-GO-lessie-sourcing-api'
    cmdline:
      - '.*lessie-sourcing-api.*'

  - name: 'us-prod-02-nacos'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*nacos-server.jar.*'

  # Any java binary started with -jar on a flymoon-agent.jar path
  - name: 'us-prod-02-flymoon-agent'
    cmdline:
      - '.*/java'
      - '-jar'
      - '.*/flymoon-agent.jar'

  - name: 'us-prod-02_email_prod'
    cmdline:
      - '/data/jdk1.8.0_181/bin/java'
      - '.*fly-moon-email.jar.*'
|
||||
21
prometheus/process-exporter/us-prod-03-process-exporter.yml
Normal file
21
prometheus/process-exporter/us-prod-03-process-exporter.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
---
# process-exporter groups for us-prod-03. Every `cmdline` item is a regex
# and all items of an entry must match.
process_names:
  - name: 'us-prod-03-GO-lessie-sourcing-api'
    cmdline:
      - '.*lessie-sourcing-api.*'

  - name: 'us-prod-03-nacos'
    cmdline:
      - '/data/jdk-21.0.7/bin/java'
      - '.*nacos-server.jar.*'

  # Any java binary started with -jar on a flymoon-agent.jar path
  - name: 'us-prod-03-flymoon-agent'
    cmdline:
      - '.*/java'
      - '-jar'
      - '.*/flymoon-agent.jar'

  # Any java binary started with -jar on a flymoon-payment.jar path
  - name: 'us-prod-03-flymoon-payment'
    cmdline:
      - '.*/java'
      - '-jar'
      - '.*/flymoon-payment.jar'
|
||||
@@ -0,0 +1,40 @@
|
||||
---
# process-exporter groups for the webdrive server.
# Each `cmdline` item is a regex; all items of an entry must match, and a
# process joins only the first entry that matches it, so specific patterns
# have to be listed before broader ones.
process_names:
  - name: "webdrive-server_prod-flymoon-email-v2"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*fly-moon-email-prod.jar.*"

  - name: "webdrive-server_test-flymoon-email-v2"
    cmdline:
      - "/data/jdk1.8.0_181/bin/java"
      - ".*fly-moon-email-test.jar.*"

  - name: "webdrive-server_s2_py_lessie_sourcing"
    cmdline:
      - "/data/webapps/lessie_sourcing_agents"
      - ".*server.py.*"

  - name: "webdrive-server_s3_lessie_sourcing_6001"
    cmdline:
      - "/data/webapps/qmm_sourcing_agents"
      - ".*serverqmm.*"

  # The s3 entry must precede the s2 entry: every command line containing
  # "s3-lessie-sourcing-api" also matches the broader s2 pattern below, so in
  # the previous order the s3 group could never claim a process.
  - name: "webdrive-server_s3_GO-lessie-sourcing-api"
    cmdline:
      - ".*s3-lessie-sourcing-api.*"

  - name: "webdrive-server_s2_GO-lessie-sourcing-api"
    cmdline:
      - ".*lessie-sourcing-api.*"

  # redis-server
  - name: "webdrive-server_redis-server"
    cmdline:
      - "/data/redis/bin/redis-server"  # Redis executable
      - ".*"  # any further Redis arguments

  # nginx
  - name: "webdrive-server_nginx"
    cmdline:
      - ".*nginx.*"  # any command line containing "nginx"
|
||||
@@ -0,0 +1,29 @@
|
||||
---
# process-exporter groups for the weblessie server.
# Each `cmdline` item is a regex; all items of an entry must match, and a
# process joins only the first entry that matches it.
process_names:
  - name: "weblessie-server_lessie_official_web"
    cmdline:
      - ".*index.mjs.*"

  # The pattern used to be ".*server.py*", where the trailing "y*" means
  # "zero or more y" and the ".py" suffix was therefore not required. It now
  # has the ".*server.py.*" form.
  - name: "weblessie-server_lessie_sourcing_8000"
    cmdline:
      - "/data/webapps/lessie_sourcing_agents"
      - ".*server.py.*"

  - name: "weblessie-server_lessie_sourcing_8002"
    cmdline:
      - "/data/webapps/lessie_sourcing_agents_02"
      - ".*server8002.*"

  - name: "weblessie-server_lessie_sourcing_7001"
    cmdline:
      - "/data/webapps/prod_lessie_sourcing_agents"
      - ".*server7001.*"

  - name: "weblessie-server_GO-lessie-sourcing-api"
    cmdline:
      - ".*lessie-sourcing-api.*"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
---
# process-exporter group for the Python lessie sourcing service on
# weblessie-server2. Each `cmdline` item is a regex and both must match.
process_names:
  # The pattern used to be ".*server.py*", where the trailing "y*" means
  # "zero or more y" and the ".py" suffix was therefore not required. It now
  # has the ".*server.py.*" form.
  - name: "weblessie-server2_s1_py_lessie_sourcing"
    cmdline:
      - "/data/webapps/lessie_sourcing_agents"
      - ".*server.py.*"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
---
# process-exporter groups for the website server.
# Each `cmdline` item is a regex; all items of an entry must match, and a
# process joins only the first entry that matches it, so the order matters.
# A second, identical `website-server_yt_search_crawler` entry that followed
# the daily yt_data_update entry was removed: it could never claim a process.
process_names:
  # NOTE(review): the group name ends in "influencer_im" while the pattern
  # looks for dialogue.influencer_search under the test_ directory - confirm
  # the name is intended.
  - name: "website-server_dialogue.influencer_im"
    cmdline:
      - "/data/webapps/test_influencer_search_agent/venv/bin/python"
      - ".*dialogue.influencer_search.*"

  - name: "website-server_dialogue.inf_5002_lessie.ai"
    cmdline:
      - "/data/webapps/influencer_search_agent/venv/bin/python"
      - ".*dialogue.influencer_5002.*"

  - name: "website-server_tt_shop.py"
    cmdline:
      - ".*tt_shop.py.*"

  # yt_search_crawler
  - name: "website-server_yt_search_crawler"
    cmdline:
      - ".*async_yt.*"

  - name: "website-server_check_tiktok_account"
    cmdline:
      - ".*check_account.*"

  # yt_data_update, weekly run (update_yt_week.py)
  - name: "website-server_yt_data_update_week"
    cmdline:
      - ".*update_yt_week.*"

  # yt_data_update, daily run (update_yt.py). This pattern also matches
  # update_yt_week, so the weekly entry has to stay above this one.
  - name: "website-server_yt_data_update_day"
    cmdline:
      - ".*update_yt.*"

  # Any remaining tt_shop process: command lines matching tt_shop.py are
  # already claimed by website-server_tt_shop.py above. The former inline
  # note "yt_search_crawler" on this entry was a copy-paste leftover.
  - name: "website-server_tk_shop_crawler"
    cmdline:
      - ".*tt_shop.*"

  # influencer_search_api
  - name: "website-server_api_server.py"
    cmdline:
      - ".*api_server.py.*"

  # nginx
  - name: "website-server_nginx"
    cmdline:
      - "/data/tengine/sbin/nginx"  # nginx executable
      - ".*"  # any further nginx arguments
|
||||
18
prometheus/process-exporter/启动.txt
Normal file
18
prometheus/process-exporter/启动.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
解压:
|
||||
tar -zxvf process-exporter-<version>.linux-amd64.tar.gz
|
||||
|
||||
创建配置文件:
|
||||
process-exporter.yml
|
||||
|
||||
后台启动:
|
||||
nohup /opt/exporter/process-exporter/process-exporter -config.path=/opt/exporter/process-exporter/process-exporter.yml > /opt/exporter/process-exporter/process-exporter.log 2>&1 &
|
||||
|
||||
查看进程:
|
||||
ps aux | grep process-exporter
|
||||
|
||||
查看日志:
|
||||
tail -f /opt/exporter/process-exporter/process-exporter.log
|
||||
|
||||
|
||||
|
||||
|
||||
18
prometheus/process-exporter/查询.conf
Normal file
18
prometheus/process-exporter/查询.conf
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1、namedprocess_namegroup_num_procs{groupname=~"$processes",instance=~"$instance"}
|
||||
2、sum(rate(namedprocess_namegroup_cpu_seconds_total{groupname=~"$processes",instance=~"$instance"}[$interval])) by (instance, groupname)
|
||||
3、sum(rate(namedprocess_namegroup_read_bytes_total{groupname=~"$processes", instance=~"$instance"}[$interval])) by (instance, groupname)
|
||||
4、sum(rate(namedprocess_namegroup_write_bytes_total{groupname=~"$processes", instance=~"$instance"}[$interval])) by (instance, groupname)
|
||||
5、namedprocess_namegroup_memory_bytes{groupname=~"$processes", instance=~"$instance",memtype="resident"}
|
||||
6、namedprocess_namegroup_memory_bytes{groupname=~"$processes", instance=~"$instance",memtype="virtual"}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
302
prometheus/prometheus-alert/app.conf
Normal file
302
prometheus/prometheus-alert/app.conf
Normal file
@@ -0,0 +1,302 @@
|
||||
#---------------------↓全局配置-----------------------
|
||||
appname = PrometheusAlert
|
||||
#登录用户名
|
||||
login_user=prometheusalert
|
||||
#登录密码
|
||||
login_password=prometheusalert
|
||||
#监听地址
|
||||
httpaddr = "0.0.0.0"
|
||||
#监听端口
|
||||
httpport = 8080
|
||||
runmode = dev
|
||||
#设置代理 proxy = http://123.123.123.123:8080
|
||||
proxy =
|
||||
#开启JSON请求
|
||||
copyrequestbody = true
|
||||
#告警消息标题
|
||||
title=PrometheusAlert
|
||||
#链接到告警平台地址
|
||||
GraylogAlerturl=http://graylog.org
|
||||
#钉钉告警 告警logo图标地址
|
||||
logourl=https://raw.githubusercontent.com/feiyu563/PrometheusAlert/master/doc/images/alert-center.png
|
||||
#钉钉告警 恢复logo图标地址
|
||||
rlogourl=https://raw.githubusercontent.com/feiyu563/PrometheusAlert/master/doc/images/alert-center.png
|
||||
#短信告警级别(等于3就进行短信告警) 告警级别定义 0 信息,1 警告,2 一般严重,3 严重,4 灾难
|
||||
messagelevel=3
|
||||
#电话告警级别(等于4就进行语音告警) 告警级别定义 0 信息,1 警告,2 一般严重,3 严重,4 灾难
|
||||
phonecalllevel=4
|
||||
#默认拨打号码(页面测试短信和电话功能需要配置此项)
|
||||
defaultphone=xxxxxxxx
|
||||
#故障恢复是否启用电话通知0为关闭,1为开启
|
||||
phonecallresolved=0
|
||||
#是否前台输出file or console
|
||||
logtype=file
|
||||
#日志文件路径
|
||||
logpath=logs/prometheusalertcenter.log
|
||||
#转换Prometheus,graylog告警消息的时区为CST时区(如默认已经是CST时区,请勿开启)
|
||||
prometheus_cst_time=0
|
||||
#数据库驱动,支持sqlite3,mysql,postgres如使用mysql或postgres,请开启db_host,db_port,db_user,db_password,db_name的注释
|
||||
db_driver=sqlite3
|
||||
#db_host=127.0.0.1
|
||||
#db_port=3306
|
||||
#db_user=root
|
||||
#db_password=root
|
||||
#db_name=prometheusalert
|
||||
#是否开启告警记录 0为关闭,1为开启
|
||||
AlertRecord=0
|
||||
#是否开启告警记录定时删除 0为关闭,1为开启
|
||||
RecordLive=0
|
||||
#告警记录定时删除周期,单位天
|
||||
RecordLiveDay=7
|
||||
# 是否将告警记录写入es7,0为关闭,1为开启
|
||||
alert_to_es=0
|
||||
# es地址,是[]string
|
||||
# beego.Appconfig.Strings读取配置为[]string,使用";"而不是","
|
||||
to_es_url=http://localhost:9200
|
||||
# to_es_url=http://es1:9200;http://es2:9200;http://es3:9200
|
||||
# es用户和密码
|
||||
# to_es_user=username
|
||||
# to_es_pwd=password
|
||||
# 长连接最大空闲数
|
||||
maxIdleConns=100
|
||||
# 热更新配置文件
|
||||
open-hotreload=0
|
||||
|
||||
#---------------------↓webhook-----------------------
|
||||
#是否开启钉钉告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-dingding=1
|
||||
#默认钉钉机器人地址
|
||||
ddurl=https://oapi.dingtalk.com/robot/send?access_token=xxxxx
|
||||
#是否开启 @所有人(0为关闭,1为开启)
|
||||
dd_isatall=1
|
||||
#是否开启钉钉机器人加签,0为关闭,1为开启
|
||||
# 使用方法:https://oapi.dingtalk.com/robot/send?access_token=XXXXXX&secret=mysecret
|
||||
open-dingding-secret=0
|
||||
|
||||
#是否开启微信告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-weixin=1
|
||||
#默认企业微信机器人地址
|
||||
wxurl=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxxxx
|
||||
|
||||
#是否开启飞书告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-feishu=1
|
||||
#默认飞书机器人地址
|
||||
fsurl=https://open.feishu.cn/open-apis/bot/hook/xxxxxxxxx
|
||||
# webhook 发送 http 请求的 contentType, 如 application/json, application/x-www-form-urlencoded,不配置默认 application/json
|
||||
wh_contenttype=application/json
|
||||
|
||||
#---------------------↓腾讯云接口-----------------------
|
||||
#是否开启腾讯云短信告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-txdx=0
|
||||
#腾讯云短信接口key
|
||||
TXY_DX_appkey=xxxxx
|
||||
#腾讯云短信模版ID 腾讯云短信模版配置可参考 prometheus告警:{1}
|
||||
TXY_DX_tpl_id=xxxxx
|
||||
#腾讯云短信sdk app id
|
||||
TXY_DX_sdkappid=xxxxx
|
||||
#腾讯云短信签名 根据自己审核通过的签名来填写
|
||||
TXY_DX_sign=腾讯云
|
||||
|
||||
#是否开启腾讯云电话告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-txdh=0
|
||||
#腾讯云电话接口key
|
||||
TXY_DH_phonecallappkey=xxxxx
|
||||
#腾讯云电话模版ID
|
||||
TXY_DH_phonecalltpl_id=xxxxx
|
||||
#腾讯云电话sdk app id
|
||||
TXY_DH_phonecallsdkappid=xxxxx
|
||||
|
||||
#---------------------↓华为云接口-----------------------
|
||||
#是否开启华为云短信告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-hwdx=0
|
||||
#华为云短信接口key
|
||||
HWY_DX_APP_Key=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#华为云短信接口Secret
|
||||
HWY_DX_APP_Secret=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#华为云APP接入地址(端口接口地址)
|
||||
HWY_DX_APP_Url=https://rtcsms.cn-north-1.myhuaweicloud.com:10743
|
||||
#华为云短信模板ID
|
||||
HWY_DX_Templateid=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#华为云签名名称,必须是已审核通过的,与模板类型一致的签名名称,按照自己的实际签名填写
|
||||
HWY_DX_Signature=华为云
|
||||
#华为云签名通道号
|
||||
HWY_DX_Sender=xxxxxxxxxx
|
||||
|
||||
#---------------------↓阿里云接口-----------------------
|
||||
#是否开启阿里云短信告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-alydx=0
|
||||
#阿里云短信主账号AccessKey的ID
|
||||
ALY_DX_AccessKeyId=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#阿里云短信接口密钥
|
||||
ALY_DX_AccessSecret=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#阿里云短信签名名称
|
||||
ALY_DX_SignName=阿里云
|
||||
#阿里云短信模板ID
|
||||
ALY_DX_Template=xxxxxxxxxxxxxxxxxxxxxx
|
||||
|
||||
#是否开启阿里云电话告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-alydh=0
|
||||
#阿里云电话主账号AccessKey的ID
|
||||
ALY_DH_AccessKeyId=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#阿里云电话接口密钥
|
||||
ALY_DH_AccessSecret=xxxxxxxxxxxxxxxxxxxxxx
|
||||
#阿里云电话被叫显号,必须是已购买的号码
|
||||
ALY_DX_CalledShowNumber=xxxxxxxxx
|
||||
#阿里云电话文本转语音(TTS)模板ID
|
||||
ALY_DH_TtsCode=xxxxxxxx
|
||||
|
||||
#---------------------↓容联云接口-----------------------
|
||||
#是否开启容联云电话告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-rlydh=0
|
||||
#容联云基础接口地址
|
||||
RLY_URL=https://app.cloopen.com:8883/2013-12-26/Accounts/
|
||||
#容联云后台SID
|
||||
RLY_ACCOUNT_SID=xxxxxxxxxxx
|
||||
#容联云api-token
|
||||
RLY_ACCOUNT_TOKEN=xxxxxxxxxx
|
||||
#容联云app_id
|
||||
RLY_APP_ID=xxxxxxxxxxxxx
|
||||
|
||||
#---------------------↓邮件配置-----------------------
|
||||
#是否开启邮件
|
||||
open-email=0
|
||||
#邮件发件服务器地址
|
||||
Email_host=smtp.qq.com
|
||||
#邮件发件服务器端口
|
||||
Email_port=465
|
||||
#邮件帐号
|
||||
Email_user=xxxxxxx@qq.com
|
||||
#邮件密码
|
||||
Email_password=xxxxxx
|
||||
#邮件标题
|
||||
Email_title=运维告警
|
||||
#默认发送邮箱
|
||||
Default_emails=xxxxx@qq.com,xxxxx@qq.com
|
||||
|
||||
#---------------------↓七陌云接口-----------------------
|
||||
#是否开启七陌短信告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-7moordx=0
|
||||
#七陌账户ID
|
||||
7MOOR_ACCOUNT_ID=Nxxx
|
||||
#七陌账户APISecret
|
||||
7MOOR_ACCOUNT_APISECRET=xxx
|
||||
#七陌账户短信模板编号
|
||||
7MOOR_DX_TEMPLATENUM=n
|
||||
#注意:七陌短信变量这里只用一个var1,在代码里写死了。
|
||||
#-----------
|
||||
#是否开启七陌webcall语音通知告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-7moordh=0
|
||||
#请在七陌平台添加虚拟服务号、文本节点
|
||||
#七陌账户webcall的虚拟服务号
|
||||
7MOOR_WEBCALL_SERVICENO=xxx
|
||||
# 文本节点里被替换的变量,我配置的是text。如果被替换的变量不是text,请修改此配置
|
||||
7MOOR_WEBCALL_VOICE_VAR=text
|
||||
|
||||
#---------------------↓telegram接口-----------------------
|
||||
#是否开启telegram告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-tg=0
|
||||
#tg机器人token
|
||||
TG_TOKEN=xxxxx
|
||||
#tg消息模式 个人消息或者频道消息 0为关闭(推送给个人),1为开启(推送给频道)
|
||||
TG_MODE_CHAN=0
|
||||
#tg用户ID
|
||||
TG_USERID=xxxxx
|
||||
#tg频道name或者id, 频道name需要以@开始
|
||||
TG_CHANNAME=xxxxx
|
||||
#tg api地址, 可以配置为代理地址
|
||||
#TG_API_PROXY="https://api.telegram.org/bot%s/%s"
|
||||
#TG_PARSE_MODE设置为 "1" ,启用Markdown
|
||||
TG_PARSE_MODE = "0"
|
||||
|
||||
#---------------------↓workwechat接口-----------------------
|
||||
#是否开启workwechat告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-workwechat=0
|
||||
# 企业ID
|
||||
WorkWechat_CropID=xxxxx
|
||||
# 应用ID
|
||||
WorkWechat_AgentID=xxxx
|
||||
# 应用secret
|
||||
WorkWechat_AgentSecret=xxxx
|
||||
# 接受用户
|
||||
WorkWechat_ToUser="zhangsan|lisi"
|
||||
# 接受部门
|
||||
WorkWechat_ToParty="ops|dev"
|
||||
# 接受标签
|
||||
WorkWechat_ToTag=""
|
||||
# 消息类型, 暂时只支持markdown
|
||||
# WorkWechat_Msgtype = "markdown"
|
||||
|
||||
#---------------------↓百度云接口-----------------------
|
||||
#是否开启百度云短信告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-baidudx=0
|
||||
#百度云短信接口AK(ACCESS_KEY_ID)
|
||||
BDY_DX_AK=xxxxx
|
||||
#百度云短信接口SK(SECRET_ACCESS_KEY)
|
||||
BDY_DX_SK=xxxxx
|
||||
#百度云短信ENDPOINT(ENDPOINT参数需要用指定区域的域名来进行定义,如服务所在区域为北京,则为)
|
||||
BDY_DX_ENDPOINT=http://smsv3.bj.baidubce.com
|
||||
#百度云短信模版ID,根据自己审核通过的模版来填写(模版支持一个参数code:如prometheus告警:{code})
|
||||
BDY_DX_TEMPLATE_ID=xxxxx
|
||||
#百度云短信签名ID,根据自己审核通过的签名来填写
|
||||
TXY_DX_SIGNATURE_ID=xxxxx
|
||||
|
||||
#---------------------↓百度Hi(如流)-----------------------
|
||||
#是否开启百度Hi(如流)告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-ruliu=0
|
||||
#默认百度Hi(如流)机器人地址
|
||||
BDRL_URL=https://api.im.baidu.com/api/msg/groupmsgsend?access_token=xxxxxxxxxxxxxx
|
||||
#百度Hi(如流)群ID
|
||||
BDRL_ID=123456
|
||||
#---------------------↓bark接口-----------------------
|
||||
#是否开启bark告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-bark=0
|
||||
#bark默认地址, 建议自行部署bark-server
|
||||
BARK_URL=https://api.day.app
|
||||
#bark key, 多个key使用分割
|
||||
BARK_KEYS=xxxxx
|
||||
# 复制, 推荐开启
|
||||
BARK_COPY=1
|
||||
# 历史记录保存,推荐开启
|
||||
BARK_ARCHIVE=1
|
||||
# 消息分组
|
||||
BARK_GROUP=PrometheusAlert
|
||||
|
||||
#---------------------↓语音播报-----------------------
|
||||
#语音播报需要配合语音播报插件才能使用
|
||||
#是否开启语音播报通道,0为关闭,1为开启
|
||||
open-voice=1
|
||||
VOICE_IP=127.0.0.1
|
||||
VOICE_PORT=9999
|
||||
|
||||
#---------------------↓飞书机器人应用-----------------------
|
||||
#是否开启feishuapp告警通道,可同时开始多个通道0为关闭,1为开启
|
||||
open-feishuapp=1
|
||||
# APPID
|
||||
FEISHU_APPID=cli_xxxxxxxxxxxxx
|
||||
# APPSECRET
|
||||
FEISHU_APPSECRET=xxxxxxxxxxxxxxxxxxxxxx
|
||||
# 可填飞书 用户open_id、user_id、union_ids、部门open_department_id
|
||||
AT_USER_ID="xxxxxxxx"
|
||||
|
||||
|
||||
#---------------------↓告警组-----------------------
|
||||
# 有其他新增的配置段,请放在告警组的上面
|
||||
# 暂时仅针对 PrometheusContronller 中的 /prometheus/alert 路由
|
||||
# 告警组如果放在了 wx, dd... 那部分的上分,beego section 取 url 值不太对。
|
||||
# 所以这里使用 include 来包含另告警组配置
|
||||
|
||||
# 是否启用告警组功能
|
||||
open-alertgroup=0
|
||||
|
||||
# 自定义的告警组既可以写在这里,也可以写在单独的文件里。
|
||||
# 写在单独的告警组配置里更便于修改。
|
||||
# include "alertgroup.conf"
|
||||
|
||||
#---------------------↓kafka地址-----------------------
|
||||
# kafka服务器的地址
|
||||
open-kafka=1
|
||||
kafka_server = 127.0.0.1:9092
|
||||
# 写入消息的kafka topic
|
||||
kafka_topic = devops
|
||||
# 用户标记该消息是来自PrometheusAlert,一般无需修改
|
||||
kafka_key = PrometheusAlert
|
||||
283
prometheus/prometheus/config/prometheus.yml
Normal file
283
prometheus/prometheus/config/prometheus.yml
Normal file
@@ -0,0 +1,283 @@
|
||||
global:
|
||||
scrape_interval: 30s
|
||||
evaluation_interval: 30s
|
||||
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: ['172.24.16.20:9093']
|
||||
|
||||
rule_files:
|
||||
- "./reles/*.yml"
|
||||
|
||||
scrape_configs:
|
||||
# 运维服务器================================
|
||||
- job_name: 'ops-server_node'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.20:9100']
|
||||
labels:
|
||||
instance: ops-server
|
||||
|
||||
- job_name: 'ops-server_prometheus'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.20:9090']
|
||||
labels:
|
||||
instance: ops-server
|
||||
# 运维服务器================================
|
||||
|
||||
|
||||
# 国内测试机器==============================
|
||||
- job_name: 'app-test-server_node'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.13:9100']
|
||||
labels:
|
||||
instance: app-test-server
|
||||
|
||||
- job_name: 'test-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.13:9256']
|
||||
labels:
|
||||
instance: app-test-server
|
||||
# 国内测试机器==============================
|
||||
|
||||
|
||||
# 国内生产服务器01==========================
|
||||
- job_name: 'app-prod-server_node01'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.10:9100']
|
||||
labels:
|
||||
instance: app-prod-server_01
|
||||
|
||||
- job_name: 'prod01-server-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.10:9256']
|
||||
labels:
|
||||
instance: app-prod-server_01
|
||||
# 国内生产服务器01==========================
|
||||
|
||||
|
||||
# 国内生产服务器02==========================
|
||||
- job_name: 'app-prod-server_node02'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.7:9100']
|
||||
labels:
|
||||
instance: app-prod-server_02
|
||||
|
||||
- job_name: 'prod02-server-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['172.24.16.7:9256']
|
||||
labels:
|
||||
instance: app-prod-server_02
|
||||
# 国内生产服务器02==========================
|
||||
|
||||
|
||||
# 海外服务器01==============================
|
||||
- job_name: 'webdrive-server_node'
|
||||
static_configs:
|
||||
- targets: ['43.159.145.241:9100']
|
||||
labels:
|
||||
instance: webdrive-server
|
||||
|
||||
- job_name: 'webdrive-server-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.159.145.241:9256']
|
||||
labels:
|
||||
instance: webdrive-server
|
||||
# 海外服务器01==============================
|
||||
|
||||
|
||||
# 海外服务器02==============================
|
||||
- job_name: 'website-server_node'
|
||||
static_configs:
|
||||
- targets: ['49.51.46.148:9100']
|
||||
labels:
|
||||
instance: website-server
|
||||
|
||||
- job_name: 'website-server-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['49.51.46.148:9256']
|
||||
labels:
|
||||
instance: website-server
|
||||
# 海外服务器02==============================
|
||||
|
||||
|
||||
# 海外服务器03==============================
|
||||
- job_name: 'weblessie-server1_node'
|
||||
static_configs:
|
||||
- targets: ['43.130.56.138:9100']
|
||||
labels:
|
||||
instance: weblessie-server1
|
||||
|
||||
- job_name: 'weblessie-server1-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.130.56.138:9256']
|
||||
labels:
|
||||
instance: weblessie-server1
|
||||
# 海外服务器03==============================
|
||||
|
||||
# 海外服务器04==============================
|
||||
- job_name: 'prod-lessie-server5_node'
|
||||
static_configs:
|
||||
- targets: ['43.130.53.202:9100']
|
||||
labels:
|
||||
instance: prod-lessie-server5
|
||||
- job_name: 'prod-lessie-server5-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.130.53.202:9256']
|
||||
labels:
|
||||
instance: prod-lessie-server5
|
||||
# 海外服务器04==============================
|
||||
|
||||
# 海外服务器05==============================
|
||||
- job_name: 'prod-lessie-server1_node'
|
||||
static_configs:
|
||||
- targets: ['43.130.59.68:9100']
|
||||
labels:
|
||||
instance: prod-lessie-server1
|
||||
|
||||
- job_name: 'prod-lessie-server1-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.130.59.68:9256']
|
||||
labels:
|
||||
instance: prod-lessie-server1
|
||||
# 海外服务器05==============================
|
||||
|
||||
# 海外服务器06==============================
|
||||
- job_name: 'prod-lessie-server2_node'
|
||||
static_configs:
|
||||
- targets: ['43.173.126.43:9100']
|
||||
labels:
|
||||
instance: prod-lessie-server2
|
||||
|
||||
- job_name: 'prod-lessie-server2-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.173.126.43:9256']
|
||||
labels:
|
||||
instance: prod-lessie-server2
|
||||
# 海外服务器06==============================
|
||||
|
||||
# 海外服务器07==============================
|
||||
- job_name: 'prod-lessie-server3_node'
|
||||
static_configs:
|
||||
- targets: ['49.51.189.136:9100']
|
||||
labels:
|
||||
instance: prod-lessie-server3
|
||||
|
||||
- job_name: 'prod-lessie-server3-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['49.51.189.136:9256']
|
||||
labels:
|
||||
instance: prod-lessie-server3
|
||||
# 海外服务器07==============================
|
||||
|
||||
|
||||
# 海外服务器08==============================
|
||||
- job_name: prod-lessie-server4_node
|
||||
static_configs:
|
||||
- targets: ['170.106.187.156:9100']
|
||||
labels:
|
||||
instance: prod-lessie-server4
|
||||
|
||||
- job_name: 'prod-lessie-server4-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['170.106.187.156:9256']
|
||||
labels:
|
||||
instance: prod-lessie-server4
|
||||
# 海外服务器08==============================
|
||||
|
||||
|
||||
# 海外服务器us-prod-01======================
|
||||
- job_name: 'us-prod-01_node'
|
||||
static_configs:
|
||||
- targets: ['43.153.21.64:9100']
|
||||
labels:
|
||||
instance: us-prod-01
|
||||
|
||||
- job_name: 'us-prod-01-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.153.21.64:9256']
|
||||
labels:
|
||||
instance: us-prod-01
|
||||
# 海外服务器us-prod-01======================
|
||||
|
||||
|
||||
# 海外服务器us-prod-02======================
|
||||
- job_name: 'us-prod-02_node'
|
||||
static_configs:
|
||||
- targets: ['43.153.98.191:9100']
|
||||
labels:
|
||||
instance: us-prod-02
|
||||
|
||||
- job_name: 'us-prod-02-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['43.153.98.191:9256']
|
||||
labels:
|
||||
instance: us-prod-02
|
||||
# 海外服务器us-prod-02======================
|
||||
|
||||
|
||||
# 海外服务器us-prod-03======================
|
||||
- job_name: 'us-prod-03_node'
|
||||
static_configs:
|
||||
- targets: ['49.51.41.243:9100']
|
||||
labels:
|
||||
instance: us-prod-03
|
||||
|
||||
- job_name: 'us-prod-03-process-exporter'
|
||||
static_configs:
|
||||
- targets: ['49.51.41.243:9256']
|
||||
labels:
|
||||
instance: us-prod-03
|
||||
# 海外服务器us-prod-03======================
|
||||
|
||||
|
||||
# 自建mongodb的机器=========================
|
||||
- job_name: 'mongodb_node'
|
||||
static_configs:
|
||||
- targets: ['170.106.187.49:9100']
|
||||
labels:
|
||||
instance: mongodb-server
|
||||
|
||||
- job_name: 'mongodb_process'
|
||||
static_configs:
|
||||
- targets: ['170.106.187.49:9256']
|
||||
labels:
|
||||
instance: mongodb-server
|
||||
|
||||
- job_name: 'mongodb_exporter'
|
||||
static_configs:
|
||||
- targets: ['170.106.187.49:9216']
|
||||
labels:
|
||||
instance: mongodb-server
|
||||
# 自建mongodb==============================
|
||||
|
||||
|
||||
# 国内nacos================================
|
||||
- job_name: 'nacos-cluster'
|
||||
metrics_path: '/actuator/prometheus'
|
||||
static_configs:
|
||||
- targets:
|
||||
- '172.24.16.10:8081'
|
||||
- '172.24.16.7:8081'
|
||||
- '172.24.16.13:8081'
|
||||
labels:
|
||||
instance: nacos-cluster
|
||||
# 国内nacos================================
|
||||
|
||||
|
||||
# 本地存储日志的es机器======================
|
||||
- job_name: 'elasticsearch'
|
||||
static_configs:
|
||||
- targets: ['192.168.70.16:9100']
|
||||
labels:
|
||||
instance: es-server
|
||||
# 本地存储日志的es机器======================
|
||||
|
||||
|
||||
# 本地的sit机器============================
|
||||
- job_name: 'sit-server_node'
|
||||
static_configs:
|
||||
- targets: ['192.168.70.18:9100']
|
||||
labels:
|
||||
instance: sit-server
|
||||
# 本地的sit机器============================
|
||||
78
prometheus/prometheus/config/reles/node_rules.yml
Normal file
78
prometheus/prometheus/config/reles/node_rules.yml
Normal file
@@ -0,0 +1,78 @@
|
||||
# 服务器资源告警策略
|
||||
groups:
|
||||
- name: 服务器资源监控
|
||||
rules:
|
||||
- alert: 内存使用率过高
|
||||
expr: 100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 90
|
||||
for: 1m # 告警持续时间,超过这个时间才会发送给alertmanager
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
|
||||
# Text must match this rule's expr threshold (> 90), not 95.
description: "{{ $labels.instance }}内存使用率超过90%,当前使用率{{ $value }}%."
|
||||
|
||||
- alert: 服务器宕机
|
||||
expr: up{job=~".*_node.*"} == 0
|
||||
for: 10s
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
# This rule uses `for: 10s`, so the summary must not claim a 1-minute outage.
summary: "{{$labels.instance}} 服务器宕机,请尽快处理!"
|
||||
description: "{{$labels.instance}} 服务器已宕机。"
|
||||
|
||||
- alert: CPU高负荷
|
||||
expr: 100 - (avg by (instance,job)(irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} CPU使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} CPU使用大于90%,当前使用率{{ $value }}%. "
|
||||
|
||||
- alert: 磁盘IO性能
|
||||
expr: avg(irate(node_disk_io_time_seconds_total[1m])) by(instance,job)* 100 > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流入磁盘IO使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 流入磁盘IO大于90%,当前使用率{{ $value }}%."
|
||||
|
||||
|
||||
- alert: 网络流入
|
||||
# Label regexes are fully anchored: `virbr*` would not match `virbr0`, so use `virbr.*`.
expr: ((sum(rate (node_network_receive_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo'}[5m])) by (instance,job)) / 100) > 102400
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流入网络带宽过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 流入网络带宽持续1分钟高于100M. RX带宽使用量{{$value}}."
|
||||
|
||||
- alert: 网络流出
|
||||
# Label regexes are fully anchored: `virbr*` would not match `virbr0`, so use `virbr.*`.
expr: ((sum(rate (node_network_transmit_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo'}[5m])) by (instance,job)) / 100) > 102400
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流出网络带宽过高,请尽快处理!"
|
||||
# Fixed the broken `{$value}}` template action; outbound traffic is TX and the rule holds for 1m.
description: "{{$labels.instance}} 流出网络带宽持续1分钟高于100M. TX带宽使用量{{$value}}."
|
||||
|
||||
- alert: TCP连接数
|
||||
expr: node_netstat_Tcp_CurrEstab > 10000
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: " TCP_ESTABLISHED过高!"
|
||||
# The expr compares a connection count against 10000; it is not a percentage.
description: "{{$labels.instance}} TCP_ESTABLISHED连接数大于10000,当前连接数{{ $value }}."
|
||||
|
||||
- alert: 磁盘容量
|
||||
expr: 100-(node_filesystem_free_bytes{fstype=~"ext4|xfs"}/node_filesystem_size_bytes {fstype=~"ext4|xfs"}*100) > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.mountpoint}} 磁盘分区使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 磁盘分区使用大于90%,当前使用率{{ $value }}%."
|
||||
|
||||
|
||||
78
prometheus/prometheus/config/reles/process_rules.yml
Normal file
78
prometheus/prometheus/config/reles/process_rules.yml
Normal file
@@ -0,0 +1,78 @@
|
||||
# 服务器资源告警策略
|
||||
groups:
|
||||
- name: 服务器资源监控
|
||||
rules:
|
||||
- alert: 内存使用率过高
|
||||
expr: 100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 90
|
||||
for: 1m # 告警持续时间,超过这个时间才会发送给alertmanager
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
|
||||
# Text must match this rule's expr threshold (> 90), not 95.
description: "{{ $labels.instance }}内存使用率超过90%,当前使用率{{ $value }}%."
|
||||
|
||||
- alert: 服务器宕机
|
||||
expr: up == 0
|
||||
for: 10s
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
# This rule uses `for: 10s`, so the summary must not claim a 1-minute outage.
summary: "{{$labels.instance}} 服务器宕机,请尽快处理!"
|
||||
description: "{{$labels.instance}} 服务器已宕机。"
|
||||
|
||||
- alert: CPU高负荷
|
||||
expr: 100 - (avg by (instance,job)(irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} CPU使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} CPU使用大于90%,当前使用率{{ $value }}%. "
|
||||
|
||||
- alert: 磁盘IO性能
|
||||
expr: avg(irate(node_disk_io_time_seconds_total[1m])) by(instance,job)* 100 > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流入磁盘IO使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 流入磁盘IO大于90%,当前使用率{{ $value }}%."
|
||||
|
||||
|
||||
- alert: 网络流入
|
||||
# Label regexes are fully anchored: `virbr*` would not match `virbr0`, so use `virbr.*`.
expr: ((sum(rate (node_network_receive_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo'}[5m])) by (instance,job)) / 100) > 102400
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流入网络带宽过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 流入网络带宽持续1分钟高于100M. RX带宽使用量{{$value}}."
|
||||
|
||||
- alert: 网络流出
|
||||
# Label regexes are fully anchored: `virbr*` would not match `virbr0`, so use `virbr.*`.
expr: ((sum(rate (node_network_transmit_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo'}[5m])) by (instance,job)) / 100) > 102400
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流出网络带宽过高,请尽快处理!"
|
||||
# Fixed the broken `{$value}}` template action; outbound traffic is TX and the rule holds for 1m.
description: "{{$labels.instance}} 流出网络带宽持续1分钟高于100M. TX带宽使用量{{$value}}."
|
||||
|
||||
- alert: TCP连接数
|
||||
expr: node_netstat_Tcp_CurrEstab > 10000
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: " TCP_ESTABLISHED过高!"
|
||||
# The expr compares a connection count against 10000; it is not a percentage.
description: "{{$labels.instance}} TCP_ESTABLISHED连接数大于10000,当前连接数{{ $value }}."
|
||||
|
||||
- alert: 磁盘容量
|
||||
expr: 100-(node_filesystem_free_bytes{fstype=~"ext4|xfs"}/node_filesystem_size_bytes {fstype=~"ext4|xfs"}*100) > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.mountpoint}} 磁盘分区使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 磁盘分区使用大于90%,当前使用率{{ $value }}%."
|
||||
|
||||
|
||||
78
prometheus/prometheus/config/rules.yml
Normal file
78
prometheus/prometheus/config/rules.yml
Normal file
@@ -0,0 +1,78 @@
|
||||
# 服务器资源告警策略
|
||||
groups:
|
||||
- name: 服务器资源监控
|
||||
rules:
|
||||
- alert: 内存使用率过高
|
||||
expr: 100 - (node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 > 90
|
||||
for: 1m # 告警持续时间,超过这个时间才会发送给alertmanager
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
|
||||
description: "{{ $labels.instance }}内存使用率超过90%,当前使用率{{ $value }}%."
|
||||
|
||||
- alert: 服务器宕机
|
||||
expr: up == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 服务器宕机,请尽快处理!"
|
||||
description: "{{$labels.instance}} 服务器延时超过1分钟,当前状态{{ $value }}. "
|
||||
|
||||
- alert: CPU高负荷
|
||||
expr: 100 - (avg by (instance,job)(irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} CPU使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} CPU使用大于90%,当前使用率{{ $value }}%. "
|
||||
|
||||
- alert: 磁盘IO性能
|
||||
expr: avg(irate(node_disk_io_time_seconds_total[1m])) by(instance,job)* 100 > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流入磁盘IO使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 流入磁盘IO大于90%,当前使用率{{ $value }}%."
|
||||
|
||||
|
||||
- alert: 网络流入
|
||||
# Label regexes are fully anchored: `virbr*` would not match `virbr0`, so use `virbr.*`.
expr: ((sum(rate (node_network_receive_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo'}[5m])) by (instance,job)) / 100) > 102400
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流入网络带宽过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 流入网络带宽持续1分钟高于100M. RX带宽使用量{{$value}}."
|
||||
|
||||
- alert: 网络流出
|
||||
# Label regexes are fully anchored: `virbr*` would not match `virbr0`, so use `virbr.*`.
expr: ((sum(rate (node_network_transmit_bytes_total{device!~'tap.*|veth.*|br.*|docker.*|virbr.*|lo'}[5m])) by (instance,job)) / 100) > 102400
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.instance}} 流出网络带宽过高,请尽快处理!"
|
||||
# Fixed the broken `{$value}}` template action; outbound traffic is TX and the rule holds for 1m.
description: "{{$labels.instance}} 流出网络带宽持续1分钟高于100M. TX带宽使用量{{$value}}."
|
||||
|
||||
- alert: TCP连接数
|
||||
expr: node_netstat_Tcp_CurrEstab > 10000
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: " TCP_ESTABLISHED过高!"
|
||||
# The expr compares a connection count against 10000; it is not a percentage.
description: "{{$labels.instance}} TCP_ESTABLISHED连接数大于10000,当前连接数{{ $value }}."
|
||||
|
||||
- alert: 磁盘容量
|
||||
expr: 100-(node_filesystem_free_bytes{fstype=~"ext4|xfs"}/node_filesystem_size_bytes {fstype=~"ext4|xfs"}*100) > 90
|
||||
for: 1m
|
||||
labels:
|
||||
severity: 严重告警
|
||||
annotations:
|
||||
summary: "{{$labels.mountpoint}} 磁盘分区使用率过高,请尽快处理!"
|
||||
description: "{{$labels.instance}} 磁盘分区使用大于90%,当前使用率{{ $value }}%."
|
||||
|
||||
|
||||
196
prometheus/安装exporter
Normal file
196
prometheus/安装exporter
Normal file
@@ -0,0 +1,196 @@
|
||||
tar -zxvf node_exporter-1.8.2.linux-amd64.tar.gz
|
||||
|
||||
vim /etc/systemd/system/node_exporter.service
|
||||
|
||||
[Unit]
|
||||
Description=node_exporter Monitoring System
|
||||
Documentation=node_exporter Monitoring System
|
||||
[Service]
|
||||
ExecStart=/opt/exporter/node_exporter/node_exporter --web.listen-address=:9100
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
systemctl daemon-reexec
|
||||
systemctl daemon-reload
|
||||
|
||||
|
||||
systemctl start node_exporter
|
||||
systemctl status node_exporter
|
||||
|
||||
systemctl enable node_exporter
|
||||
|
||||
ss -ntl | grep 9100
|
||||
|
||||
curl http://localhost:9100/metrics
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------------------------------
|
||||
|
||||
# 下载 node_exporter
|
||||
cd /opt/
|
||||
wget https://github.com/prometheus/node_exporter/releases/download/v1.8.0/node_exporter-1.8.0.linux-amd64.tar.gz
|
||||
tar -xzf node_exporter-1.8.0.linux-amd64.tar.gz
|
||||
# Directory name must match the 1.8.0 tarball downloaded and extracted just above.
mv node_exporter-1.8.0.linux-amd64 node_exporter
|
||||
|
||||
|
||||
# systemd 管理----------
|
||||
sudo tee /etc/systemd/system/node_exporter.service > /dev/null <<EOF
|
||||
[Unit]
|
||||
Description=Prometheus Node Exporter
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
User=root
|
||||
ExecStart=/opt/exporter/node_exporter/node_exporter
|
||||
Restart=on-failure
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
# systemd 管理----------
|
||||
sudo systemctl daemon-reexec
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl start node_exporter
|
||||
sudo systemctl status node_exporter
|
||||
sudo systemctl enable node_exporter
|
||||
|
||||
|
||||
# 启动服务
|
||||
nohup /opt/node_exporter/node_exporter > /opt/node_exporter/node_exporter.log 2>&1 &
|
||||
|
||||
默认监听 9100,确认防火墙放行或仅 Prometheus 网络能访问。
|
||||
|
||||
调试技巧
|
||||
cat /proc/1784152/cmdline | tr '\0' '\n'
|
||||
|
||||
安装 process_exporter(二进制)
|
||||
cd /opt/
|
||||
wget https://github.com/ncabatoff/process-exporter/releases/download/v0.7.10/process-exporter-0.7.10.linux-amd64.tar.gz
|
||||
tar -xzf process-exporter-0.7.10.linux-amd64.tar.gz
|
||||
# Directory name must match the 0.7.10 tarball downloaded and extracted just above.
mv process-exporter-0.7.10.linux-amd64 process-exporter
|
||||
|
||||
|
||||
sudo tee /etc/systemd/system/process_exporter.service > /dev/null <<EOF
|
||||
[Unit]
|
||||
Description=Prometheus Process Exporter
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/exporter/process-exporter/process-exporter --config.path=/opt/exporter/process-exporter/process-exporter.yml
|
||||
Restart=always
|
||||
User=root
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl start process_exporter
|
||||
sudo systemctl status process_exporter
|
||||
sudo systemctl enable process_exporter
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
安装 mongodb_exporter
|
||||
cd /opt
|
||||
wget https://github.com/percona/mongodb_exporter/releases/download/v0.40.0/mongodb_exporter-0.40.0.linux-amd64.tar.gz
|
||||
tar -xzf mongodb_exporter-0.40.0.linux-amd64.tar.gz
|
||||
mv mongodb_exporter-0.40.0.linux-amd64 /opt/mongodb_exporter
|
||||
|
||||
|
||||
use admin
|
||||
db.createUser({
|
||||
user: "prometheus",
|
||||
pwd: "StrongPassword",
|
||||
roles: [ { role: "clusterMonitor", db: "admin" } ]
|
||||
})
|
||||
|
||||
|
||||
[Unit]
|
||||
Description=Prometheus MongoDB Exporter
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/exporter/mongodb_exporter/mongodb_exporter \
|
||||
--mongodb.uri=mongodb://prometheus:StrongPassword@localhost:27017/admin
|
||||
Restart=on-failure
|
||||
User=root
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
|
||||
|
||||
sudo tee /etc/systemd/system/mongodb_exporter.service > /dev/null <<EOF
|
||||
[Unit]
|
||||
Description=Prometheus MongoDB Exporter
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/exporter/mongodb_exporter/mongodb_exporter \
|
||||
--mongodb.uri=mongodb://myuser:mypassword@localhost:27017/admin
|
||||
Restart=on-failure
|
||||
User=root
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
EOF
|
||||
|
||||
|
||||
# 请将 mongodb://myuser:mypassword@localhost:27017/admin 替换为你的实际账号密码和数据库地址。
|
||||
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart mongodb_exporter
|
||||
sudo systemctl start mongodb_exporter
|
||||
sudo systemctl status mongodb_exporter
|
||||
sudo systemctl enable mongodb_exporter
|
||||
|
||||
|
||||
[Unit]
|
||||
Description=Prometheus MongoDB Exporter
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
# Placeholder credentials: substitute the real account locally; do not commit real passwords.
ExecStart=/opt/exporter/mongodb_exporter/mongodb_exporter --mongodb.uri=mongodb://myuser:mypassword@localhost:27017/admin --compatible-mode
|
||||
Restart=on-failure
|
||||
User=root
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
|
||||
|
||||
# 启动
|
||||
/opt/mongodb_exporter/mongodb_exporter \
|
||||
--mongodb.uri="mongodb://<user>:<password>@localhost:27017/admin" &
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
use admin
|
||||
|
||||
// 获取总连接数信息
|
||||
var connStatus = db.serverStatus().connections;
|
||||
print("总连接数 current:", connStatus.current, "可用 available:", connStatus.available);
|
||||
|
||||
// 统计每个客户端 IP 当前连接数
|
||||
|
||||
db.adminCommand({
|
||||
aggregate: 1,
|
||||
pipeline: [
|
||||
{ $currentOp: { allUsers: true, localOps: true } },
|
||||
{ $match: { client: { $exists: true } } },
|
||||
{ $project: { ip: { $arrayElemAt: [ { $split: ["$client", ":"] }, 0 ] } } },
|
||||
{ $group: { _id: "$ip", count: { $sum: 1 } } },
|
||||
{ $sort: { count: -1 } }
|
||||
],
|
||||
cursor: {}
|
||||
}).cursor.firstBatch.forEach(doc => printjson(doc));
|
||||
Reference in New Issue
Block a user