Changes

OpenTelemetry/1.txt (new file, 29 lines)

Phase 1: deploy metrics collection only (current goal)

What gets deployed:
Create the monitoring namespace plus the Collector's RBAC permissions;
Deploy the DaemonSet Collector (only the hostmetrics/kubeletstats receivers are configured, collecting node/container metrics);
Deploy the Deployment Collector (only the otlp receiver plus the prometheusremotewrite exporter are configured, forwarding metrics to Prometheus);

Core configuration modules:
receivers: hostmetrics, kubeletstats, otlp
processors: batch, resource
exporters: prometheusremotewrite
pipelines: metrics (wires the receivers/processors/exporters above together)


Phase 2: add log collection (extends Phase 1)

Non-conflicting operations:
Update only the DaemonSet Collector's ConfigMap: add a filelog receiver (configured with the log paths to collect) and a new logs pipeline under pipelines;
Update only the Deployment Collector's ConfigMap: add an elasticsearch exporter and a new logs pipeline under pipelines;
Restart the DaemonSet/Deployment Collector Pods (configuration reload);

Core logic:
Log collection relies on the DaemonSet mounting the host's log directories (only a new volume mount in the DaemonSet Pod spec; the existing metrics collection is untouched);
The filelog receiver and elasticsearch exporter for logs are completely independent of the metrics modules and do not interfere with them (see the sketch below);
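
A minimal sketch of the Phase 2 additions (the log path, index name and ES endpoint are assumptions, not taken from this commit; per the plan the filelog receiver belongs in the DaemonSet Collector's config and the elasticsearch exporter in the Deployment Collector's config):

receivers:
  filelog:
    include: [/var/log/containers/*.log]   # assumed host log path, mounted into the DaemonSet Pod
    start_at: end
exporters:
  elasticsearch:
    endpoints: ["http://10.0.0.38:9200"]   # same ES instance used by Filebeat later in this commit
    logs_index: otel-logs                  # hypothetical index name
service:
  pipelines:
    logs:
      receivers: [filelog]
      processors: [batch, resource]
      exporters: [elasticsearch]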

Phase 3: add trace collection (extends Phases 1 and 2)

Non-conflicting operations:
No changes to the DaemonSet Collector (traces need no node-level collection);
Update only the Deployment Collector's ConfigMap: add an otlp/tempo exporter and a new traces pipeline under pipelines;
Restart the Deployment Collector Pods;

Core logic:
Traces only require the Deployment Collector to expose ports 4317/4318 (the otlp receiver configured in Phase 1 already does, so nothing new is needed);
The otlp/tempo exporter for traces is completely independent of the metrics/logs modules; only the new traces pipeline has to be added (see the sketch below).
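
A minimal sketch of the Phase 3 additions to the Deployment Collector's config, assuming Tempo listens on its OTLP gRPC port at 10.0.0.38:4317 (the address and the insecure TLS setting are assumptions):

exporters:
  otlp/tempo:
    endpoint: 10.0.0.38:4317
    tls:
      insecure: true
service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch, resource]
      exporters: [otlp/tempo]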

OpenTelemetry/Collector /01-otel-rbac.yaml (new file, 56 lines)

# 1. Create the monitoring namespace
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
  labels:
    name: monitoring

---
# 2. Create the ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
  name: otel-collector
  namespace: monitoring

---
# 3. Create the ClusterRole (least privilege)
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-collector-role
rules:
  # Read node/Pod/Service metadata (base permissions)
  - apiGroups: [""]
    resources: ["nodes", "pods", "services", "endpoints", "nodes/metrics", "nodes/stats"]
    verbs: ["get", "list", "watch"]

  # To be added later
  # # Add: collect Deployment/DaemonSet/StatefulSet (apps API group)
  # - apiGroups: ["apps"]
  #   resources: ["deployments", "daemonsets", "statefulsets", "replicasets"]
  #   verbs: ["get", "list", "watch"]
  # # Add: collect HPA (autoscaling API group)
  # - apiGroups: ["autoscaling"]
  #   resources: ["horizontalpodautoscalers"]
  #   verbs: ["get", "list", "watch"]
  # # Add: collect k8s events (optional, for troubleshooting)
  # - apiGroups: [""]
  #   resources: ["events"]
  #   verbs: ["get", "list", "watch"]

---
# 4. Bind the ClusterRole to the ServiceAccount
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-collector-binding
subjects:
  - kind: ServiceAccount
    name: otel-collector
    namespace: monitoring
roleRef:
  kind: ClusterRole
  name: otel-collector-role
  apiGroup: rbac.authorization.k8s.io

OpenTelemetry/Collector /02-otel-collector-ConfigMap.yaml (new file, 64 lines)

apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-collector-config
  namespace: monitoring
data:
  config.yaml: |
    # Global configuration
    receivers:
      # 1. Node-level metrics (only used by the DaemonSet)
      hostmetrics:
        collection_interval: 30s
        scrapers:
          cpu: {}
          memory: {}
          disk: {}
          filesystem: {}
          network: {}
          load: {}
          processes: {}
      # 2. Container-level metrics (only used by the DaemonSet; fixed kubeletstats configuration)
      kubeletstats:
        collection_interval: 30s
        auth_type: "serviceAccount"
        endpoint: "https://${K8S_NODE_NAME}:10250"
        insecure_skip_verify: true
      # 3. OTLP receiver (used by both the DaemonSet and the Deployment)
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318

    processors:
      batch: {}
      resource:
        attributes:
          - key: k8s.cluster.name
            value: test-k8s
            action: insert
          - key: k8s.node.name
            from_attribute: host.name
            action: insert

    exporters:
      prometheusremotewrite:
        endpoint: "http://10.0.0.38:9090/api/v1/write"
        external_labels:
          k8s_cluster: test-k8s

    # Key fix: the service section
    service:
      pipelines:
        metrics:
          receivers: [hostmetrics, kubeletstats, otlp]
          processors: [batch, resource]
          exporters: [prometheusremotewrite]
      telemetry:
        logs:
          level: info
        metrics:
          endpoint: 0.0.0.0:8888
          collection_interval: 60s

OpenTelemetry/Collector /03-otel-collector-daemonset.yaml (new file, 57 lines)

# Deploy the DaemonSet (node-level metrics collection)
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: otel-collector-daemonset
  namespace: monitoring
  labels:
    app: otel-collector-daemonset
spec:
  selector:
    matchLabels:
      app: otel-collector-daemonset
  template:
    metadata:
      labels:
        app: otel-collector-daemonset
    spec:
      serviceAccountName: otel-collector
      hostNetwork: false  # host networking is not needed
      containers:
        - name: otel-collector
          image: otel/opentelemetry-collector-contrib:latest
          args: ["--config=/etc/otel-collector/config.yaml"]
          # Mount host directories (for node metrics)
          volumeMounts:
            - name: otel-config
              mountPath: /etc/otel-collector
            - name: proc
              mountPath: /proc
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: rootfs
              mountPath: /rootfs
              readOnly: true
          # Resource limits (adjust as needed)
          resources:
            limits:
              cpu: 500m
              memory: 512Mi
            requests:
              cpu: 100m
              memory: 256Mi
      volumes:
        - name: otel-config
          configMap:
            name: otel-collector-config
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /
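
The kubeletstats receiver in the ConfigMap above expands ${K8S_NODE_NAME}, which this DaemonSet never sets. A minimal sketch of the env injection that assumption requires, added under the otel-collector container (the Downward API field is the same one the Filebeat DaemonSet later in this commit uses for NODE_NAME):

          env:
            - name: K8S_NODE_NAME          # consumed by the kubeletstats endpoint
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName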

OpenTelemetry/Collector /04-otel-collector-deployment.yaml (new file, 43 lines)

# Deploy the Deployment (cluster-level aggregation and forwarding)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector-deployment
  namespace: monitoring
  labels:
    app: otel-collector-deployment
spec:
  replicas: 1  # single replica for the test environment; scale to 2 in production
  selector:
    matchLabels:
      app: otel-collector-deployment
  template:
    metadata:
      labels:
        app: otel-collector-deployment
    spec:
      serviceAccountName: otel-collector
      containers:
        - name: otel-collector
          image: otel/opentelemetry-collector-contrib:latest
          args: ["--config=/etc/otel-collector/config.yaml"]
          volumeMounts:
            - name: otel-config
              mountPath: /etc/otel-collector
          # Exposed ports
          ports:
            - containerPort: 4317  # OTLP gRPC
            - containerPort: 4318  # OTLP HTTP
            - containerPort: 8888  # self-monitoring
          # Resource limits
          resources:
            limits:
              cpu: 500m
              memory: 512Mi
            requests:
              cpu: 100m
              memory: 256Mi
      volumes:
        - name: otel-config
          configMap:
            name: otel-collector-config
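
The manifests above expose the OTLP ports only on the Pod; applications (and, per the readme later in this commit, the DaemonSet Collector) need a stable address to send to. A minimal sketch of a ClusterIP Service for that, assuming the name otel-collector (hypothetical, not part of this commit):

apiVersion: v1
kind: Service
metadata:
  name: otel-collector
  namespace: monitoring
spec:
  selector:
    app: otel-collector-deployment
  ports:
    - name: otlp-grpc
      port: 4317
      targetPort: 4317
    - name: otlp-http
      port: 4318
      targetPort: 4318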

OpenTelemetry/Collector /config.yaml (new file, 57 lines)
(Standalone copy of the collector configuration; its content is identical to the config.yaml block embedded in 02-otel-collector-ConfigMap.yaml above.)


OpenTelemetry/ES/filebast/01-filebeat-serviceaccount.yaml (new file, 73 lines)

# Filebeat ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat              # service account name
  namespace: kube-system      # namespace
  labels:
    k8s-app: filebeat         # label identifying the Filebeat app
---
# Filebeat ClusterRole: cluster-scoped permissions
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: filebeat              # cluster role name
  labels:
    k8s-app: filebeat         # label
rules:
  # get/list/watch on namespaces, pods and nodes
  - apiGroups: [""]
    resources: ["namespaces", "pods", "nodes"]
    verbs: ["get", "list", "watch"]
  # get/list/watch on ReplicaSets
  - apiGroups: ["apps"]
    resources: ["replicasets"]
    verbs: ["get", "list", "watch"]
  # get/list/watch on Jobs
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["get", "list", "watch"]
---
# Filebeat Role: namespace-scoped permissions
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: filebeat              # role name
  namespace: kube-system      # namespace it applies to
  labels:
    k8s-app: filebeat         # label
rules:
  # get/create/update on leases
  # Leases are used for coordination and leader election
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["get", "create", "update"]
---
# Bind the Filebeat ServiceAccount to the ClusterRole (ClusterRoleBinding)
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: filebeat              # binding name
subjects:
  - kind: ServiceAccount      # subject type
    name: filebeat            # service account name
    namespace: kube-system    # service account namespace
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole           # referenced role type
  name: filebeat              # referenced role name
---
# Bind the Filebeat ServiceAccount to the Role (RoleBinding)
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: filebeat              # binding name
  namespace: kube-system      # namespace it applies to
subjects:
  - kind: ServiceAccount      # subject type
    name: filebeat            # service account name
    namespace: kube-system    # service account namespace
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role                  # referenced role type
  name: filebeat              # referenced role name

OpenTelemetry/ES/filebast/02-filebeat-configmap.yaml (new file, 233 lines)

apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: kube-system
data:
  filebeat.yml: |
    setup.ilm.enabled: false
    setup.template.enabled: false

    filebeat.autodiscover:
      providers:
        - type: kubernetes
          templates:
            # ---------- JSON-format logs ----------
            - condition:
                and:
                  - regexp:
                      kubernetes.namespace: "^(sit|apex-evaluation)$"
                  - regexp:
                      kubernetes.labels.app: "^(lessie-go-api|apex)$"
              config:
                - type: filestream
                  id: "container-${data.kubernetes.container.id}"
                  prospector.scanner.symlinks: true
                  close.on_state_change.removed: false
                  parsers:
                    - container: ~
                  paths:
                    - /var/log/containers/*-${data.kubernetes.container.id}.log
                  processors:
                    - add_kubernetes_metadata:
                        host: ${NODE_NAME}
                    - decode_json_fields:
                        fields: ["message"]
                        target: "mylog"
                        overwrite_keys: true
                        add_error_key: true
                    - drop_fields:
                        fields:
                          - "kubernetes.node.labels"
                          - "kubernetes.namespace_labels.kubernetes_io/metadata_name"
                        ignore_missing: true
            # ---------- end JSON-format logs ----------

            # ---------- Java service Pods, agent/admin/payment projects, free-text logs ----------
            - condition:
                and:
                  - equals:
                      kubernetes.namespace: sit
                  - or:
                      - equals:
                          kubernetes.labels.app: "flymoon-admin"
                      - equals:
                          kubernetes.labels.app: "flymoon-agent"
                      - equals:
                          kubernetes.labels.app: "flymoon-payment"
              config:
                - type: filestream
                  id: "container-${data.kubernetes.container.id}"
                  prospector.scanner.symlinks: true
                  close.on_state_change.removed: false
                  parsers:
                    - container: ~
                    - multiline:
                        type: pattern
                        pattern: '^\d{4}-\d{2}-\d{2}-\d{2}:\d{2}:\d{2}\.\d{3}'
                        negate: true
                        match: after
                  paths:
                    - /var/log/containers/*-${data.kubernetes.container.id}.log
                  processors:
                    - add_kubernetes_metadata:
                        host: ${NODE_NAME}
                    - dissect:
                        tokenizer: '%{timestamp} %{level} %{pid} --- [%{thread}] %{class} : [%{app_name->}] %{message}'
                        field: "message"
                        target_prefix: "mylog"
                        ignore_missing: true
                        overwrite_keys: true
                    - drop_fields:
                        fields: ["kubernetes.node.labels", "kubernetes.annotations"]
                        ignore_missing: true
            # ---------- end Java agent/admin/payment free-text logs ----------

            # ---------- Java service Pod, email project, free-text logs ----------
            - condition:
                and:
                  - equals:
                      kubernetes.namespace: sit
                  - equals:
                      kubernetes.labels.app: "flymoon-email"
              config:
                - type: filestream
                  id: "container-${data.kubernetes.container.id}"
                  prospector.scanner.symlinks: true
                  close.on_state_change.removed: false
                  parsers:
                    - container: ~
                    - multiline:
                        type: pattern
                        pattern: '^\d{4}-\d{2}-\d{2}'
                        negate: true
                        match: after
                  paths:
                    - /var/log/containers/*-${data.kubernetes.container.id}.log
                  processors:
                    - add_kubernetes_metadata:
                        host: ${NODE_NAME}
                    - dissect:
                        tokenizer: '%{timestamp} %{level} %{pid} --- [%{thread}] %{class} : %{message}'
                        field: "message"
                        target_prefix: "mylog"
                        ignore_missing: true
                        overwrite_keys: true
                    - drop_fields:
                        fields: ["kubernetes.node.labels", "kubernetes.annotations"]
                        ignore_missing: true
            # ---------- end Java email free-text logs ----------

            # ---------- Python agents Pod, lessie-agents project, free-text logs ----------
            - condition:
                and:
                  - equals:
                      kubernetes.namespace: sit
                  - equals:
                      kubernetes.labels.app: "lessie-agents"
              config:
                - type: filestream
                  id: "container-${data.kubernetes.container.id}"
                  prospector.scanner.symlinks: true
                  close.on_state_change.removed: false
                  parsers:
                    - container: ~
                  paths:
                    - /var/log/containers/*-${data.kubernetes.container.id}.log
                  processors:
                    - add_kubernetes_metadata:
                        host: ${NODE_NAME}
                    # Layer 1: only parse lines that start with a timestamp (the business-alert log format)
                    - dissect:
                        when:
                          regexp:
                            message: '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}.*'
                        tokenizer: '%{timestamp} - %{level} - %{module} - %{function} - %{msg_body}'
                        field: "message"
                        target_prefix: "mylog"
                        ignore_missing: true
                        overwrite_keys: true
                    # Layer 2: for lines carrying [level: | event: | msg: | context:], dissect once more
                    - dissect:
                        when:
                          contains:
                            mylog.msg_body: "[level:"
                        tokenizer: '[level: %{event_level} | event: %{event} | msg: %{msg} | context: %{ctx_raw}]'
                        field: "mylog.msg_body"
                        target_prefix: "mylog"
                        ignore_missing: true
                        overwrite_keys: true
                    # Layer 3: split ctx_raw into individual fields
                    - script:
                        lang: javascript
                        id: parse_context
                        source: >
                          function process(event) {
                            var ctx = event.Get("mylog.ctx_raw");
                            if (!ctx) return;
                            var parts = ctx.trim().split(",");
                            for (var i = 0; i < parts.length; i++) {
                              var pair = parts[i].split(":");
                              if (pair.length === 2) {
                                event.Put("mylog." + pair[0].trim(), pair[1].trim());
                              }
                            }
                          }
                    # Layer 4: drop bulky, unneeded k8s metadata fields
                    - drop_fields:
                        fields:
                          - "kubernetes.node.labels"
                          - "kubernetes.annotations"
                        ignore_missing: true
            # ---------- end Python lessie-agents free-text logs ----------

            # ---------- Python agents Pods created dynamically by apex, lessie-agents project, free-text logs ----------
            - condition:
                and:
                  - equals:
                      kubernetes.namespace: apex-evaluation
                  - equals:
                      kubernetes.labels.apex: "lessie-agents"
              config:
                - type: filestream
                  id: "container-${data.kubernetes.container.id}"
                  prospector.scanner.symlinks: true
                  close.on_state_change.removed: false
                  parsers:
                    - container: ~
                  paths:
                    - /var/log/containers/*-${data.kubernetes.container.id}.log
                  processors:
                    - drop_fields:
                        fields:
                          - "kubernetes.node.labels"
                          - "kubernetes.annotations"
                        ignore_missing: true
            # ---------- end apex-created Python lessie-agents logs ----------


    # ---- Output to Elasticsearch ----
    output.elasticsearch:
      hosts: ["http://10.0.0.38:9200"]
      username: "admin"
      password: "G7ZSKFM4AQwHQpwA"

      indices:
        - index: "k8s-%{[kubernetes.labels.environment]}-%{[kubernetes.labels.app]}-%{+yyyy.MM}"
          when:
            regexp:
              kubernetes.labels.app: "(lessie-go-api|flymoon-admin|flymoon-agent|flymoon-payment|flymoon-email|lessie-agents|apex)"

        - index: "apex-python-%{+yyyy.MM}"
          when:
            equals:
              kubernetes.labels.apex: "lessie-agents"

    logging.level: info
    logging.selectors: ["*"]

OpenTelemetry/ES/filebast/022-filebeat-configmap.yaml (new file, 233 lines)
(Verbatim duplicate of 02-filebeat-configmap.yaml above.)


OpenTelemetry/ES/filebast/03-filebeat-daemonset.yaml (new file, 65 lines)

# Rolling restart:
# kubectl rollout restart daemonset filebeat -n kube-system

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: filebeat
  namespace: kube-system
  labels:
    k8s-app: filebeat
spec:
  selector:
    matchLabels:
      k8s-app: filebeat
  template:
    metadata:
      labels:
        k8s-app: filebeat
    spec:
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      containers:
        - name: filebeat
          image: docker.elastic.co/beats/filebeat:9.2.2
          args:
            - "-e"
          env:
            - name: TZ
              value: Asia/Shanghai
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          resources:
            limits:
              memory: 300Mi
            requests:
              cpu: 100m
              memory: 200Mi
          volumeMounts:
            - name: config
              mountPath: /usr/share/filebeat/filebeat.yml
              subPath: filebeat.yml
            - name: data
              mountPath: /var/lib/filebeat-data
            - name: containers
              mountPath: /var/log/containers
              readOnly: true
            - name: pods
              mountPath: /var/log/pods
              readOnly: true
      volumes:
        - name: config
          configMap:
            name: filebeat-config
        - name: data
          hostPath:
            path: /var/lib/filebeat-data
            type: DirectoryOrCreate
        - name: containers
          hostPath:
            path: /var/log/containers
        - name: pods
          hostPath:
            path: /var/log/pods
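
After changing the ConfigMap and rolling the DaemonSet, one way to sanity-check the rendered configuration and the ES connection from inside a running Pod (the pod name placeholder is illustrative):

kubectl -n kube-system get pods -l k8s-app=filebeat
kubectl -n kube-system exec <filebeat-pod> -- filebeat test config -c /usr/share/filebeat/filebeat.yml
kubectl -n kube-system exec <filebeat-pod> -- filebeat test output -c /usr/share/filebeat/filebeat.yml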

OpenTelemetry/ES/filebast/filebeat.yaml (new file, 226 lines)
(Standalone filebeat.yml; its content matches the filebeat.yml embedded in 02-filebeat-configmap.yaml above, except that both ES index names use a daily date suffix, %{+yyyy.MM.dd}, instead of the monthly %{+yyyy.MM}.)


OpenTelemetry/ES/单节点/安装es.conf (new file, 143 lines)

# Prerequisites & preparation
sudo dnf update -y
sudo dnf install -y nano wget curl unzip

# Open ports 9200 and 5601 in the security group / firewall

# Install Elasticsearch 9.2.2
# Import the official GPG key
sudo rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch

# Create the yum repo file
sudo tee /etc/yum.repos.d/elasticsearch.repo <<-'EOF'
[elasticsearch]
name=Elasticsearch repository for 9.x packages
baseurl=https://artifacts.elastic.co/packages/9.x/yum
gpgcheck=1
gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch
enabled=1
autorefresh=1
type=rpm-md
EOF

# Install Elasticsearch:
sudo dnf install elasticsearch --enablerepo=elasticsearch

# Start it first and check the logs if it fails; failures are often permission problems
sudo systemctl daemon-reload
sudo systemctl enable elasticsearch
sudo systemctl start elasticsearch
sudo systemctl status elasticsearch
sudo journalctl -u elasticsearch -f

# Create the log directory manually + set permissions
sudo mkdir -p /usr/share/elasticsearch/logs
sudo chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/logs
sudo chmod 750 /usr/share/elasticsearch/logs

# Set the password of the elastic superuser (recommended right away):
sudo /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic

# Check the self-signed certificates; if they are present, everything is normal
ll /etc/elasticsearch/certs/

# Show the HTTP CA certificate fingerprint (used when configuring other clients)
sudo openssl x509 -fingerprint -sha256 -in /etc/elasticsearch/certs/http_ca.crt -noout

# Set an environment variable (replace with your actual password)
export ELASTIC_PASSWORD='MyElastic123!'
# Test an HTTPS request (--cacert is required because TLS is enabled)
curl --cacert /etc/elasticsearch/certs/http_ca.crt \
  -u elastic:$ELASTIC_PASSWORD \
  https://localhost:9200


# Show the default configuration file
grep -v '^\s*#\|^\s*$' /etc/elasticsearch/elasticsearch.yml
# Adjust the configuration to your environment: cluster name, non-local access, etc.
cluster.name: my-test-es
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
network.host: 0.0.0.0
xpack.security.enabled: true
xpack.security.enrollment.enabled: true
xpack.security.http.ssl:
  enabled: true
  keystore.path: certs/http.p12
xpack.security.transport.ssl:
  enabled: true
  verification_mode: certificate
  keystore.path: certs/transport.p12
  truststore.path: certs/transport.p12
cluster.initial_master_nodes: ["weblessie-server-02"]
http.host: 0.0.0.0


# Change the ES JVM heap size
vim /etc/elasticsearch/jvm.options
-Xms4g
-Xmx4g

# Restart
sudo systemctl restart elasticsearch

# Create an enrollment token, to be used later in Kibana
sudo /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana


# Prepare to install Kibana 9.2.2
# Create the repo /etc/yum.repos.d/kibana.repo
sudo tee /etc/yum.repos.d/kibana.repo <<-'EOF'
[kibana]
name=Kibana repository for 9.x packages
baseurl=https://artifacts.elastic.co/packages/9.x/yum
gpgcheck=1
gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch
enabled=1
autorefresh=1
type=rpm-md
EOF

# Install Kibana:
sudo dnf install kibana --enablerepo=kibana
# Start it
sudo systemctl daemon-reload
sudo systemctl enable --now kibana

# Open Kibana and enter the generated token
http://ip:5601

# Get the "verification code"
/usr/share/kibana/bin/kibana-verification-code

# Generate the encryption keys with the official tool (the cleanest way)
sudo /usr/share/kibana/bin/kibana-encryption-keys generate --force
# The output should look like:
# ✔ Encryption keys generated and written to /etc/kibana/kibana.yml:
# xpack.encryptedSavedObjects.encryptionKey
# xpack.reporting.encryptionKey
# xpack.security.encryptionKey

# Edit the configuration file
grep -v '^\s*#\|^\s*$' /etc/kibana/kibana.yml
server.host: "0.0.0.0"
logging:
  appenders:
    file:
      type: file
      fileName: /var/log/kibana/kibana.log
      layout:
        type: json
  root:
    appenders:
      - default
      - file
pid.file: /run/kibana/kibana.pid
i18n.locale: "zh-CN"
elasticsearch.hosts: [https://10.0.0.38:9200]
elasticsearch.serviceAccountToken: AAEAAWVsYXN0aWMva2liYW5hL2Vucm9sbC1wcm9jZXNzLXRva2VuLTE3NjUzNDE4OTI3MjY6Um9KdUo2N1hSZVNPeGNzOXFDaUh2dw
elasticsearch.ssl.certificateAuthorities: [/var/lib/kibana/ca_1765341893683.crt]
xpack.fleet.outputs: [{id: fleet-default-output, name: default, is_default: true, is_default_monitoring: true, type: elasticsearch, hosts: [https://10.0.0.38:9200], ca_trusted_fingerprint: 80af64db043e12ebda11c10f70042af91306a705fdcb6285814a84b420c734a5}]
xpack.encryptedSavedObjects.encryptionKey: f10166c761265d5ca61e7fa2c1acac73
xpack.reporting.encryptionKey: 1772a5152522675d5a38470e905b2817
xpack.security.encryptionKey: d4b30e82e47f530a998e29cb0b8e5295

OpenTelemetry/ES/单节点/证书使用示例.conf (new file, 41 lines)

# Get the ES certificate fingerprint
sudo openssl x509 -fingerprint -sha256 -in /etc/elasticsearch/certs/http_ca.crt -noout
sha256 Fingerprint=80:AF:64:DB:04:3E:12:EB:DA:11:C1:0F:70:04:2A:F9:13:06:A7:05:FD:CB:62:85:81:4A:84:B4:20:C7:34:A5

# User created in the Kibana web UI
admin
G7ZSKFM4AQwHQpwA


# Filebeat
output.elasticsearch:
  hosts: ["https://49.51.33.153:9200"]
  username: "elastic"
  password: "-0NiIBOJGn2CATuPWzNc"

  # Verify with the fingerprint (instead of a certificate file)
  ssl.verification_mode: "certificate"
  ssl.certificate_authorities: []   # left empty (the full chain is not verified)
  ssl.supported_protocols: [TLSv1.2, TLSv1.3]

  # Key part: specify the CA fingerprint (uppercase hex, no 0x prefix, colons removed)
  ssl.ca_trusted_fingerprint: "80AF64DB043E12EBDA11C10F70042AF91306A705FDCB6285814A84B420C734A5"


# Python
from elasticsearch import Elasticsearch

es = Elasticsearch(
    hosts=["https://49.51.33.153:9200"],
    basic_auth=("elastic", "-0NiIBOJGn2CATuPWzNc"),
    # The fingerprint must have the colons removed and be uppercase
    ssl_assert_fingerprint="80AF64DB043E12EBDA11C10F70042AF91306A705FDCB6285814A84B420C734A5",
    verify_certs=True  # must be True
)

print(es.info())

OpenTelemetry/prometheus/二进制部署/grafana/dm.sh (new file, 46 lines)

# Download the tarball
wget https://dl.grafana.com/grafana-enterprise/release/12.3.1/grafana-enterprise_12.3.1_20271043721_linux_amd64.tar.gz

# Create the Grafana directories (data + config)
mkdir -p /data/grafana/

# Create a user account for Grafana
useradd -r -s /bin/false grafana

# Extract the binaries into /data/grafana/
tar -xzf grafana-enterprise_12.3.1_20271043721_linux_amd64.tar.gz -C /data/grafana/

# Change the owner of /data/grafana/ to the Grafana user
chown -R grafana:grafana /data/grafana/

# Copy the default configuration file
cp /data/grafana/conf/defaults.ini /data/grafana/conf/grafana.ini

# Create the Grafana server systemd unit file
sudo touch /etc/systemd/system/grafana-server.service

[Unit]
Description=Grafana Server
After=network.target

[Service]
Type=simple
User=grafana
Group=grafana
ExecStart=/data/grafana/bin/grafana server --config=/data/grafana/conf/grafana.ini --homepath=/data/grafana
Restart=on-failure

[Install]
WantedBy=multi-user.target


# Enable the Grafana server systemd service
sudo systemctl daemon-reload
sudo systemctl start grafana-server
sudo systemctl enable grafana-server
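
A quick way to confirm Grafana came up, assuming the default HTTP port 3000:

curl -s http://localhost:3000/api/health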

OpenTelemetry/prometheus/二进制部署/prometheus/dm.bash (new file, 81 lines)

# Create directories
mkdir -p /data/prometheus/
mkdir -p /data/alertmanager/

# Download the tarballs
wget https://github.com/prometheus/prometheus/releases/download/v3.8.1/prometheus-3.8.1.linux-amd64.tar.gz
wget https://github.com/prometheus/alertmanager/releases/download/v0.30.0/alertmanager-0.30.0.linux-amd64.tar.gz

# Create the system user (if it does not exist yet)
sudo useradd --no-create-home --shell /bin/false prometheus || true

# Grant directory permissions
sudo chown -R prometheus:prometheus /data/prometheus
sudo chown -R prometheus:prometheus /data/alertmanager

# Create the file /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
# Note: --storage.tsdb.path sets the data directory; putting it under /data is recommended
ExecStart=/data/prometheus/prometheus \
  --config.file=/data/prometheus/prometheus.yml \
  --storage.tsdb.path=/data/prometheus/data \
  --web.console.templates=/data/prometheus/consoles \
  --web.console.libraries=/data/prometheus/console_libraries

Restart=always

[Install]
WantedBy=multi-user.target

|
||||
[Unit]
|
||||
Description=Alertmanager
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
User=prometheus
|
||||
Group=prometheus
|
||||
Type=simple
|
||||
ExecStart=/data/alertmanager/alertmanager \
|
||||
--config.file=/data/alertmanager/alertmanager.yml \
|
||||
--storage.path=/data/alertmanager/data
|
||||
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
# 修改 Prometheus 关联 Alertmanager
|
||||
# Alerting configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
- localhost:9093 # Alertmanager 默认端口
|
||||
|
||||
|
||||
# 重载 systemd
|
||||
sudo systemctl daemon-reload
|
||||
|
||||
# 启动并设置开机自启
|
||||
sudo systemctl enable --now prometheus
|
||||
sudo systemctl enable --now alertmanager
|
||||
|
||||
# 检查状态
|
||||
sudo systemctl status prometheus
|
||||
sudo systemctl status alertmanager
|
||||
|
||||
|
||||
配置文件检查
|
||||
在重启服务前,可以使用自带的工具检查语法是否正确:
|
||||
Prometheus 检查: /data/prometheus/promtool check config /data/prometheus/prometheus.yml
|
||||
Alertmanager 检查: /data/alertmanager/amtool check-config /data/alertmanager/alertmanager.yml
|
||||

OpenTelemetry/readme.txt (new file, 53 lines)

1. Storage
ES, Prometheus and Tempo are all deployed as binaries outside the k8s cluster.

2. Collection, processing, forwarding
Everything else, the OpenTelemetry Collectors, is deployed inside the k8s cluster:
1) DaemonSet Collector: collects node-/container-level "metrics + logs"
2) Deployment Collector: receives the DaemonSet Collector's data, processes it, and relays it to storage; also handles "trace data"

3. The three data types
1) Metrics (end up in Prometheus)
Collector: OTel Collector (DaemonSet mode inside k8s)
What is collected: node CPU/memory/disk (replaces node-exporter), container resource usage (replaces kubelet metrics), custom metrics from business Pods (requires the application to integrate the OTel SDK).
Processor: OTel Collector (Deployment mode, cluster-level aggregation)
Processing: normalize the metric format, add k8s labels (cluster name, Pod name, ...), batch.
Sender: OTel Collector
Protocol: Prometheus Remote Write
Receiver: Prometheus outside the cluster.

graph TD
    A[k8s node] -->|scraped by DaemonSet Collector| B[node CPU/memory/disk metrics]
    C[k8s container] -->|scraped by DaemonSet Collector| D[container usage metrics]
    B -->|reported to| E[Deployment Collector]
    D -->|reported to| E[Deployment Collector]
    E -->|normalize + batch| F[Remote Write protocol]
    F -->|forwarded to| G[Prometheus outside the cluster]

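The flow above has the DaemonSet Collector reporting to the Deployment Collector, while the shared ConfigMap in this commit exports straight to Prometheus. A minimal sketch of what that DaemonSet-side forwarding could look like, assuming the Deployment Collector is reachable through a Service named otel-collector (that Service is an assumption, not part of this commit):

exporters:
  otlp:
    endpoint: otel-collector.monitoring.svc.cluster.local:4317  # assumed Service DNS name
    tls:
      insecure: true
service:
  pipelines:
    metrics:
      receivers: [hostmetrics, kubeletstats]
      processors: [batch, resource]
      exporters: [otlp]
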
2) Logs (end up in ES)
Collector: OTel Collector (DaemonSet mode)
What is collected: container logs under /var/log/containers on the k8s nodes (replaces Filebeat).
Processor: OTel Collector (Deployment mode)
Processing: parse the log format (JSON / regex), filter redundant logs, add k8s resource labels.
Sender: OTel Collector
Protocol: Elasticsearch API
Receiver: ES outside the cluster.

3) Request traces (end up in Tempo)
Collector: two scenarios
  Infrastructure traces: the OTel Collector (Deployment mode) collects trace data from k8s components (e.g. Ingress/Nginx)
  Business traces: applications integrate the OTel SDK (Java agent, Go SDK, ...) and collect request traces inside the application.
Processor: OTel Collector (Deployment mode)
Processing: normalize the trace format, attach k8s resource information, batch.
Sender: OTel Collector
Protocol: OTLP (OpenTelemetry protocol, gRPC/HTTP)
Receiver: Tempo outside the cluster.


graph LR
    A[metrics receivers] -->|metrics pipeline| B[metrics processors] --> C[Prometheus exporter]
    D[log receivers] -->|logs pipeline| E[log processors] --> F[ES exporter]
    G[trace receivers] -->|traces pipeline| H[trace processors] --> I[Tempo exporter]

OpenTelemetry/tempo/cos-tempo.yaml (new file, 58 lines)

server:
  http_listen_port: 3200   # HTTP listen port
  grpc_listen_port: 9095   # gRPC listen port

distributor:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317   # OTLP gRPC listen address
        http:
          endpoint: 0.0.0.0:4318   # OTLP HTTP listen address

ingester:
  lifecycler:
    ring:
      replication_factor: 1   # number of data replicas
  max_block_duration: 5m      # maximum block duration
  trace_idle_period: 10s      # a trace that has been idle this long is flushed automatically

compactor:
  compaction:
    block_retention: 720h             # block retention, 720 hours (30 days)
    compacted_block_retention: 168h   # retention of compacted blocks, 168 hours (7 days)
    max_compaction_objects: 1000000   # maximum objects per compaction

metrics_generator:
  registry:
    external_labels:
      source: tempo
      cluster: linux-microservices
  storage:
    path: /data/tempo/data/wal
    remote_write:
      - url: http://127.0.0.1:9090/api/v1/write
        send_exemplars: true

storage:
  trace:
    backend: s3
    s3:
      endpoint: outscalelink-1324597558.cos.na-siliconvalley.myqcloud.com
      bucket: outscalelink-1324597558
      prefix: tempo-data/
      forcepathstyle: true
      enable_dual_stack: false
      insecure: true
      access_key: AKIDkgR4lHvU1QfieR7cxBLLTaUCh0S0dDev
      secret_key: fAWjldKuPhz4wb6RedPzPccOwGOet9Ug
    wal:
      path: /data/tempo/data/wal
    local:
      path: /data/tempo/blocks

overrides:
  metrics_generator_processors: [service-graphs, span-metrics]


OpenTelemetry/tempo/dm.sh (new file, 96 lines)

mkdir -p /data/tempo/{conf,data,metrics-generator}
mkdir -p /data/tempo/data/wal
mkdir -p /data/tempo/metrics-generator/wal

chown -R tempo:tempo /data/tempo

chown -R tempo:tempo /data/tempo/data/traces

# Create a dedicated user and a service so Tempo runs reliably in the background
sudo useradd --no-create-home --shell /bin/false tempo || true

# Download the tarball
wget https://github.com/grafana/tempo/releases/download/v2.9.0/tempo_2.9.0_linux_amd64.tar.gz
# Extract
tar -xzf tempo_2.9.0_linux_amd64.tar.gz -C /data/tempo/
# Move the executables to /usr/local/bin/
mv /data/tempo/tempo /data/tempo/tempo-cli /data/tempo/tempo-query /usr/local/bin/
# Check the version
tempo --version

# Create the configuration file (local disk as the storage backend)
vim local-tempo.yaml
server:
  http_listen_port: 3200   # Tempo web port (used by Grafana)
  grpc_listen_port: 9095   # gRPC port (optional)

distributor:
  receivers:               # protocols for receiving OTel trace data (core part)
    otlp:
      protocols:
        grpc:              # listen on 4317 (same as the OTel Collector, receives OTLP gRPC)
          endpoint: 0.0.0.0:4317
        http:              # listen on 4318 (receives OTLP HTTP)
          endpoint: 0.0.0.0:4318

ingester:
  max_block_duration: 5m   # block duration (can be small in a test environment)
  trace_idle_period: 10s   # trace idle timeout

compactor:
  compaction:
    block_retention: 30d   # keep traces for 30 days (adjust as needed)

storage:
  trace:
    backend: local                  # single-node local storage (use S3/MinIO in production)
    local:
      path: /data/tempo/data        # trace storage directory (under /data/tempo/data)
    wal:
      path: /data/tempo/data/wal    # write-ahead log directory (protects against data loss)

# With an object-storage bucket as the backend
vim cos-tempo.yaml


# Start in the foreground
/usr/local/bin/tempo \
  -config.file=/data/tempo/conf/tempo.yaml \
  -config.expand-env=true

# Check the service status
systemctl is-active tempo

# Systemd service (daemon)
vim /etc/systemd/system/tempo.service

[Unit]
Description=Grafana Tempo
Wants=network-online.target
After=network-online.target

[Service]
User=tempo
Group=tempo
Type=simple
# config.file points at the configuration file; mind the file name
ExecStart=/usr/local/bin/tempo \
  -config.file=/data/tempo/conf/tempo.yaml \
  -config.expand-env=true

Restart=always
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target


# Reload systemd
sudo systemctl daemon-reload
# Start and enable at boot
sudo systemctl enable --now tempo
# Check status
sudo systemctl status tempo

# View logs
journalctl -u tempo --no-pager -n 50
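
Once the unit is running, a quick readiness probe against the HTTP port configured above (Tempo serves a /ready endpoint on its HTTP listener):

curl -s http://localhost:3200/ready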

@@ -57,6 +57,7 @@ pipeline {
                TIMESTAMP=\$(date +"%Y%m%d_%H%M%S")
                LOGFILE="${REMOTE_PROJECT_PATH}/logs/lessie_email_\${TIMESTAMP}.log"
                nohup env ENV=s4 uv run uvicorn app.main:app --host 0.0.0.0 --port 8031 > "\$LOGFILE" 2>&1 &
                // nohup env ENV=s4 uv run uvicorn app.main:app --host 0.0.0.0 --port 8031 --log-config logging_config.json > "\$LOGFILE" 2>&1 &
                ln -sf "\$LOGFILE" ${REMOTE_PROJECT_PATH}/logs/lessie_email_latest.log
            '
        """

@@ -78,6 +78,8 @@ server {
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    client_max_body_size 50m;

    proxy_buffering off;
    proxy_cache off;
    proxy_http_version 1.1;


prometheus/二进制部署/grafana/dm.sh (new file, 46 lines)
(Verbatim duplicate of OpenTelemetry/prometheus/二进制部署/grafana/dm.sh above.)


prometheus/二进制部署/prometheus/dm.bash (new file, 81 lines)
(Verbatim duplicate of OpenTelemetry/prometheus/二进制部署/prometheus/dm.bash above.)


tempo/cos-tempo.yaml (new file, 58 lines)
(Verbatim duplicate of OpenTelemetry/tempo/cos-tempo.yaml above.)


tempo/dm.sh (new file, 96 lines)
(Verbatim duplicate of OpenTelemetry/tempo/dm.sh above.)

@@ -73,3 +73,7 @@ log_format s1_jennie_im_log '客户端IP: $remote_addr | 用户: $remote_user |
    '来源页面: "$http_referer" | 客户端UA: "$http_user_agent" | '
    '上游服务器: $upstream_addr | 上游响应耗时: $upstream_response_time | '
    '请求总耗时: $request_time | Host: $host';