改改改

This commit is contained in:
2026-01-09 17:50:32 +08:00
parent cf14d8a6db
commit 0384834345
37 changed files with 1944 additions and 2 deletions

View File

@@ -0,0 +1,56 @@
# 1. Create the "monitoring" namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
  labels:
    # Plain "name" label mirroring the namespace name.
    name: monitoring
---
# 2. Create the ServiceAccount "otel-collector" in the monitoring namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: otel-collector
---
# 3. Create the ClusterRole (least privilege: read-only verbs only).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-collector-role
rules:
  # Read node/pod/service metadata (baseline permissions).
  - apiGroups:
      - ""
    resources:
      - nodes
      - pods
      - services
      - endpoints
      - nodes/metrics
      - nodes/stats
    verbs:
      - get
      - list
      - watch
  # To be added later:
  # # Collect Deployment/DaemonSet/StatefulSet (apps API group)
  # - apiGroups: ["apps"]
  #   resources: ["deployments", "daemonsets", "statefulsets", "replicasets"]
  #   verbs: ["get", "list", "watch"]
  # # Collect HPA (autoscaling API group)
  # - apiGroups: ["autoscaling"]
  #   resources: ["horizontalpodautoscalers"]
  #   verbs: ["get", "list", "watch"]
  # # Collect Kubernetes events (optional, for troubleshooting)
  # - apiGroups: [""]
  #   resources: ["events"]
  #   verbs: ["get", "list", "watch"]
---
# 4. Bind the ClusterRole "otel-collector-role" to the ServiceAccount
#    "otel-collector" in the monitoring namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-collector-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: otel-collector-role
subjects:
  - kind: ServiceAccount
    namespace: monitoring
    name: otel-collector

View File

@@ -0,0 +1,64 @@
# ConfigMap holding the OpenTelemetry Collector configuration under the key
# "config.yaml".
#
# Notes for maintainers:
# - The kubeletstats endpoint expands ${K8S_NODE_NAME}; every pod that mounts
#   this config must define that environment variable (for example from the
#   downward API field spec.nodeName).
# - service.telemetry.metrics exposes the collector's own metrics through a
#   pull-based Prometheus reader on 0.0.0.0:8888. The keys "endpoint" and
#   "collection_interval" are not part of the telemetry metrics schema and
#   are rejected at startup, so they were replaced by "readers". With a pull
#   reader the scrape interval is decided by whoever scrapes port 8888.
# - NOTE(review): insecure_skip_verify disables verification of the kubelet
#   serving certificate — confirm this is acceptable outside test clusters.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-collector-config
  namespace: monitoring
data:
  config.yaml: |
    # 全局配置
    receivers:
      # 1. 节点级指标采集仅DaemonSet生效
      hostmetrics:
        collection_interval: 30s
        scrapers:
          cpu: {}
          memory: {}
          disk: {}
          filesystem: {}
          network: {}
          load: {}
          processes: {}
      # 2. 容器级指标采集仅DaemonSet生效修复kubeletstats配置
      kubeletstats:
        collection_interval: 30s
        auth_type: "serviceAccount"
        endpoint: "https://${K8S_NODE_NAME}:10250"
        insecure_skip_verify: true
      # 3. OTLP接收器DaemonSet/Deployment都生效
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
    processors:
      batch: {}
      resource:
        attributes:
          - key: k8s.cluster.name
            value: test-k8s
            action: insert
          - key: k8s.node.name
            from_attribute: host.name
            action: insert
    exporters:
      prometheusremotewrite:
        endpoint: "http://10.0.0.38:9090/api/v1/write"
        external_labels:
          k8s_cluster: test-k8s
    # 核心修复service配置
    service:
      pipelines:
        metrics:
          receivers: [hostmetrics, kubeletstats, otlp]
          processors: [batch, resource]
          exporters: [prometheusremotewrite]
      telemetry:
        logs:
          level: info
        metrics:
          readers:
            - pull:
                exporter:
                  prometheus:
                    host: "0.0.0.0"
                    port: 8888

View File

@@ -0,0 +1,57 @@
# DaemonSet: runs one collector pod per node for node-level metric collection.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: otel-collector-daemonset
  namespace: monitoring
  labels:
    app: otel-collector-daemonset
spec:
  selector:
    matchLabels:
      app: otel-collector-daemonset
  template:
    metadata:
      labels:
        app: otel-collector-daemonset
    spec:
      serviceAccountName: otel-collector
      hostNetwork: false  # host networking is not needed
      containers:
        - name: otel-collector
          # NOTE(review): "latest" is a floating tag — consider pinning a version.
          image: otel/opentelemetry-collector-contrib:latest
          args: ["--config=/etc/otel-collector/config.yaml"]
          env:
            # The mounted collector config builds the kubelet endpoint from
            # ${K8S_NODE_NAME}; without this variable the endpoint has no host.
            # The node name must be resolvable from inside the pod.
            - name: K8S_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          # Mount host directories (used to collect node metrics).
          volumeMounts:
            - name: otel-config
              mountPath: /etc/otel-collector
            - name: proc
              mountPath: /proc
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: rootfs
              mountPath: /rootfs
              readOnly: true
          # Resource limits (adjust as needed).
          resources:
            limits:
              cpu: 500m
              memory: 512Mi
            requests:
              cpu: 100m
              memory: 256Mi
      volumes:
        - name: otel-config
          configMap:
            name: otel-collector-config
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: rootfs
          hostPath:
            path: /

View File

@@ -0,0 +1,43 @@
# Deployment: cluster-level collector for aggregation and forwarding.
# NOTE(review): this pod mounts the ConfigMap "otel-collector-config", the same
# one the node-level DaemonSet uses — confirm that running its full receiver
# list here is intended.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: monitoring
  name: otel-collector-deployment
  labels:
    app: otel-collector-deployment
spec:
  # Single replica for the test environment; production may scale to 2.
  replicas: 1
  selector:
    matchLabels:
      app: otel-collector-deployment
  template:
    metadata:
      labels:
        app: otel-collector-deployment
    spec:
      serviceAccountName: otel-collector
      volumes:
        - name: otel-config
          configMap:
            name: otel-collector-config
      containers:
        - name: otel-collector
          image: otel/opentelemetry-collector-contrib:latest
          args:
            - "--config=/etc/otel-collector/config.yaml"
          # Exposed ports.
          ports:
            # OTLP gRPC
            - containerPort: 4317
            # OTLP HTTP
            - containerPort: 4318
            # Collector self-monitoring
            - containerPort: 8888
          volumeMounts:
            - name: otel-config
              mountPath: /etc/otel-collector
          # Resource limits.
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi

View File

@@ -0,0 +1,57 @@
# OpenTelemetry Collector configuration: a single metrics pipeline that
# collects host, kubelet and OTLP metrics and ships them through Prometheus
# remote write.
receivers:
  # 1. Node-level metrics (intended for the DaemonSet only).
  hostmetrics:
    collection_interval: 30s
    scrapers:
      cpu: {}
      memory: {}
      disk: {}
      filesystem: {}
      network: {}
      load: {}
      processes: {}
  # 2. Container-level metrics from the kubelet (intended for the DaemonSet only).
  kubeletstats:
    collection_interval: 30s
    auth_type: "serviceAccount"
    # K8S_NODE_NAME must be defined in the collector's environment
    # (for example from the downward API field spec.nodeName).
    endpoint: "https://${K8S_NODE_NAME}:10250"
    # NOTE(review): skips verification of the kubelet serving certificate —
    # confirm this is acceptable outside test clusters.
    insecure_skip_verify: true
  # 3. OTLP receiver (used by both the DaemonSet and the Deployment).
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318
processors:
  batch: {}
  resource:
    attributes:
      - key: k8s.cluster.name
        value: test-k8s
        action: insert
      # Copies host.name into k8s.node.name when the key is not already set.
      - key: k8s.node.name
        from_attribute: host.name
        action: insert
exporters:
  prometheusremotewrite:
    endpoint: "http://10.0.0.38:9090/api/v1/write"
    external_labels:
      k8s_cluster: test-k8s
# Service section: wires the pipeline and the collector's own telemetry.
service:
  pipelines:
    metrics:
      receivers: [hostmetrics, kubeletstats, otlp]
      processors: [batch, resource]
      exporters: [prometheusremotewrite]
  telemetry:
    logs:
      level: info
    metrics:
      # "endpoint" and "collection_interval" are not valid keys under
      # service.telemetry.metrics and are rejected at startup. The collector's
      # own metrics are exposed through a pull-based Prometheus reader; the
      # scrape interval is decided by whoever scrapes port 8888.
      readers:
        - pull:
            exporter:
              prometheus:
                host: "0.0.0.0"
                port: 8888