2026-01-27同步
This commit is contained in:
@@ -1,96 +0,0 @@
|
||||
# OpenTelemetryCollector "otel-gateway": a single-replica Deployment that
# accepts OTLP (gRPC 4317 / HTTP 4318), collects Kubernetes cluster-state
# metrics, and forwards metrics to an external Prometheus via remote write.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-gateway
  namespace: opentelemetry-operator-system
spec:
  mode: deployment
  replicas: 1
  # Original author's note: the Operator creates this account and binds its
  # permissions automatically. TODO confirm for the Operator version in use.
  serviceAccount: otel-gateway-collector
  config:
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318

      # --- Core: collect Kubernetes cluster-state metrics ---
      # Reports the state of Deployments, DaemonSets, StatefulSets, HPAs,
      # Nodes and similar resources.
      k8s_cluster:
        collection_interval: 30s
        node_conditions_to_report: [Ready, MemoryPressure, DiskPressure, PIDPressure]
        allocatable_types_to_report: [cpu, memory]

    processors:
      batch:
        send_batch_size: 1000
        timeout: 10s
      memory_limiter:
        check_interval: 1s
        limit_percentage: 70
        spike_limit_percentage: 30

      # Adds Kubernetes metadata labels (described by the author as an
      # important role of the gateway). NOTE: defined here but not referenced
      # by any pipeline below, so it is currently inactive.
      k8sattributes:
        extract:
          metadata:
            - k8s.namespace.name
            - k8s.pod.name
            - k8s.deployment.name
            - k8s.statefulset.name
            - k8s.daemonset.name
            - k8s.cronjob.name
            - k8s.job.name
            - k8s.node.name
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection

    exporters:
      # 1. Export metrics to the external Prometheus (Remote Write).
      prometheusremotewrite:
        endpoint: "http://10.0.0.38:9090/api/v1/write"
        # If Basic Auth is required, configure it here.
        # external_labels:
        #   cluster: "test-k8s-cluster"

      # 2. Export traces to an external Tempo (OTLP gRPC).
      # otlp/tempo:
      #   endpoint: "<YOUR_TEMPO_IP>:4317"
      #   tls:
      #     insecure: true

      # 3. Export logs to an external Elasticsearch (optional).
      # elasticsearch:
      #   endpoints: ["http://<YOUR_ES_IP>:9200"]
      #   logs_index: "k8s-logs"

      debug:
        verbosity: basic

    service:
      pipelines:
        metrics:
          receivers: [otlp, k8s_cluster]
          processors: [memory_limiter, batch]
          # Whether k8sattributes belongs before or after batch depends on the
          # architecture; a gateway mostly forwards. Data produced by
          # k8s_cluster already carries its labels, and OTLP data is expected
          # to be labelled on the agent side.
          exporters: [prometheusremotewrite]

        traces:
          receivers: [otlp]
          processors: [memory_limiter, batch]
          # A pipeline may only reference exporters that are configured under
          # `exporters`. otlp/tempo is still commented out above, so traces go
          # to the debug exporter; switch this to [otlp/tempo] once that
          # exporter is enabled.
          exporters: [debug]

        # logs:
        #   receivers: [otlp]
        #   processors: [memory_limiter, batch]
        #   exporters: [elasticsearch]
|
||||
38
OpenTelemetry/Collector_v3/04-monitoring-rbac.yaml
Normal file
38
OpenTelemetry/Collector_v3/04-monitoring-rbac.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
# Identity used by the collectors in the "monitoring" namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: otel-collector-sa
  namespace: monitoring
---
# Cluster-wide, read-only (get/list/watch) access to the resource kinds listed
# below, grouped by API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-collector-role
rules:
  # Core API group.
  - apiGroups:
      - ""
    resources:
      - events
      - nodes
      - nodes/proxy
      - nodes/stats
      - services
      - endpoints
      - pods
      - namespaces
      - replicationcontrollers
      - resourcequotas
    verbs:
      - get
      - list
      - watch
  # Workload controllers.
  - apiGroups:
      - apps
    resources:
      - statefulsets
      - daemonsets
      - deployments
      - replicasets
    verbs:
      - get
      - list
      - watch
  # Batch workloads.
  - apiGroups:
      - batch
    resources:
      - jobs
      - cronjobs
    verbs:
      - get
      - list
      - watch
  # Autoscalers.
  - apiGroups:
      - autoscaling
    resources:
      - horizontalpodautoscalers
    verbs:
      - get
      - list
      - watch
---
# Grants otel-collector-role to the otel-collector-sa ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-collector-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: otel-collector-role
subjects:
  - kind: ServiceAccount
    name: otel-collector-sa
    namespace: monitoring
|
||||
@@ -1,88 +0,0 @@
|
||||
# OpenTelemetryCollector "otel-agent": a DaemonSet that scrapes kubelet stats
# and host metrics on each node and forwards them over OTLP gRPC to the
# otel-gateway collector service.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-agent
  namespace: opentelemetry-operator-system
spec:
  mode: daemonset
  # Author's recommendation: enable host networking to obtain more accurate
  # host metrics.
  hostNetwork: true
  # The kubeletstats endpoint below expands ${env:K8S_NODE_NAME}; expose the
  # node name through the downward API so that variable is actually set.
  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
  config:
    receivers:
      # 1. Resource usage (CPU / memory) of pods and containers.
      kubeletstats:
        collection_interval: 20s
        auth_type: "serviceAccount"
        endpoint: "${env:K8S_NODE_NAME}:10250"
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container

      # 2. Physical host metrics.
      hostmetrics:
        collection_interval: 20s
        # Explicit empty mappings instead of bare (null) values, matching the
        # `cpu: {}` form used by the newer agent manifest.
        scrapers:
          cpu: {}
          memory: {}
          load: {}
          filesystem: {}
          network: {}

      # 3. (Optional) log collection.
      # filelog:
      #   include: [/var/log/pods/*/*/*.log]
      #   ...

    processors:
      batch:
        send_batch_size: 500
        timeout: 5s
      memory_limiter:
        check_interval: 1s
        limit_mib: 400
        spike_limit_mib: 100

      # Resource detection: hostname and similar attributes. Author's note: on
      # a Tencent Cloud CVM a 'tencentcloud' detector could be tried, but
      # `system` is usually enough. TODO confirm such a detector exists
      # before adding it.
      resourcedetection:
        detectors: [system]
        timeout: 2s
        override: false

      # Key step: label metrics with Kubernetes metadata
      # (pod name, namespace, node name).
      k8sattributes:
        passthrough: false
        extract:
          metadata:
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.deployment.name
            - k8s.namespace.name
            - k8s.node.name
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: connection

    exporters:
      # Send to the in-cluster gateway Service.
      otlp:
        endpoint: "otel-gateway-collector.opentelemetry-operator-system.svc.cluster.local:4317"
        tls:
          insecure: true

    service:
      pipelines:
        metrics:
          receivers: [kubeletstats, hostmetrics]
          # memory_limiter goes first so back-pressure is applied before any
          # other processor spends memory on the data.
          processors: [memory_limiter, resourcedetection, k8sattributes, batch]
          exporters: [otlp]

        # traces:  # for applications that send traces to the local agent (sidecar or similar)
        #   receivers: [otlp]
        #   exporters: [otlp]
|
||||
57
OpenTelemetry/Collector_v3/05-otel-gateway.yaml
Normal file
57
OpenTelemetry/Collector_v3/05-otel-gateway.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
# OpenTelemetryCollector "otel-gateway" (namespace: monitoring): a
# single-replica Deployment that
#   - receives OTLP on 4317 (gRPC) / 4318 (HTTP) and collects cluster-state
#     metrics, exporting them to Prometheus' OTLP endpoint;
#   - collects Kubernetes events and exports them to Elasticsearch (and to the
#     debug exporter).
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-gateway
  namespace: monitoring
spec:
  mode: deployment
  image: otel/opentelemetry-collector-contrib:0.144.0
  replicas: 1
  serviceAccount: otel-collector-sa
  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    # Elasticsearch password, read from a Secret instead of being committed in
    # this manifest. The Secret must exist before this resource is applied:
    #   kubectl -n monitoring create secret generic otel-es-credentials \
    #     --from-literal=password='<password>'
    # The password that was previously committed here should be rotated.
    - name: ES_PASSWORD
      valueFrom:
        secretKeyRef:
          name: otel-es-credentials
          key: password
  config:
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      k8s_cluster:
        collection_interval: 30s
      k8s_events: {}

    processors:
      batch:
        send_batch_size: 1000
        timeout: 10s
      resourcedetection:
        detectors: [env, system, k8snode]
        # This pipeline also carries data forwarded by the agents. Keep the
        # attributes they already set instead of overwriting them with the
        # gateway pod's own host/node values (the processor's default is
        # override: true).
        override: false

    exporters:
      debug:
        verbosity: detailed

      otlp_http/prometheus:
        endpoint: "http://10.0.0.38:9090/api/v1/otlp"

      elasticsearch:
        endpoints: ["http://10.0.0.38:9200"]
        logs_index: "k8s-test-cluster-events"
        user: "elastic"
        # Expanded by the collector from the ES_PASSWORD env var defined above.
        password: "${env:ES_PASSWORD}"

    service:
      pipelines:
        metrics:
          receivers: [otlp, k8s_cluster]
          processors: [resourcedetection, batch]
          exporters: [otlp_http/prometheus]
        logs:
          receivers: [k8s_events]
          processors: [batch]
          exporters: [elasticsearch, debug]
|
||||
55
OpenTelemetry/Collector_v3/06-otel-agent.yaml
Normal file
55
OpenTelemetry/Collector_v3/06-otel-agent.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
# OpenTelemetryCollector "otel-agent" (namespace: monitoring): a DaemonSet
# that scrapes host and kubelet metrics, stamps them with the cluster name,
# and ships them over OTLP gRPC to the otel-gateway collector service.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-agent
  namespace: monitoring
spec:
  mode: daemonset
  image: "otel/opentelemetry-collector-contrib:0.144.0"
  serviceAccount: otel-collector-sa
  env:
    # Node name from the downward API; used by the kubeletstats endpoint.
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    # Cluster name is defined in one place; change this value for Prod.
    - name: CLUSTER_NAME
      value: "test-k8s-cluster"
  config:
    receivers:
      hostmetrics:
        collection_interval: 30s
        scrapers:
          cpu: {}
          memory: {}
      kubeletstats:
        collection_interval: 30s
        auth_type: serviceAccount
        endpoint: "https://${env:K8S_NODE_NAME}:10250"
        insecure_skip_verify: true

    processors:
      batch: {}
      resourcedetection:
        detectors:
          - env
          - system
          - k8snode

      # Adds k8s.cluster.name from CLUSTER_NAME to every metric's resource
      # (action `insert`).
      resource:
        attributes:
          - key: k8s.cluster.name
            value: "${env:CLUSTER_NAME}"
            action: insert

    exporters:
      otlp:
        endpoint: "otel-gateway-collector.monitoring.svc.cluster.local:4317"
        tls:
          insecure: true

    service:
      pipelines:
        metrics:
          receivers:
            - hostmetrics
            - kubeletstats
          # The `resource` processor must be listed here for the cluster-name
          # attribute to be applied.
          processors:
            - resourcedetection
            - resource
            - batch
          exporters:
            - otlp
|
||||
Reference in New Issue
Block a user