Files
Work-configuration-file/OpenTelemetry/Collector_v2/02-otel-gateway-config.yaml
2026-01-19 22:08:33 +08:00

82 lines
2.5 KiB
YAML

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-gateway-config
  namespace: monitoring
data:
  config.yaml: |
    receivers:
      # Receive data from the Agents (gRPC 4317, HTTP 4318)
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      # 1. Cluster macro view: Kubernetes events
      k8s_events:
        auth_type: serviceAccount
      # 2. Cluster macro view: scrape TKE's built-in tke-kube-state-metrics (Kube-State-Metrics)
      prometheus:
        config:
          scrape_configs:
            - job_name: 'tke-kube-state-metrics'
              scrape_interval: 30s
              static_configs:
                - targets: ['tke-kube-state-metrics.kube-system.svc.cluster.local:8180']
    processors:
      batch:
        send_batch_size: 2000
        timeout: 10s
      resourcedetection:
        detectors: [env, system]
        override: true
      # 3. Inject the cluster ID to avoid Prometheus duplicate-sample errors
      resource:
        attributes:
          - key: cluster.name
            value: "test-k8s"
            action: upsert
      # Promote OTLP resource attributes to metric (datapoint) labels so
      # Prometheus can distinguish different Pods/Nodes
      transform:
        metric_statements:
          - context: datapoint
            statements:
              - set(attributes["k8s_pod_name"], resource.attributes["k8s.pod.name"])
              - set(attributes["k8s_node_name"], resource.attributes["k8s.node.name"])
              - set(attributes["k8s_namespace_name"], resource.attributes["k8s.namespace.name"])
              - set(attributes["k8s_container_name"], resource.attributes["k8s.container.name"])
              - set(attributes["cluster_name"], resource.attributes["cluster.name"])
      memory_limiter:
        check_interval: 1s
        limit_mib: 1500
        spike_limit_mib: 512
    exporters:
      # Export to Prometheus via its native OTLP receiver.
      # FIX: the previous endpoint ".../api/v1/write" is Prometheus's
      # remote-write endpoint, which does not accept OTLP payloads; the
      # otlphttp exporter must target "/api/v1/otlp" (it appends
      # "/v1/metrics" itself). Prometheus must be started with
      # --web.enable-otlp-receiver for this path to exist.
      otlphttp/prometheus:
        endpoint: "http://10.0.0.38:9090/api/v1/otlp"
        tls:
          insecure: true
      # Console output (for troubleshooting)
      debug:
        verbosity: detailed
    service:
      pipelines:
        metrics:
          # Merge micro-level (otlp) and macro-level (prometheus) metrics;
          # memory_limiter runs first so overload is shed before other work.
          receivers: [otlp, prometheus]
          processors: [memory_limiter, resourcedetection, resource, transform, batch]
          exporters: [otlphttp/prometheus]
        logs:
          receivers: [k8s_events]
          processors: [memory_limiter, resourcedetection, resource, batch]
          exporters: [debug]