---
# OpenTelemetry gateway collector, managed by the OpenTelemetry Operator.
# It receives OTLP data, collects Kubernetes cluster-state metrics, and
# forwards metrics to an external Prometheus through Remote Write.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-gateway
  namespace: opentelemetry-operator-system
spec:
  mode: deployment
  # Keep a single replica: every replica would run its own k8s_cluster
  # receiver and report the same cluster-state metrics again.
  replicas: 1
  # NOTE(review): the original comment states that the operator creates this
  # ServiceAccount and binds its permissions automatically. Confirm this for
  # the operator version in use; k8s_cluster needs read access to the
  # cluster-level resources it reports on.
  serviceAccount: otel-gateway-collector
  config:
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: "0.0.0.0:4317"
          http:
            endpoint: "0.0.0.0:4318"

      # --- Core: Kubernetes cluster-state metrics ---
      # Reports the state of Deployments, DaemonSets, StatefulSets, HPAs,
      # Nodes and similar resources.
      k8s_cluster:
        collection_interval: 30s
        node_conditions_to_report:
          - Ready
          - MemoryPressure
          - DiskPressure
          - PIDPressure
        allocatable_types_to_report: [cpu, memory]

    processors:
      batch:
        send_batch_size: 1000
        timeout: 10s

      memory_limiter:
        check_interval: 1s
        limit_percentage: 70
        spike_limit_percentage: 30

      # Adds Kubernetes metadata attributes (an important gateway duty).
      # Defined here but not referenced by any pipeline below; add it to a
      # pipeline's processors list to activate it.
      k8sattributes:
        extract:
          metadata:
            - k8s.namespace.name
            - k8s.pod.name
            - k8s.deployment.name
            - k8s.statefulset.name
            - k8s.daemonset.name
            - k8s.cronjob.name
            - k8s.job.name
            - k8s.node.name
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection

    exporters:
      # 1. Export metrics to the external Prometheus (Remote Write).
      prometheusremotewrite:
        endpoint: "http://10.0.0.38:9090/api/v1/write"
        # If Basic Auth is required, configure it here.
        # external_labels:
        #   cluster: "test-k8s-cluster"

      # 2. Export traces to an external Tempo (OTLP gRPC).
      # otlp/tempo:
      #   endpoint: "<你的TEMPO_IP>:4317"
      #   tls:
      #     insecure: true

      # 3. Export logs to an external Elasticsearch (optional).
      # elasticsearch:
      #   endpoints: ["http://<你的ES_IP>:9200"]
      #   logs_index: "k8s-logs"

      debug:
        verbosity: basic

    service:
      pipelines:
        metrics:
          receivers: [otlp, k8s_cluster]
          # Whether k8sattributes belongs before or after batch depends on
          # the architecture; a gateway mostly forwards. Metrics produced by
          # k8s_cluster already carry their own attributes, and OTLP data is
          # expected to be enriched on the agent side.
          processors: [memory_limiter, batch]
          exporters: [prometheusremotewrite]

        traces:
          receivers: [otlp]
          processors: [memory_limiter, batch]
          # The otlp/tempo exporter is commented out above, and a pipeline
          # may only reference configured exporters. Traces therefore go to
          # the debug exporter for now; switch this to [otlp/tempo] once
          # that exporter is enabled.
          exporters: [debug]

        # logs:
        #   receivers: [otlp]
        #   processors: [memory_limiter, batch]
        #   exporters: [elasticsearch]