---
# OpenTelemetry Collector agent, deployed by the OpenTelemetry Operator as a
# DaemonSet. It scrapes kubelet stats and host metrics, enriches them with
# resource and Kubernetes attributes, and forwards them over OTLP to the
# in-cluster gateway collector.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-agent
  namespace: opentelemetry-operator-system
spec:
  mode: daemonset
  # Recommended on, so that host-level metrics are captured more accurately.
  hostNetwork: true
  env:
    # Name of the node this agent runs on, taken from the Downward API.
    # Referenced below as ${env:K8S_NODE_NAME} (kubeletstats endpoint) and by
    # the k8sattributes node filter.
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
  config:
    receivers:
      # 1. Resource usage (CPU / memory) of pods and containers.
      kubeletstats:
        collection_interval: 20s
        auth_type: "serviceAccount"
        endpoint: "${env:K8S_NODE_NAME}:10250"
        # NOTE(review): TLS verification of the kubelet certificate is
        # disabled here; confirm this is acceptable for the cluster.
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
      # 2. Metrics of the underlying host.
      hostmetrics:
        collection_interval: 20s
        # Explicit empty mappings (rather than bare null values) enable each
        # scraper with its default settings.
        scrapers:
          cpu: {}
          memory: {}
          load: {}
          filesystem: {}
          network: {}
      # 3. (Optional) log collection.
      # filelog:
      #   include: [/var/log/pods/*/*/*.log]
      #   ...
    processors:
      batch:
        send_batch_size: 500
        timeout: 5s
      memory_limiter:
        check_interval: 1s
        limit_mib: 400
        spike_limit_mib: 100
      # Resource detection: hostname and other system-level information.
      resourcedetection:
        # NOTE(review): the original note suggested trying a 'tencentcloud'
        # detector on Tencent Cloud CVM; confirm such a detector exists in the
        # collector build before adding it. 'system' is usually sufficient.
        detectors: [system]
        timeout: 2s
        override: false
      # Key step: tag metrics with Kubernetes metadata (pod name, namespace,
      # node name).
      k8sattributes:
        passthrough: false
        # Only consider pods scheduled on this agent's own node.
        filter:
          node_from_env_var: K8S_NODE_NAME
        extract:
          metadata:
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.deployment.name
            - k8s.namespace.name
            - k8s.node.name
        # Association rules are tried in order: pod UID, then pod IP, then the
        # connection's source address.
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: connection
    exporters:
      # Send to the gateway collector Service inside the cluster.
      otlp:
        endpoint: "otel-gateway-collector.opentelemetry-operator-system.svc.cluster.local:4317"
        tls:
          # Plaintext gRPC to the in-cluster gateway.
          insecure: true
    service:
      pipelines:
        metrics:
          receivers: [kubeletstats, hostmetrics]
          # memory_limiter runs first so back-pressure is applied before any
          # other processor does work; batch stays last.
          processors: [memory_limiter, resourcedetection, k8sattributes, batch]
          exporters: [otlp]
        # Enable if applications send traces to the local agent (via a
        # sidecar or some other mechanism).
        # traces:
        #   receivers: [otlp]
        #   exporters: [otlp]