0119同步
This commit is contained in:
39
OpenTelemetry/Collector_v2/01-otel-gateway-rbac.yaml
Normal file
39
OpenTelemetry/Collector_v2/01-otel-gateway-rbac.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# Namespace that holds the OpenTelemetry gateway resources defined below.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
# 1. Permissions (RBAC): the identity the gateway collector runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: otel-gateway
  namespace: monitoring
---
# Cluster-wide, read-only permissions for the gateway collector.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-gateway-role
rules:
  # Read Pods, Nodes and Namespaces (plus Services and Endpoints) for
  # labelling and metadata lookup.
  - apiGroups:
      - ""
    resources:
      - pods
      - nodes
      - namespaces
      - services
      - endpoints
    verbs:
      - get
      - list
      - watch
  # Read Events (used to collect Kubernetes events).
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - get
      - list
      - watch
---
# Grants otel-gateway-role to the otel-gateway ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-gateway-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: otel-gateway-role
subjects:
  - kind: ServiceAccount
    name: otel-gateway
    namespace: monitoring
|
||||
|
||||
82
OpenTelemetry/Collector_v2/02-otel-gateway-config.yaml
Normal file
82
OpenTelemetry/Collector_v2/02-otel-gateway-config.yaml
Normal file
@@ -0,0 +1,82 @@
|
||||
# Collector configuration for the otel-gateway workload.
# Pipelines: metrics (OTLP from agents + kube-state-metrics scrape) are
# exported to Prometheus; Kubernetes events are exported to the debug log.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-gateway-config
  namespace: monitoring
data:
  config.yaml: |
    receivers:
      # Receives data from the agents (gRPC 4317, HTTP 4318).
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318

      # 1. Cluster level: Kubernetes events.
      k8s_events:
        auth_type: serviceAccount

      # 2. Cluster level: scrape the TKE-provided tke-kube-state-metrics
      #    (kube-state-metrics).
      prometheus:
        config:
          scrape_configs:
            - job_name: 'tke-kube-state-metrics'
              scrape_interval: 30s
              static_configs:
                - targets: ['tke-kube-state-metrics.kube-system.svc.cluster.local:8180']

    processors:
      batch:
        send_batch_size: 2000
        timeout: 10s

      resourcedetection:
        detectors: [env, system]
        # Keep this false on the gateway: with override enabled the system
        # detector replaces the host.name already set by each agent with the
        # gateway pod's own hostname, so every node looks like the same host.
        override: false

      # 3. Inject the cluster ID to avoid Prometheus duplicate-sample errors.
      resource:
        attributes:
          - key: cluster.name
            value: "test-k8s"
            action: upsert

      # Copy OTLP resource attributes onto each datapoint as metric labels so
      # Prometheus can tell different Pods/Nodes apart.
      transform:
        metric_statements:
          - context: datapoint
            statements:
              - set(attributes["k8s_pod_name"], resource.attributes["k8s.pod.name"])
              - set(attributes["k8s_node_name"], resource.attributes["k8s.node.name"])
              - set(attributes["k8s_namespace_name"], resource.attributes["k8s.namespace.name"])
              - set(attributes["k8s_container_name"], resource.attributes["k8s.container.name"])
              - set(attributes["cluster_name"], resource.attributes["cluster.name"])

      memory_limiter:
        check_interval: 1s
        limit_mib: 1500
        spike_limit_mib: 512

    exporters:
      # Sends metrics to Prometheus over OTLP/HTTP.
      otlphttp/prometheus:
        # The otlphttp exporter appends /v1/metrics to this base URL, so it
        # must be Prometheus's OTLP base path, not the remote-write path
        # (/api/v1/write). Prometheus must run with its OTLP receiver enabled.
        endpoint: "http://10.0.0.38:9090/api/v1/otlp"
        tls:
          insecure: true

      # Prints telemetry to the collector log (for troubleshooting).
      debug:
        verbosity: detailed

    service:
      pipelines:
        metrics:
          receivers: [otlp, prometheus]  # merges agent (otlp) and cluster-level (prometheus) metrics
          processors: [memory_limiter, resourcedetection, resource, transform, batch]
          exporters: [otlphttp/prometheus]
        logs:
          receivers: [k8s_events]
          processors: [memory_limiter, resourcedetection, resource, batch]
          exporters: [debug]
|
||||
60
OpenTelemetry/Collector_v2/03-otel-gateway-deployment.yaml
Normal file
60
OpenTelemetry/Collector_v2/03-otel-gateway-deployment.yaml
Normal file
@@ -0,0 +1,60 @@
|
||||
# Workload (Deployment): runs the gateway collector with the configuration
# mounted from the otel-gateway-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-gateway
  namespace: monitoring
  labels:
    app: otel-gateway
spec:
  replicas: 1  # a single replica is recommended for Events and KSM collection, to avoid duplicate data
  selector:
    matchLabels:
      app: otel-gateway
  template:
    metadata:
      labels:
        app: otel-gateway
    spec:
      serviceAccountName: otel-gateway
      containers:
        - name: otel-collector
          # Pinned to a fixed release instead of the floating "latest" tag so
          # that pod restarts cannot silently pull a different collector
          # version. Bump deliberately when upgrading.
          image: otel/opentelemetry-collector-contrib:0.119.0
          command:
            - "/otelcol-contrib"
          args:
            - "--config=/conf/config.yaml"
          volumeMounts:
            - name: config-vol
              mountPath: /conf
          resources:
            limits:
              cpu: 1
              memory: 2Gi
            requests:
              cpu: 200m
              memory: 400Mi
      volumes:
        # Provides /conf/config.yaml from the ConfigMap.
        - name: config-vol
          configMap:
            name: otel-gateway-config
|
||||
---
|
||||
# Service exposure: headless Service in front of the gateway pods.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: otel-gateway
spec:
  # clusterIP None makes this a headless Service (no virtual IP allocated).
  clusterIP: None
  selector:
    app: otel-gateway
  ports:
    # OTLP over gRPC.
    - name: grpc
      protocol: TCP
      port: 4317
      targetPort: 4317
    # OTLP over HTTP.
    - name: http
      protocol: TCP
      port: 4318
      targetPort: 4318
|
||||
39
OpenTelemetry/Collector_v2/11-otel-agent-rbac.yaml
Normal file
39
OpenTelemetry/Collector_v2/11-otel-agent-rbac.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
# 1. Permissions: the identity the agent collector runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: otel-agent
  namespace: monitoring
---
# Cluster-wide, read-only permissions for the agent collector.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: otel-agent-role
rules:
  # Read Pod and Node information.
  # NOTE(review): nodes/proxy is a broad grant; confirm it is still required.
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/stats
      - nodes/proxy
      - pods
      - services
      - endpoints
    verbs:
      - get
      - watch
      - list

  # Read ReplicaSets so the k8sattributes processor can resolve Deployment
  # names.
  - apiGroups:
      - apps
    resources:
      - replicasets
    verbs:
      - get
      - watch
      - list

  # Non-resource URL permissions (access to the kubelet stats endpoints).
  - nonResourceURLs:
      - /metrics
      - /metrics/cadvisor
    verbs:
      - get
---
# Grants otel-agent-role to the otel-agent ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: otel-agent-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: otel-agent-role
subjects:
  - kind: ServiceAccount
    name: otel-agent
    namespace: monitoring
|
||||
|
||||
75
OpenTelemetry/Collector_v2/12-otel-agent-config.yaml
Normal file
75
OpenTelemetry/Collector_v2/12-otel-agent-config.yaml
Normal file
@@ -0,0 +1,75 @@
|
||||
# Agent configuration file: collector config for the per-node otel-agent.
# Collects host and kubelet metrics on the local node, enriches them with
# Kubernetes metadata and forwards them to the gateway over OTLP/gRPC.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-agent-config
  namespace: monitoring
data:
  config.yaml: |
    receivers:
      # 1. Host-level metrics of the node (read through the /hostfs mount).
      hostmetrics:
        collection_interval: 30s
        root_path: /hostfs
        scrapers:
          cpu: {}
          memory: {}
          load: {}
          disk: {}
          filesystem: {}
          network: {}
          paging: {}
          processes: {}

      # 2. Pod / container / volume level metrics from the kubelet.
      kubeletstats:
        collection_interval: 30s
        auth_type: "serviceAccount"
        endpoint: "https://${env:K8S_NODE_NAME}:10250"  # locate the local kubelet via the env var
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
          - volume

    processors:
      # Must run first in the pipeline. Sized below the 500Mi container
      # memory limit so the collector sheds load instead of being OOM-killed.
      memory_limiter:
        check_interval: 1s
        limit_mib: 400
        spike_limit_mib: 100

      batch:
        send_batch_size: 1000
        timeout: 10s

      resourcedetection:
        detectors: [env, system]

      # hostmetrics resources carry no pod identity, so k8sattributes cannot
      # attach k8s.node.name to them. Insert it from the node name env var so
      # host metrics from different nodes stay distinguishable downstream.
      # "insert" leaves values already set by kubeletstats untouched.
      resource:
        attributes:
          - key: k8s.node.name
            value: "${env:K8S_NODE_NAME}"
            action: insert

      # 3. Extract detailed Kubernetes metadata so every series is unique.
      k8sattributes:
        auth_type: "serviceAccount"
        passthrough: false
        # Each agent only handles telemetry from its own node, so watch only
        # the pods scheduled there rather than every pod in the cluster.
        filter:
          node_from_env_var: K8S_NODE_NAME
        extract:
          metadata:
            - k8s.pod.name
            - k8s.pod.uid
            - k8s.namespace.name
            - k8s.node.name
            - k8s.deployment.name
            - k8s.container.name
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection

    exporters:
      # Sends to the in-cluster gateway Service.
      otlp:
        endpoint: "otel-gateway.monitoring.svc.cluster.local:4317"
        tls:
          insecure: true

    service:
      pipelines:
        metrics:
          receivers: [hostmetrics, kubeletstats]
          processors: [memory_limiter, resourcedetection, resource, k8sattributes, batch]
          exporters: [otlp]
|
||||
55
OpenTelemetry/Collector_v2/13-otel-agent-daemonset.yaml
Normal file
55
OpenTelemetry/Collector_v2/13-otel-agent-daemonset.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
# Workload (DaemonSet): runs one agent collector per node with the
# configuration mounted from the otel-agent-config ConfigMap.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: otel-agent
  namespace: monitoring
  labels:
    app: otel-agent
spec:
  selector:
    matchLabels:
      app: otel-agent
  template:
    metadata:
      labels:
        app: otel-agent
    spec:
      hostNetwork: true
      # Needed together with hostNetwork so cluster DNS names still resolve.
      dnsPolicy: ClusterFirstWithHostNet
      serviceAccountName: otel-agent
      containers:
        - name: otel-collector
          # Pinned to a fixed release instead of the floating "latest" tag so
          # that nodes cannot end up running different collector versions.
          # Bump deliberately when upgrading.
          image: otel/opentelemetry-collector-contrib:0.119.0
          command:
            - "/otelcol-contrib"
          args:
            - "--config=/conf/config.yaml"
          env:
            # Name of the node this pod runs on; used by the kubeletstats
            # receiver in the agent configuration.
            - name: K8S_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: config-vol
              mountPath: /conf
            # Host root filesystem, mounted read-only so host metrics can be
            # collected.
            - name: hostfs
              mountPath: /hostfs
              readOnly: true
              mountPropagation: HostToContainer
          resources:
            limits:
              cpu: 500m
              memory: 500Mi
            requests:
              cpu: 100m
              memory: 200Mi
      volumes:
        # Provides /conf/config.yaml from the ConfigMap.
        - name: config-vol
          configMap:
            name: otel-agent-config
        - name: hostfs
          hostPath:
            path: /
|
||||
Reference in New Issue
Block a user