---
# PrometheusRule (Prometheus Operator CRD) with three Cilium alerts:
# unreachable nodes, unreachable health endpoints, and eBPF map memory.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # NOTE(review): these labels presumably have to match the ruleSelector of
  # the Prometheus resource that should load this rule - confirm.
  labels:
    prometheus: k8s
    role: alert-rules
  name: cilium-health-check
  namespace: kuboard
spec:
  groups:
    - name: cilium-health-check
      rules:
        # Fires when cilium_unreachable_nodes stays above 0 for 1 minute.
        - alert: unreachable-cilium-endpoints
          annotations:
            message: >-
              ns:{{ $labels.namespace }} pod:{{ $labels.pod }}
              instance: {{ $labels.instance }} 存在无法到达的cilium端点
            summary: '{{ $labels.app }} 存在无法到达的cilium端点'
          # Every label used by the annotations must be listed in `by`;
          # aggregation drops all other labels and they would render empty.
          # NOTE(review): `app` only renders if the scraped series carry an
          # `app` label - confirm against the scrape/relabel config.
          expr: >-
            max(cilium_unreachable_nodes)
            by (namespace, pod, instance, app) > 0
          for: 1m
          labels:
            severity: critical
        # Fires when cilium_unreachable_health_endpoints stays above 0 for
        # 1 minute.
        - alert: unreachable-health-cilium-endpoints
          annotations:
            message: >-
              ns:{{ $labels.namespace }} pod:{{ $labels.pod }}
              instance: {{ $labels.instance }} 与健康端点连接不正常
            summary: '{{ $labels.app }} 与健康端点连接有问题'
          # The metric carries the `cilium_` prefix like the other Cilium
          # metrics in this group; the unprefixed name matches no series.
          # `by` lists every label the annotations reference.
          expr: >-
            max(cilium_unreachable_health_endpoints)
            by (namespace, pod, instance, app) > 0
          for: 1m
          labels:
            severity: critical
        # Fires when cilium_bpf_maps_virtual_memory_max_bytes stays above
        # 209715200 bytes (200 * 1024 * 1024) for 1 minute.
        - alert: cilium-memory-used
          annotations:
            message: >-
              ns:{{ $labels.namespace }} pod:{{ $labels.pod }}
              系统中安装的 eBPF 映射使用的最大内存大于 200M
            summary: '{{ $labels.app }} 系统中安装的 eBPF 映射使用的最大内存大于 200M'
          # `namespace` and `app` are kept in `by` because the annotations
          # reference them; with `by (pod)` alone they would render empty.
          expr: >-
            max(cilium_bpf_maps_virtual_memory_max_bytes)
            by (namespace, pod, app) > 209715200
          for: 1m
          labels:
            severity: warning
# Document last updated: 2023-04-24 10:53 - author: 张尚