Prometheus Linux 系统告警规则实例

# Example Prometheus alerting rules for Linux hosts.
# The node_* metrics are presumably exported by node_exporter; adjust the
# thresholds, `for` durations and label matchers to fit your environment.
groups:
  - name: linux
    rules:
      # Fires when a scrape target has reported up == 0 for a full minute.
      - alert: Node-Down  # alert name
        expr: up == 0
        for: 1m  # how long the condition must hold before the alert fires
        labels:
          severity: warning
        annotations:  # human-readable details attached to the alert
          summary: "Node has been down"
          description: "{{ $labels.instance }} has been down for more than 1 minute"

      # Memory usage in percent, derived from MemAvailable / MemTotal.
      - alert: "内存使⽤率过⾼"
        expr: >-
          round(100 - node_memory_MemAvailable_bytes
          / node_memory_MemTotal_bytes * 100) > 80
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "内存使⽤率过⾼"
          description: "当前使⽤率{{ $value }}%"

      # CPU usage in percent: 100 minus the average idle share per instance/job.
      # Instances matching 'bac-.*' are excluded (environment-specific filter).
      # rate() is used instead of irate(): irate only looks at the last two
      # samples, so short spikes can flap the alert and reset the `for` timer.
      - alert: "CPU使⽤率过⾼"
        expr: >-
          round(100 - ((avg by (instance,job)
          (rate(node_cpu_seconds_total{mode="idle",instance!~'bac-.*'}[5m])))
          * 100)) > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "CPU使⽤率过⾼"
          description: "当前使⽤率{{ $value }}%"

      # Filesystem usage in percent for ext4/xfs filesystems.
      - alert: "磁盘使⽤率过⾼"
        expr: >-
          round(100 - 100 * (node_filesystem_avail_bytes{fstype=~"ext4|xfs"}
          / node_filesystem_size_bytes{fstype=~"ext4|xfs"})) > 80
        for: 15s
        labels:
          severity: warning
        annotations:
          summary: "磁盘使⽤率过⾼"
          # The mount point comes from the series' `mountpoint` label.
          description: "当前磁盘{{ $labels.mountpoint }} 使⽤率{{ $value }}%"

      # Free space below 10 (bytes / 1024^3) on ext4/xfs filesystems, skipping
      # /boot* mount points and instances matching "testnode".
      - alert: "分区容量过低"
        expr: >-
          round(node_filesystem_avail_bytes{fstype=~"ext4|xfs",instance!~"testnode",mountpoint!~"/boot.*"}
          / 1024 / 1024 / 1024) < 10
        for: 15s
        labels:
          severity: warning
        annotations:
          summary: "分区容量过低"
          description: "当前分区{{ $labels.mountpoint }} 容量{{ $value }}GB"

      # Outbound traffic above 2048 (bytes / 1024) per second on physical NICs;
      # virtual/bridge/loopback devices and "data.*" instances are excluded.
      # Uses the transmit counter so the query matches the alert name
      # ("outbound rate too high"); the original queried the receive counter.
      - alert: "⽹络流出速率过⾼"
        expr: >-
          round(rate(node_network_transmit_bytes_total{instance!~"data.*",device!~'tap.*|veth.*|br.*|docker.*|vir.*|lo.*|vnet.*'}[1m])
          / 1024) > 2048
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "⽹络流出速率过⾼"
          description: "当前速率{{ $value }}KB/s"

本文发布于:2024-09-21 19:36:35,感谢您对本站的认可!

本文链接:https://www.17tex.com/tex/3/383309.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:告警   系统   规则   实例   情况
留言与评论(共有 0 条评论)
   
验证码:
Copyright ©2019-2024 Comsenz Inc.Powered by © 易纺专利技术学习网 豫ICP备2022007602号 豫公网安备41160202000603 站长QQ:729038198 关于我们 投诉建议