Update Vector to 0.37.0 and double the threshold for the HostContextSwitching alert

This commit is contained in:
Daniel Berteaud 2024-03-26 17:05:51 +01:00
parent cda0499371
commit 094aa1eb7e
5 changed files with 8 additions and 8 deletions

View File

@@ -296,7 +296,7 @@ _EOT
leader = true
config {
image = "danielberteaud/vector:0.36.1-1"
image = "danielberteaud/vector:0.37.0-1"
userns_mode = "host"
readonly_rootfs = true
pids_limit = 200

View File

@@ -1 +1 @@
FROM timberio/vector:0.36.1-alpine
FROM timberio/vector:0.37.0-alpine

View File

@@ -1116,13 +1116,13 @@ groups:
description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostContextSwitching
expr: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
expr: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 20000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
for: 0m
labels:
severity: warning
annotations:
summary: Host context switching (instance {{ $labels.instance }})
description: "Context switching is growing on the node (> 10000 / CPU / s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
description: "Context switching is growing on the node (> 20000 / CPU / s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# - alert: HostSwapIsFillingUp
# expr: '((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
@@ -2096,7 +2096,7 @@ _EOT
leader = true
config {
image = "danielberteaud/vector:0.36.1-1"
image = "danielberteaud/vector:0.37.0-1"
readonly_rootfs = true
pids_limit = 200
args = ["--config=/local/vector.yml"]

View File

@@ -176,13 +176,13 @@ groups:
description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostContextSwitching
expr: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 10000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
expr: '((rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 20000) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'
for: 0m
labels:
severity: warning
annotations:
summary: Host context switching (instance {{ $labels.instance }})
description: "Context switching is growing on the node (> 10000 / CPU / s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
description: "Context switching is growing on the node (> 20000 / CPU / s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# - alert: HostSwapIsFillingUp
# expr: '((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}'

View File

@@ -262,7 +262,7 @@ monitoring:
# Common vector settings
vector:
# Version of vector
version: 0.36.1
version: 0.37.0
# Docker image to use
image: '[[ .docker.repo ]]vector:[[ .monitoring.vector.version ]]-1'