2025-06-02 19:57:31 -04:00

253 lines
16 KiB
YAML

{{- /*
Generated from 'prometheus-operator' group from https://github.com/prometheus-operator/kube-prometheus.git
Do not change in-place! To change this file, first read the following link:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
*/ -}}
{{- /*
Guard and preamble for the prometheus-operator PrometheusRule.
The resource is rendered only when the target Kubernetes version (the detected
GitVersion, or kubeTargetVersionOverride when that value is set) satisfies both
semver constraints and defaultRules.create and
defaultRules.rules.prometheusOperator are both truthy.
The matching end for this guard is the last line of the file.
*/ -}}
{{- $kubeTargetVersion := .Values.kubeTargetVersionOverride | default .Capabilities.KubeVersion.GitVersion }}
{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }}
{{- /* $operatorJob and $namespace are the job/namespace label matchers used by every rule expression in this file. */}}
{{- $fullname := include "kube-prometheus-stack.fullname" . }}
{{- $operatorJob := printf "%s-%s" $fullname "operator" }}
{{- $namespace := include "kube-prometheus-stack.namespace" . }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ printf "%s-%s" $fullname "prometheus-operator" | trunc 63 | trimSuffix "-" }}
  namespace: {{ $namespace }}
  labels:
    app: {{ include "kube-prometheus-stack.name" . }}
    {{- include "kube-prometheus-stack.labels" . | nindent 4 }}
    {{- with .Values.defaultRules.labels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
  {{- with .Values.defaultRules.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  groups:
  - name: prometheus-operator
    rules:
{{- /*
PrometheusOperatorListErrors: divides the 10m rate of failed list operations by
the 10m rate of all list operations, aggregated by cluster/controller/namespace
(plus any additionalAggregationLabels), and alerts when the ratio exceeds 0.4.
Omitted when defaultRules.disabled.PrometheusOperatorListErrors is truthy.
"for" and "severity" default to 15m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorListErrors | default false) }}
    - alert: PrometheusOperatorListErrors
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: Errors while performing List operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorlisterrors
        summary: Errors while performing list operations in controller.
      expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[10m])) / sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_list_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[10m]))) > 0.4
      for: {{ dig "PrometheusOperatorListErrors" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorListErrors" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorWatchErrors: divides the 5m rate of failed watch operations by
the 5m rate of all watch operations, aggregated by cluster/controller/namespace
(plus any additionalAggregationLabels), and alerts when the ratio exceeds 0.4.
Omitted when defaultRules.disabled.PrometheusOperatorWatchErrors is truthy.
"for" and "severity" default to 15m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorWatchErrors | default false) }}
    - alert: PrometheusOperatorWatchErrors
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: Errors while performing watch operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorwatcherrors
        summary: Errors while performing watch operations in controller.
      expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m])) / sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.4
      for: {{ dig "PrometheusOperatorWatchErrors" "for" "15m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorWatchErrors" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorSyncFailed: alerts when the minimum over 5m of
prometheus_operator_syncs with status="failed" stays above 0 for the operator
job and namespace.
Omitted when defaultRules.disabled.PrometheusOperatorSyncFailed is truthy.
"for" and "severity" default to 10m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorSyncFailed | default false) }}
    - alert: PrometheusOperatorSyncFailed
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: Controller {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} namespace fails to reconcile {{`{{`}} $value {{`}}`}} objects.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorsyncfailed
        summary: Last controller reconciliation failed
      expr: min_over_time(prometheus_operator_syncs{status="failed",job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: {{ dig "PrometheusOperatorSyncFailed" "for" "10m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorSyncFailed" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorReconcileErrors: divides the 5m rate of reconcile errors by
the 5m rate of reconcile operations, aggregated by cluster/controller/namespace
(plus any additionalAggregationLabels), and alerts when the ratio exceeds 0.1.
Omitted when defaultRules.disabled.PrometheusOperatorReconcileErrors is truthy.
"for" and "severity" default to 10m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorReconcileErrors | default false) }}
    - alert: PrometheusOperatorReconcileErrors
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of reconciling operations failed for {{`{{`}} $labels.controller {{`}}`}} controller in {{`{{`}} $labels.namespace {{`}}`}} namespace.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorreconcileerrors
        summary: Errors while reconciling objects.
      expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) / (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.1
      for: {{ dig "PrometheusOperatorReconcileErrors" "for" "10m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorReconcileErrors" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorStatusUpdateErrors: divides the 5m rate of status update
errors by the 5m rate of status update operations, aggregated by
cluster/controller/namespace (plus any additionalAggregationLabels), and alerts
when the ratio exceeds 0.1.
Omitted when defaultRules.disabled.PrometheusOperatorStatusUpdateErrors is truthy.
"for" and "severity" default to 10m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorStatusUpdateErrors | default false) }}
    - alert: PrometheusOperatorStatusUpdateErrors
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of status update operations failed for {{`{{`}} $labels.controller {{`}}`}} controller in {{`{{`}} $labels.namespace {{`}}`}} namespace.'
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorstatusupdateerrors
        summary: Errors while updating objects status.
      expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) / (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.1
      for: {{ dig "PrometheusOperatorStatusUpdateErrors" "for" "10m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorStatusUpdateErrors" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorNodeLookupErrors: alerts when the 5m rate of
prometheus_operator_node_address_lookup_errors_total for the operator job and
namespace exceeds 0.1.
Omitted when defaultRules.disabled.PrometheusOperatorNodeLookupErrors is truthy.
"for" and "severity" default to 10m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorNodeLookupErrors | default false) }}
    - alert: PrometheusOperatorNodeLookupErrors
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatornodelookuperrors
        summary: Errors while reconciling Prometheus.
      expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1
      for: {{ dig "PrometheusOperatorNodeLookupErrors" "for" "10m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorNodeLookupErrors" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorNotReady: alerts when the maximum over 5m of
prometheus_operator_ready equals 0, taking the minimum by
cluster/controller/namespace (plus any additionalAggregationLabels).
Omitted when defaultRules.disabled.PrometheusOperatorNotReady is truthy.
"for" and "severity" default to 5m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorNotReady | default false) }}
    - alert: PrometheusOperatorNotReady
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace isn't ready to reconcile {{`{{`}} $labels.controller {{`}}`}} resources.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatornotready
        summary: Prometheus operator not ready
      expr: min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) == 0)
      for: {{ dig "PrometheusOperatorNotReady" "for" "5m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorNotReady" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- /*
PrometheusOperatorRejectedResources: alerts when the minimum over 5m of
prometheus_operator_managed_resources with state="rejected" stays above 0 for
the operator job and namespace.
Omitted when defaultRules.disabled.PrometheusOperatorRejectedResources is truthy.
"for" and "severity" default to 5m / warning unless overridden in customRules.
*/}}
{{- if not (.Values.defaultRules.disabled.PrometheusOperatorRejectedResources | default false) }}
    - alert: PrometheusOperatorRejectedResources
      annotations:
        {{- with .Values.defaultRules.additionalRuleAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace rejected {{`{{`}} printf "%0.0f" $value {{`}}`}} {{`{{`}} $labels.controller {{`}}`}}/{{`{{`}} $labels.resource {{`}}`}} resources.
        runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorrejectedresources
        summary: Resources rejected by Prometheus operator
      expr: min_over_time(prometheus_operator_managed_resources{state="rejected",job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0
      for: {{ dig "PrometheusOperatorRejectedResources" "for" "5m" .Values.customRules }}
      {{- with .Values.defaultRules.keepFiringFor }}
      keep_firing_for: "{{ . }}"
      {{- end }}
      labels:
        severity: {{ dig "PrometheusOperatorRejectedResources" "severity" "warning" .Values.customRules }}
        {{- /* Each "with" emits nothing when its value is empty, so no outer guard is needed. */}}
        {{- with .Values.defaultRules.additionalRuleLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
{{- end }}
{{- end }}