apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app.kubernetes.io/component: grafana
    app.kubernetes.io/name: grafana
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 9.5.3
    prometheus: k8s
    role: alert-rules
  name: grafana-rules
  namespace: monitoring
  annotations:
    argocd.argoproj.io/sync-wave: '1'
spec:
  groups:
    - name: GrafanaAlerts
      rules:
        - alert: GrafanaRequestsFailing
          annotations:
            message: '{{ $labels.namespace }}/{{ $labels.job }}/{{ $labels.handler
              }} is experiencing {{ $value | humanize }}% errors'
            runbook_url: https://runbooks.prometheus-operator.dev/runbooks/grafana/grafanarequestsfailing
          expr: '100 * sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query",
            status_code=~"5.."})

            /

            sum without (status_code) (namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m{handler!~"/api/datasources/proxy/:id.*|/api/ds/query|/api/tsdb/query"})

            > 50

            '
          for: 5m
          labels:
            severity: warning
    - name: grafana_rules
      rules:
        - expr: 'sum by (namespace, job, handler, status_code) (rate(grafana_http_request_duration_seconds_count[5m]))

            '
          record: namespace_job_handler_statuscode:grafana_http_request_duration_seconds_count:rate5m