diff --git a/docs/installation/kubernetes.md b/docs/installation/kubernetes.md index 48d3e141a..169be8fef 100644 --- a/docs/installation/kubernetes.md +++ b/docs/installation/kubernetes.md @@ -25,11 +25,6 @@ config: # Can be either debug, info, warning, error log_level: warning -# This Helm chart ships with built-in Prometheus ServiceMonitors and Rules. -# This requires the CoreOS Prometheus Operator. -monitoring: - enabled: false - # Enable Database Backups to S3 # backup: # access_key: access-key diff --git a/helm/templates/prom-rules.yaml b/helm/templates/prom-rules.yaml deleted file mode 100644 index 38c2dfd16..000000000 --- a/helm/templates/prom-rules.yaml +++ /dev/null @@ -1,121 +0,0 @@ -{{- if .Values.monitoring.enabled -}} ---- -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ include "passbook.fullname" . }}-static-rules - labels: - app.kubernetes.io/name: {{ include "passbook.name" . }} - helm.sh/chart: {{ include "passbook.chart" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} -spec: - groups: - - name: Aggregate request counters - rules: - - record: job:django_http_requests_before_middlewares_total:sum_rate30s - expr: sum(rate(django_http_requests_before_middlewares_total[30s])) by (job) - - record: job:django_http_requests_unknown_latency_total:sum_rate30s - expr: sum(rate(django_http_requests_unknown_latency_total[30s])) by (job) - - record: job:django_http_ajax_requests_total:sum_rate30s - expr: sum(rate(django_http_ajax_requests_total[30s])) by (job) - - record: job:django_http_responses_before_middlewares_total:sum_rate30s - expr: sum(rate(django_http_responses_before_middlewares_total[30s])) by (job) - - record: job:django_http_requests_unknown_latency_including_middlewares_total:sum_rate30s - expr: sum(rate(django_http_requests_unknown_latency_including_middlewares_total[30s])) by (job) - - record: job:django_http_requests_body_total_bytes:sum_rate30s - expr: sum(rate(django_http_requests_body_total_bytes[30s])) by (job) - - record: job:django_http_responses_streaming_total:sum_rate30s - expr: sum(rate(django_http_responses_streaming_total[30s])) by (job) - - record: job:django_http_responses_body_total_bytes:sum_rate30s - expr: sum(rate(django_http_responses_body_total_bytes[30s])) by (job) - - record: job:django_http_requests_total:sum_rate30s - expr: sum(rate(django_http_requests_total_by_method[30s])) by (job) - - record: job:django_http_requests_total_by_method:sum_rate30s - expr: sum(rate(django_http_requests_total_by_method[30s])) by (job,method) - - record: job:django_http_requests_total_by_transport:sum_rate30s - expr: sum(rate(django_http_requests_total_by_transport[30s])) by (job,transport) - - record: job:django_http_requests_total_by_view:sum_rate30s - expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) by (job,view) - - record: job:django_http_requests_total_by_view_transport_method:sum_rate30s - expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) by (job,view,transport,method) - - record: job:django_http_responses_total_by_templatename:sum_rate30s - expr: sum(rate(django_http_responses_total_by_templatename[30s])) by (job,templatename) - - record: job:django_http_responses_total_by_status:sum_rate30s - expr: sum(rate(django_http_responses_total_by_status[30s])) by (job,status) - - record: job:django_http_responses_total_by_status_name_method:sum_rate30s - expr: sum(rate(django_http_responses_total_by_status_name_method[30s])) by (job,status,name,method) - - record: job:django_http_responses_total_by_charset:sum_rate30s - expr: sum(rate(django_http_responses_total_by_charset[30s])) by (job,charset) - - record: job:django_http_exceptions_total_by_type:sum_rate30s - expr: sum(rate(django_http_exceptions_total_by_type[30s])) by (job,type) - - record: job:django_http_exceptions_total_by_view:sum_rate30s - expr: sum(rate(django_http_exceptions_total_by_view[30s])) by (job,view) - - name: Aggregate latency histograms - rules: - - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s - expr: histogram_quantile(0.50, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "50" - - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s - expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "95" - - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s - expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "99" - - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s - expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "99.9" - - record: job:django_http_requests_latency_seconds:quantile_rate30s - expr: histogram_quantile(0.50, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "50" - - record: job:django_http_requests_latency_seconds:quantile_rate30s - expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "95" - - record: job:django_http_requests_latency_seconds:quantile_rate30s - expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "99" - - record: job:django_http_requests_latency_seconds:quantile_rate30s - expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s])) by (job, le)) - labels: - quantile: "99.9" - - name: Aggregate model operations - rules: - - record: job:django_model_inserts_total:sum_rate1m - expr: sum(rate(django_model_inserts_total[1m])) by (job, model) - - record: job:django_model_updates_total:sum_rate1m - expr: sum(rate(django_model_updates_total[1m])) by (job, model) - - record: job:django_model_deletes_total:sum_rate1m - expr: sum(rate(django_model_deletes_total[1m])) by (job, model) - - name: Aggregate database operations - rules: - - record: job:django_db_new_connections_total:sum_rate30s - expr: sum(rate(django_db_new_connections_total[30s])) by (alias, vendor) - - record: job:django_db_new_connection_errors_total:sum_rate30s - expr: sum(rate(django_db_new_connection_errors_total[30s])) by (alias, vendor) - - record: job:django_db_execute_total:sum_rate30s - expr: sum(rate(django_db_execute_total[30s])) by (alias, vendor) - - record: job:django_db_execute_many_total:sum_rate30s - expr: sum(rate(django_db_execute_many_total[30s])) by (alias, vendor) - - record: job:django_db_errors_total:sum_rate30s - expr: sum(rate(django_db_errors_total[30s])) by (alias, vendor, type) - - name: Aggregate migrations - rules: - - record: job:django_migrations_applied_total:max - expr: max(django_migrations_applied_total) by (job, connection) - - record: job:django_migrations_unapplied_total:max - expr: max(django_migrations_unapplied_total) by (job, connection) - - name: Alerts - rules: - - alert: UnappliedMigrations - expr: job:django_migrations_unapplied_total:max > 0 - for: 1m - labels: - severity: testing -{{- end }} diff --git a/helm/templates/static-sm.yaml b/helm/templates/static-sm.yaml deleted file mode 100644 index 33f37b794..000000000 --- a/helm/templates/static-sm.yaml +++ /dev/null @@ -1,17 +0,0 @@ -{{- if .Values.monitoring.enabled -}} -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - app.kubernetes.io/name: {{ include "passbook.name" . }} - helm.sh/chart: {{ include "passbook.chart" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - name: {{ include "passbook.fullname" . }}-static-monitoring -spec: - endpoints: - - port: http - selector: - matchLabels: - k8s.passbook.beryju.org/component: static -{{- end }} diff --git a/helm/templates/web-sm.yaml b/helm/templates/web-sm.yaml deleted file mode 100644 index 287769718..000000000 --- a/helm/templates/web-sm.yaml +++ /dev/null @@ -1,26 +0,0 @@ -{{- if .Values.monitoring.enabled -}} -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - app.kubernetes.io/name: {{ include "passbook.name" . }} - helm.sh/chart: {{ include "passbook.chart" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} - name: {{ include "passbook.fullname" . }}-web-monitoring -spec: - endpoints: - - basicAuth: - password: - name: {{ include "passbook.fullname" . }}-secret-key - key: secret_key - username: - name: {{ include "passbook.fullname" . }}-secret-key - key: monitoring_username - port: http - path: /metrics/ - interval: 10s - selector: - matchLabels: - k8s.passbook.beryju.org/component: web -{{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index c34eb1388..e6895d508 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -18,11 +18,6 @@ config: # Can be either debug, info, warning, error log_level: warning -# This Helm chart ships with built-in Prometheus ServiceMonitors and Rules. -# This requires the CoreOS Prometheus Operator. -monitoring: - enabled: false - # Enable Database Backups to S3 # backup: # access_key: access-key