Prometheus metrics (#914)
* admin: add worker metrics Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * admin: add version metrics Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * events: add gauge for system tasks Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * outposts: add gauge for last hello and connection status Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * root: re-add prometheus metrics to database Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * root: allow access to metrics without credentials when debug is on Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * root: add UpdatingGauge to auto-set value on load Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * flows: add metrics for cache and building Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * policies: add metrics for policy engine Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * events: add histogram for task durations Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * events: revert to gauge because values are updated on export view Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * core: add gauge to count all models Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org> * events: add metrics for events Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
This commit is contained in:
parent
a5cd9fa141
commit
53e2b2c784
|
@ -22,7 +22,7 @@ class TaskSerializer(PassiveSerializer):
|
|||
|
||||
task_name = CharField()
|
||||
task_description = CharField()
|
||||
task_finish_timestamp = DateTimeField(source="finish_timestamp")
|
||||
task_finish_timestamp = DateTimeField(source="finish_time")
|
||||
|
||||
status = ChoiceField(
|
||||
source="result.status.name",
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""authentik administration overview"""
|
||||
from drf_spectacular.utils import extend_schema, inline_serializer
|
||||
from prometheus_client import Gauge
|
||||
from rest_framework.fields import IntegerField
|
||||
from rest_framework.permissions import IsAdminUser
|
||||
from rest_framework.request import Request
|
||||
|
@ -8,6 +9,8 @@ from rest_framework.views import APIView
|
|||
|
||||
from authentik.root.celery import CELERY_APP
|
||||
|
||||
GAUGE_WORKERS = Gauge("authentik_admin_workers", "Currently connected workers")
|
||||
|
||||
|
||||
class WorkerView(APIView):
|
||||
"""Get currently connected worker count."""
|
||||
|
@ -19,4 +22,5 @@ class WorkerView(APIView):
|
|||
)
|
||||
def get(self, request: Request) -> Response:
|
||||
"""Get currently connected worker count."""
|
||||
return Response({"count": len(CELERY_APP.control.ping(timeout=0.5))})
|
||||
count = len(CELERY_APP.control.ping(timeout=0.5))
|
||||
return Response({"count": count})
|
||||
|
|
|
@ -1,13 +1,15 @@
|
|||
"""authentik admin tasks"""
|
||||
import re
|
||||
from os import environ
|
||||
|
||||
from django.core.cache import cache
|
||||
from django.core.validators import URLValidator
|
||||
from packaging.version import parse
|
||||
from prometheus_client import Info
|
||||
from requests import RequestException, get
|
||||
from structlog.stdlib import get_logger
|
||||
|
||||
from authentik import __version__
|
||||
from authentik import ENV_GIT_HASH_KEY, __version__
|
||||
from authentik.events.models import Event, EventAction
|
||||
from authentik.events.monitored_tasks import MonitoredTask, TaskResult, TaskResultStatus
|
||||
from authentik.root.celery import CELERY_APP
|
||||
|
@ -17,6 +19,18 @@ VERSION_CACHE_KEY = "authentik_latest_version"
|
|||
VERSION_CACHE_TIMEOUT = 8 * 60 * 60 # 8 hours
|
||||
# Chop of the first ^ because we want to search the entire string
|
||||
URL_FINDER = URLValidator.regex.pattern[1:]
|
||||
PROM_INFO = Info("authentik_version", "Currently running authentik version")
|
||||
|
||||
|
||||
def _set_prom_info():
|
||||
"""Set prometheus info for version"""
|
||||
PROM_INFO.info(
|
||||
{
|
||||
"version": __version__,
|
||||
"latest": cache.get(VERSION_CACHE_KEY, ""),
|
||||
"build_hash": environ.get(ENV_GIT_HASH_KEY, ""),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@CELERY_APP.task(bind=True, base=MonitoredTask)
|
||||
|
@ -36,6 +50,7 @@ def update_latest_version(self: MonitoredTask):
|
|||
TaskResultStatus.SUCCESSFUL, ["Successfully updated latest Version"]
|
||||
)
|
||||
)
|
||||
_set_prom_info()
|
||||
# Check if upstream version is newer than what we're running,
|
||||
# and if no event exists yet, create one.
|
||||
local_version = parse(__version__)
|
||||
|
@ -53,3 +68,6 @@ def update_latest_version(self: MonitoredTask):
|
|||
except (RequestException, IndexError) as exc:
|
||||
cache.set(VERSION_CACHE_KEY, "0.0.0", VERSION_CACHE_TIMEOUT)
|
||||
self.set_status(TaskResult(TaskResultStatus.ERROR).with_error(exc))
|
||||
|
||||
|
||||
_set_prom_info()
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
from importlib import import_module
|
||||
|
||||
from django.apps import AppConfig
|
||||
from django.db import ProgrammingError
|
||||
|
||||
from authentik.core.signals import GAUGE_MODELS
|
||||
from authentik.lib.utils.reflection import get_apps
|
||||
|
||||
|
||||
class AuthentikCoreConfig(AppConfig):
|
||||
|
@ -15,3 +19,12 @@ class AuthentikCoreConfig(AppConfig):
|
|||
def ready(self):
|
||||
import_module("authentik.core.signals")
|
||||
import_module("authentik.core.managed")
|
||||
try:
|
||||
for app in get_apps():
|
||||
for model in app.get_models():
|
||||
GAUGE_MODELS.labels(
|
||||
model_name=model._meta.model_name,
|
||||
app=model._meta.app_label,
|
||||
).set(model.objects.count())
|
||||
except ProgrammingError:
|
||||
pass
|
||||
|
|
|
@ -1,20 +1,31 @@
|
|||
"""authentik core signals"""
|
||||
from django.core.cache import cache
|
||||
from django.core.signals import Signal
|
||||
from django.db.models import Model
|
||||
from django.db.models.signals import post_save
|
||||
from django.dispatch import receiver
|
||||
from prometheus_client import Gauge
|
||||
|
||||
# Arguments: user: User, password: str
|
||||
password_changed = Signal()
|
||||
|
||||
GAUGE_MODELS = Gauge(
|
||||
"authentik_models", "Count of various objects", ["model_name", "app"]
|
||||
)
|
||||
|
||||
|
||||
@receiver(post_save)
|
||||
# pylint: disable=unused-argument
|
||||
def post_save_application(sender, instance, created: bool, **_):
|
||||
def post_save_application(sender: type[Model], instance, created: bool, **_):
|
||||
"""Clear user's application cache upon application creation"""
|
||||
from authentik.core.api.applications import user_app_cache_key
|
||||
from authentik.core.models import Application
|
||||
|
||||
GAUGE_MODELS.labels(
|
||||
model_name=sender._meta.model_name,
|
||||
app=sender._meta.app_label,
|
||||
).set(sender.objects.count())
|
||||
|
||||
if sender != Application:
|
||||
return
|
||||
if not created: # pragma: no cover
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
"""authentik events app"""
|
||||
from datetime import timedelta
|
||||
from importlib import import_module
|
||||
|
||||
from django.apps import AppConfig
|
||||
from django.db import ProgrammingError
|
||||
from django.utils.timezone import datetime
|
||||
|
||||
|
||||
class AuthentikEventsConfig(AppConfig):
|
||||
|
@ -13,3 +16,12 @@ class AuthentikEventsConfig(AppConfig):
|
|||
|
||||
def ready(self):
|
||||
import_module("authentik.events.signals")
|
||||
try:
|
||||
from authentik.events.models import Event
|
||||
|
||||
date_from = datetime.now() - timedelta(days=1)
|
||||
|
||||
for event in Event.objects.filter(created__gte=date_from):
|
||||
event._set_prom_metrics()
|
||||
except ProgrammingError:
|
||||
pass
|
||||
|
|
|
@ -11,6 +11,7 @@ from django.http import HttpRequest
|
|||
from django.utils.timezone import now
|
||||
from django.utils.translation import gettext as _
|
||||
from geoip2.errors import GeoIP2Error
|
||||
from prometheus_client import Gauge
|
||||
from requests import RequestException, post
|
||||
from structlog.stdlib import get_logger
|
||||
|
||||
|
@ -28,6 +29,11 @@ from authentik.policies.models import PolicyBindingModel
|
|||
from authentik.stages.email.utils import TemplateEmailMessage
|
||||
|
||||
LOGGER = get_logger("authentik.events")
|
||||
GAUGE_EVENTS = Gauge(
|
||||
"authentik_events",
|
||||
"Events in authentik",
|
||||
["action", "user_username", "app", "client_ip"],
|
||||
)
|
||||
|
||||
|
||||
def default_event_duration():
|
||||
|
@ -169,6 +175,14 @@ class Event(ExpiringModel):
|
|||
except GeoIP2Error as exc:
|
||||
LOGGER.warning("Failed to add geoIP Data to event", exc=exc)
|
||||
|
||||
def _set_prom_metrics(self):
|
||||
GAUGE_EVENTS.labels(
|
||||
action=self.action,
|
||||
user_username=self.user.get("username"),
|
||||
app=self.app,
|
||||
client_ip=self.client_ip,
|
||||
).set(self.created.timestamp())
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
if self._state.adding:
|
||||
LOGGER.debug(
|
||||
|
@ -178,7 +192,8 @@ class Event(ExpiringModel):
|
|||
client_ip=self.client_ip,
|
||||
user=self.user,
|
||||
)
|
||||
return super().save(*args, **kwargs)
|
||||
super().save(*args, **kwargs)
|
||||
self._set_prom_metrics()
|
||||
|
||||
@property
|
||||
def summary(self) -> str:
|
||||
|
|
|
@ -2,14 +2,22 @@
|
|||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from timeit import default_timer
|
||||
from traceback import format_tb
|
||||
from typing import Any, Optional
|
||||
|
||||
from celery import Task
|
||||
from django.core.cache import cache
|
||||
from prometheus_client import Gauge
|
||||
|
||||
from authentik.events.models import Event, EventAction
|
||||
|
||||
GAUGE_TASKS = Gauge(
|
||||
"authentik_system_tasks",
|
||||
"System tasks and their status",
|
||||
["task_name", "task_uid", "status"],
|
||||
)
|
||||
|
||||
|
||||
class TaskResultStatus(Enum):
|
||||
"""Possible states of tasks"""
|
||||
|
@ -43,7 +51,9 @@ class TaskInfo:
|
|||
"""Info about a task run"""
|
||||
|
||||
task_name: str
|
||||
finish_timestamp: datetime
|
||||
start_timestamp: float
|
||||
finish_timestamp: float
|
||||
finish_time: datetime
|
||||
|
||||
result: TaskResult
|
||||
|
||||
|
@ -73,12 +83,25 @@ class TaskInfo:
|
|||
"""Delete task info from cache"""
|
||||
return cache.delete(f"task_{self.task_name}")
|
||||
|
||||
def set_prom_metrics(self):
|
||||
"""Update prometheus metrics"""
|
||||
start = default_timer()
|
||||
if hasattr(self, "start_timestamp"):
|
||||
start = self.start_timestamp
|
||||
duration = max(self.finish_timestamp - start, 0)
|
||||
GAUGE_TASKS.labels(
|
||||
task_name=self.task_name,
|
||||
task_uid=self.result.uid or "",
|
||||
status=self.result.status,
|
||||
).set(duration)
|
||||
|
||||
def save(self, timeout_hours=6):
|
||||
"""Save task into cache"""
|
||||
key = f"task_{self.task_name}"
|
||||
if self.result.uid:
|
||||
key += f"_{self.result.uid}"
|
||||
self.task_name += f"_{self.result.uid}"
|
||||
self.set_prom_metrics()
|
||||
cache.set(key, self, timeout=timeout_hours * 60 * 60)
|
||||
|
||||
|
||||
|
@ -98,6 +121,7 @@ class MonitoredTask(Task):
|
|||
self._uid = None
|
||||
self._result = TaskResult(status=TaskResultStatus.ERROR, messages=[])
|
||||
self.result_timeout_hours = 6
|
||||
self.start = default_timer()
|
||||
|
||||
def set_uid(self, uid: str):
|
||||
"""Set UID, so in the case of an unexpected error its saved correctly"""
|
||||
|
@ -117,7 +141,9 @@ class MonitoredTask(Task):
|
|||
TaskInfo(
|
||||
task_name=self.__name__,
|
||||
task_description=self.__doc__,
|
||||
finish_timestamp=datetime.now(),
|
||||
start_timestamp=self.start,
|
||||
finish_timestamp=default_timer(),
|
||||
finish_time=datetime.now(),
|
||||
result=self._result,
|
||||
task_call_module=self.__module__,
|
||||
task_call_func=self.__name__,
|
||||
|
@ -133,7 +159,9 @@ class MonitoredTask(Task):
|
|||
TaskInfo(
|
||||
task_name=self.__name__,
|
||||
task_description=self.__doc__,
|
||||
finish_timestamp=datetime.now(),
|
||||
start_timestamp=self.start,
|
||||
finish_timestamp=default_timer(),
|
||||
finish_time=datetime.now(),
|
||||
result=self._result,
|
||||
task_call_module=self.__module__,
|
||||
task_call_func=self.__name__,
|
||||
|
@ -151,3 +179,7 @@ class MonitoredTask(Task):
|
|||
|
||||
def run(self, *args, **kwargs):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
for task in TaskInfo.all().values():
|
||||
task.set_prom_metrics()
|
||||
|
|
|
@ -4,6 +4,7 @@ from typing import Any, Optional
|
|||
|
||||
from django.core.cache import cache
|
||||
from django.http import HttpRequest
|
||||
from prometheus_client import Histogram
|
||||
from sentry_sdk.hub import Hub
|
||||
from sentry_sdk.tracing import Span
|
||||
from structlog.stdlib import BoundLogger, get_logger
|
||||
|
@ -14,6 +15,7 @@ from authentik.flows.exceptions import EmptyFlowException, FlowNonApplicableExce
|
|||
from authentik.flows.markers import ReevaluateMarker, StageMarker
|
||||
from authentik.flows.models import Flow, FlowStageBinding, Stage
|
||||
from authentik.policies.engine import PolicyEngine
|
||||
from authentik.root.monitoring import UpdatingGauge
|
||||
|
||||
LOGGER = get_logger()
|
||||
PLAN_CONTEXT_PENDING_USER = "pending_user"
|
||||
|
@ -21,6 +23,16 @@ PLAN_CONTEXT_SSO = "is_sso"
|
|||
PLAN_CONTEXT_REDIRECT = "redirect"
|
||||
PLAN_CONTEXT_APPLICATION = "application"
|
||||
PLAN_CONTEXT_SOURCE = "source"
|
||||
GAUGE_FLOWS_CACHED = UpdatingGauge(
|
||||
"authentik_flows_cached",
|
||||
"Cached flows",
|
||||
update_func=lambda: len(cache.keys("flow_*")),
|
||||
)
|
||||
HIST_FLOWS_PLAN_TIME = Histogram(
|
||||
"authentik_flows_plan_time",
|
||||
"Duration to build a plan for a flow",
|
||||
["flow_slug"],
|
||||
)
|
||||
|
||||
|
||||
def cache_key(flow: Flow, user: Optional[User] = None) -> str:
|
||||
|
@ -146,6 +158,7 @@ class FlowPlanner:
|
|||
)
|
||||
plan = self._build_plan(user, request, default_context)
|
||||
cache.set(cache_key(self.flow, user), plan)
|
||||
GAUGE_FLOWS_CACHED.update()
|
||||
if not plan.stages and not self.allow_empty_flows:
|
||||
raise EmptyFlowException()
|
||||
return plan
|
||||
|
@ -158,7 +171,9 @@ class FlowPlanner:
|
|||
) -> FlowPlan:
|
||||
"""Build flow plan by checking each stage in their respective
|
||||
order and checking the applied policies"""
|
||||
with Hub.current.start_span(op="flow.planner.build_plan") as span:
|
||||
with Hub.current.start_span(
|
||||
op="flow.planner.build_plan"
|
||||
) as span, HIST_FLOWS_PLAN_TIME.labels(flow_slug=self.flow.slug).time():
|
||||
span: Span
|
||||
span.set_data("flow", self.flow)
|
||||
span.set_data("user", user)
|
||||
|
@ -202,6 +217,7 @@ class FlowPlanner:
|
|||
marker = ReevaluateMarker(binding=binding, user=user)
|
||||
if stage:
|
||||
plan.append(stage, marker)
|
||||
HIST_FLOWS_PLAN_TIME.labels(flow_slug=self.flow.slug)
|
||||
self._logger.debug(
|
||||
"f(plan): finished building",
|
||||
)
|
||||
|
|
|
@ -8,11 +8,21 @@ from channels.exceptions import DenyConnection
|
|||
from dacite import from_dict
|
||||
from dacite.data import Data
|
||||
from guardian.shortcuts import get_objects_for_user
|
||||
from prometheus_client import Gauge
|
||||
from structlog.stdlib import get_logger
|
||||
|
||||
from authentik.core.channels import AuthJsonConsumer
|
||||
from authentik.outposts.models import OUTPOST_HELLO_INTERVAL, Outpost, OutpostState
|
||||
|
||||
GAUGE_OUTPOSTS_CONNECTED = Gauge(
|
||||
"authentik_outposts_connected", "Currently connected outposts", ["outpost", "uid"]
|
||||
)
|
||||
GAUGE_OUTPOSTS_LAST_UPDATE = Gauge(
|
||||
"authentik_outposts_last_update",
|
||||
"Last update from any outpost",
|
||||
["outpost", "uid", "version"],
|
||||
)
|
||||
|
||||
LOGGER = get_logger()
|
||||
|
||||
|
||||
|
@ -44,6 +54,8 @@ class OutpostConsumer(AuthJsonConsumer):
|
|||
|
||||
last_uid: Optional[str] = None
|
||||
|
||||
first_msg = False
|
||||
|
||||
def connect(self):
|
||||
super().connect()
|
||||
uuid = self.scope["url_route"]["kwargs"]["pk"]
|
||||
|
@ -68,6 +80,10 @@ class OutpostConsumer(AuthJsonConsumer):
|
|||
if self.channel_name in state.channel_ids:
|
||||
state.channel_ids.remove(self.channel_name)
|
||||
state.save()
|
||||
GAUGE_OUTPOSTS_CONNECTED.labels(
|
||||
outpost=self.outpost.name,
|
||||
uid=self.last_uid,
|
||||
).dec()
|
||||
LOGGER.debug(
|
||||
"removed outpost instance from cache",
|
||||
outpost=self.outpost,
|
||||
|
@ -78,15 +94,29 @@ class OutpostConsumer(AuthJsonConsumer):
|
|||
msg = from_dict(WebsocketMessage, content)
|
||||
uid = msg.args.get("uuid", self.channel_name)
|
||||
self.last_uid = uid
|
||||
|
||||
state = OutpostState.for_instance_uid(self.outpost, uid)
|
||||
if self.channel_name not in state.channel_ids:
|
||||
state.channel_ids.append(self.channel_name)
|
||||
state.last_seen = datetime.now()
|
||||
|
||||
if not self.first_msg:
|
||||
GAUGE_OUTPOSTS_CONNECTED.labels(
|
||||
outpost=self.outpost.name,
|
||||
uid=self.last_uid,
|
||||
).inc()
|
||||
self.first_msg = True
|
||||
|
||||
if msg.instruction == WebsocketMessageInstruction.HELLO:
|
||||
state.version = msg.args.get("version", None)
|
||||
state.build_hash = msg.args.get("buildHash", "")
|
||||
elif msg.instruction == WebsocketMessageInstruction.ACK:
|
||||
return
|
||||
GAUGE_OUTPOSTS_LAST_UPDATE.labels(
|
||||
outpost=self.outpost.name,
|
||||
uid=self.last_uid or "",
|
||||
version=state.version or "",
|
||||
).set_to_current_time()
|
||||
state.save(timeout=OUTPOST_HELLO_INTERVAL * 1.5)
|
||||
|
||||
response = WebsocketMessage(instruction=WebsocketMessageInstruction.ACK)
|
||||
|
|
|
@ -5,6 +5,7 @@ from typing import Iterator, Optional
|
|||
|
||||
from django.core.cache import cache
|
||||
from django.http import HttpRequest
|
||||
from prometheus_client import Histogram
|
||||
from sentry_sdk.hub import Hub
|
||||
from sentry_sdk.tracing import Span
|
||||
from structlog.stdlib import BoundLogger, get_logger
|
||||
|
@ -18,8 +19,19 @@ from authentik.policies.models import (
|
|||
)
|
||||
from authentik.policies.process import PolicyProcess, cache_key
|
||||
from authentik.policies.types import PolicyRequest, PolicyResult
|
||||
from authentik.root.monitoring import UpdatingGauge
|
||||
|
||||
CURRENT_PROCESS = current_process()
|
||||
GAUGE_POLICIES_CACHED = UpdatingGauge(
|
||||
"authentik_policies_cached",
|
||||
"Cached Policies",
|
||||
update_func=lambda: len(cache.keys("policy_*")),
|
||||
)
|
||||
HIST_POLICIES_BUILD_TIME = Histogram(
|
||||
"authentik_policies_build_time",
|
||||
"Execution times complete policy result to an object",
|
||||
["object_name", "object_type", "user"],
|
||||
)
|
||||
|
||||
|
||||
class PolicyProcessInfo:
|
||||
|
@ -92,7 +104,13 @@ class PolicyEngine:
|
|||
|
||||
def build(self) -> "PolicyEngine":
|
||||
"""Build wrapper which monitors performance"""
|
||||
with Hub.current.start_span(op="policy.engine.build") as span:
|
||||
with Hub.current.start_span(
|
||||
op="policy.engine.build"
|
||||
) as span, HIST_POLICIES_BUILD_TIME.labels(
|
||||
object_name=self.__pbm,
|
||||
object_type=f"{self.__pbm._meta.app_label}.{self.__pbm._meta.model_name}",
|
||||
user=self.request.user,
|
||||
).time():
|
||||
span: Span
|
||||
span.set_data("pbm", self.__pbm)
|
||||
span.set_data("request", self.request)
|
||||
|
|
|
@ -111,14 +111,30 @@ class PolicyBinding(SerializerModel):
|
|||
|
||||
return PolicyBindingSerializer
|
||||
|
||||
def __str__(self) -> str:
|
||||
suffix = ""
|
||||
@property
|
||||
def target_type(self) -> str:
|
||||
"""Get the target type this binding is applied to"""
|
||||
if self.policy:
|
||||
suffix = f"Policy {self.policy.name}"
|
||||
return "policy"
|
||||
if self.group:
|
||||
suffix = f"Group {self.group.name}"
|
||||
return "group"
|
||||
if self.user:
|
||||
suffix = f"User {self.user.name}"
|
||||
return "user"
|
||||
return "invalid"
|
||||
|
||||
@property
|
||||
def target_name(self) -> str:
|
||||
"""Get the target name this binding is applied to"""
|
||||
if self.policy:
|
||||
return self.policy.name
|
||||
if self.group:
|
||||
return self.group.name
|
||||
if self.user:
|
||||
return self.user.name
|
||||
return "invalid"
|
||||
|
||||
def __str__(self) -> str:
|
||||
suffix = f"{self.target_type.title()} {self.target_name}"
|
||||
try:
|
||||
return f"Binding from {self.target} #{self.order} to {suffix}"
|
||||
except PolicyBinding.target.RelatedObjectDoesNotExist: # pylint: disable=no-member
|
||||
|
|
|
@ -5,6 +5,7 @@ from traceback import format_tb
|
|||
from typing import Optional
|
||||
|
||||
from django.core.cache import cache
|
||||
from prometheus_client import Histogram
|
||||
from sentry_sdk.hub import Hub
|
||||
from sentry_sdk.tracing import Span
|
||||
from structlog.stdlib import get_logger
|
||||
|
@ -19,6 +20,18 @@ TRACEBACK_HEADER = "Traceback (most recent call last):\n"
|
|||
|
||||
FORK_CTX = get_context("fork")
|
||||
PROCESS_CLASS = FORK_CTX.Process
|
||||
HIST_POLICIES_EXECUTION_TIME = Histogram(
|
||||
"authentik_policies_execution_time",
|
||||
"Execution times for single policies",
|
||||
[
|
||||
"binding_order",
|
||||
"binding_target_type",
|
||||
"binding_target_name",
|
||||
"object_name",
|
||||
"object_type",
|
||||
"user",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def cache_key(binding: PolicyBinding, request: PolicyRequest) -> str:
|
||||
|
@ -121,7 +134,14 @@ class PolicyProcess(PROCESS_CLASS):
|
|||
"""Task wrapper to run policy checking"""
|
||||
with Hub.current.start_span(
|
||||
op="policy.process.execute",
|
||||
) as span:
|
||||
) as span, HIST_POLICIES_EXECUTION_TIME.labels(
|
||||
binding_order=self.binding.order,
|
||||
binding_target_type=self.binding.target_type,
|
||||
binding_target_name=self.binding.target_name,
|
||||
object_name=self.request.obj,
|
||||
object_type=f"{self.request.obj._meta.app_label}.{self.request.obj._meta.model_name}",
|
||||
user=str(self.request.user),
|
||||
).time():
|
||||
span: Span
|
||||
span.set_data("policy", self.binding.policy)
|
||||
span.set_data("request", self.request)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
"""Metrics view"""
|
||||
from base64 import b64encode
|
||||
from typing import Callable
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import connections
|
||||
|
@ -8,8 +9,30 @@ from django.http import HttpRequest, HttpResponse
|
|||
from django.views import View
|
||||
from django_prometheus.exports import ExportToDjangoView
|
||||
from django_redis import get_redis_connection
|
||||
from prometheus_client import Gauge
|
||||
from redis.exceptions import RedisError
|
||||
|
||||
from authentik.admin.api.workers import GAUGE_WORKERS
|
||||
from authentik.events.monitored_tasks import TaskInfo
|
||||
from authentik.root.celery import CELERY_APP
|
||||
|
||||
|
||||
class UpdatingGauge(Gauge):
|
||||
"""Gauge which fetches its own value from an update function.
|
||||
|
||||
Update function is called on instantiate"""
|
||||
|
||||
def __init__(self, *args, update_func: Callable, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._update_func = update_func
|
||||
self.update()
|
||||
|
||||
def update(self):
|
||||
"""Set value from update function"""
|
||||
val = self._update_func()
|
||||
if val:
|
||||
self.set(val)
|
||||
|
||||
|
||||
class MetricsView(View):
|
||||
"""Wrapper around ExportToDjangoView, using http-basic auth"""
|
||||
|
@ -20,12 +43,18 @@ class MetricsView(View):
|
|||
auth_type, _, given_credentials = auth_header.partition(" ")
|
||||
credentials = f"monitor:{settings.SECRET_KEY}"
|
||||
expected = b64encode(str.encode(credentials)).decode()
|
||||
|
||||
if auth_type != "Basic" or given_credentials != expected:
|
||||
authed = auth_type == "Basic" and given_credentials == expected
|
||||
if not authed and not settings.DEBUG:
|
||||
response = HttpResponse(status=401)
|
||||
response["WWW-Authenticate"] = 'Basic realm="authentik-monitoring"'
|
||||
return response
|
||||
|
||||
count = len(CELERY_APP.control.ping(timeout=0.5))
|
||||
GAUGE_WORKERS.set(count)
|
||||
|
||||
for task in TaskInfo.all().values():
|
||||
task.set_prom_metrics()
|
||||
|
||||
return ExportToDjangoView(request)
|
||||
|
||||
|
||||
|
|
|
@ -256,7 +256,7 @@ CHANNEL_LAYERS = {
|
|||
|
||||
DATABASES = {
|
||||
"default": {
|
||||
"ENGINE": "django.db.backends.postgresql",
|
||||
"ENGINE": "django_prometheus.db.backends.postgresql",
|
||||
"HOST": CONFIG.y("postgresql.host"),
|
||||
"NAME": CONFIG.y("postgresql.name"),
|
||||
"USER": CONFIG.y("postgresql.user"),
|
||||
|
@ -334,6 +334,10 @@ CELERY_RESULT_BACKEND = (
|
|||
DBBACKUP_STORAGE = "django.core.files.storage.FileSystemStorage"
|
||||
DBBACKUP_STORAGE_OPTIONS = {"location": "./backups" if DEBUG else "/backups"}
|
||||
DBBACKUP_FILENAME_TEMPLATE = "authentik-backup-{datetime}.sql"
|
||||
DBBACKUP_CONNECTOR_MAPPING = {
|
||||
"django_prometheus.db.backends.postgresql": "dbbackup.db.postgresql.PgDumpConnector",
|
||||
}
|
||||
|
||||
if CONFIG.y("postgresql.s3_backup"):
|
||||
DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
|
||||
DBBACKUP_STORAGE_OPTIONS = {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
openapi: 3.0.3
|
||||
info:
|
||||
title: authentik
|
||||
version: 2021.5.3
|
||||
version: 2021.5.4
|
||||
description: Making authentication simple.
|
||||
contact:
|
||||
email: hello@beryju.org
|
||||
|
|
Reference in a new issue