Prometheus metrics (#914)

* admin: add worker metrics

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* admin: add version metrics

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* events: add gauge for system tasks

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* outposts: add gauge for last hello and connection status

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* root: re-add prometheus metrics to database

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* root: allow access to metrics without credentials when debug is on

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* root: add UpdatingGauge to auto-set value on load

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* flows: add metrics for cache and building

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* policies: add metrics for policy engine

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* events: add histogram for task durations

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* events: revert to gauge because values are updated on export view

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* core: add gauge to count all models

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>

* events: add metrics for events

Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
Authored by Jens L on 2021-05-23 20:29:34 +02:00, committed by GitHub
parent a5cd9fa141
commit 53e2b2c784
GPG key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
16 changed files with 258 additions and 20 deletions
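
The diffs that follow all rely on the same prometheus_client primitives, declared once at module level and updated from the instrumented code paths. As a rough orientation only (the metric and label names below are made up for illustration and are not part of this PR), the three types used here behave like this:

from prometheus_client import Gauge, Histogram, Info

# Gauge: a value that can move up and down, optionally split by labels.
EXAMPLE_CONNECTED = Gauge("example_connected", "Currently connected clients", ["name"])
EXAMPLE_CONNECTED.labels(name="outpost-1").inc()  # e.g. on connect
EXAMPLE_CONNECTED.labels(name="outpost-1").dec()  # e.g. on disconnect

# Histogram: observes durations, here via the .time() context manager.
EXAMPLE_BUILD_TIME = Histogram("example_build_time", "Duration of a build", ["slug"])
with EXAMPLE_BUILD_TIME.labels(slug="default").time():
    pass  # timed work goes here

# Info: a static set of key/value pairs, e.g. version information.
EXAMPLE_VERSION = Info("example_version", "Currently running version")
EXAMPLE_VERSION.info({"version": "1.0.0", "build_hash": ""})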

View file

@@ -22,7 +22,7 @@ class TaskSerializer(PassiveSerializer):
     task_name = CharField()
     task_description = CharField()
-    task_finish_timestamp = DateTimeField(source="finish_timestamp")
+    task_finish_timestamp = DateTimeField(source="finish_time")
     status = ChoiceField(
         source="result.status.name",

View file

@@ -1,5 +1,6 @@
 """authentik administration overview"""
 from drf_spectacular.utils import extend_schema, inline_serializer
+from prometheus_client import Gauge
 from rest_framework.fields import IntegerField
 from rest_framework.permissions import IsAdminUser
 from rest_framework.request import Request
@@ -8,6 +9,8 @@ from rest_framework.views import APIView
 from authentik.root.celery import CELERY_APP
+GAUGE_WORKERS = Gauge("authentik_admin_workers", "Currently connected workers")
 class WorkerView(APIView):
     """Get currently connected worker count."""
@@ -19,4 +22,5 @@ class WorkerView(APIView):
     )
     def get(self, request: Request) -> Response:
         """Get currently connected worker count."""
-        return Response({"count": len(CELERY_APP.control.ping(timeout=0.5))})
+        count = len(CELERY_APP.control.ping(timeout=0.5))
+        return Response({"count": count})

View file

@@ -1,13 +1,15 @@
 """authentik admin tasks"""
 import re
+from os import environ
 from django.core.cache import cache
 from django.core.validators import URLValidator
 from packaging.version import parse
+from prometheus_client import Info
 from requests import RequestException, get
 from structlog.stdlib import get_logger
-from authentik import __version__
+from authentik import ENV_GIT_HASH_KEY, __version__
 from authentik.events.models import Event, EventAction
 from authentik.events.monitored_tasks import MonitoredTask, TaskResult, TaskResultStatus
 from authentik.root.celery import CELERY_APP
@@ -17,6 +19,18 @@ VERSION_CACHE_KEY = "authentik_latest_version"
 VERSION_CACHE_TIMEOUT = 8 * 60 * 60  # 8 hours
 # Chop of the first ^ because we want to search the entire string
 URL_FINDER = URLValidator.regex.pattern[1:]
+PROM_INFO = Info("authentik_version", "Currently running authentik version")
+def _set_prom_info():
+    """Set prometheus info for version"""
+    PROM_INFO.info(
+        {
+            "version": __version__,
+            "latest": cache.get(VERSION_CACHE_KEY, ""),
+            "build_hash": environ.get(ENV_GIT_HASH_KEY, ""),
+        }
+    )
 @CELERY_APP.task(bind=True, base=MonitoredTask)
@@ -36,6 +50,7 @@ def update_latest_version(self: MonitoredTask):
                 TaskResultStatus.SUCCESSFUL, ["Successfully updated latest Version"]
             )
         )
+        _set_prom_info()
         # Check if upstream version is newer than what we're running,
         # and if no event exists yet, create one.
         local_version = parse(__version__)
@@ -53,3 +68,6 @@ def update_latest_version(self: MonitoredTask):
     except (RequestException, IndexError) as exc:
         cache.set(VERSION_CACHE_KEY, "0.0.0", VERSION_CACHE_TIMEOUT)
         self.set_status(TaskResult(TaskResultStatus.ERROR).with_error(exc))
+_set_prom_info()

View file

@@ -2,6 +2,10 @@
 from importlib import import_module
 from django.apps import AppConfig
+from django.db import ProgrammingError
+from authentik.core.signals import GAUGE_MODELS
+from authentik.lib.utils.reflection import get_apps
 class AuthentikCoreConfig(AppConfig):
@@ -15,3 +19,12 @@ class AuthentikCoreConfig(AppConfig):
     def ready(self):
         import_module("authentik.core.signals")
         import_module("authentik.core.managed")
+        try:
+            for app in get_apps():
+                for model in app.get_models():
+                    GAUGE_MODELS.labels(
+                        model_name=model._meta.model_name,
+                        app=model._meta.app_label,
+                    ).set(model.objects.count())
+        except ProgrammingError:
+            pass

View file

@@ -1,20 +1,31 @@
 """authentik core signals"""
 from django.core.cache import cache
 from django.core.signals import Signal
+from django.db.models import Model
 from django.db.models.signals import post_save
 from django.dispatch import receiver
+from prometheus_client import Gauge
 # Arguments: user: User, password: str
 password_changed = Signal()
+GAUGE_MODELS = Gauge(
+    "authentik_models", "Count of various objects", ["model_name", "app"]
+)
 @receiver(post_save)
 # pylint: disable=unused-argument
-def post_save_application(sender, instance, created: bool, **_):
+def post_save_application(sender: type[Model], instance, created: bool, **_):
     """Clear user's application cache upon application creation"""
     from authentik.core.api.applications import user_app_cache_key
     from authentik.core.models import Application
+    GAUGE_MODELS.labels(
+        model_name=sender._meta.model_name,
+        app=sender._meta.app_label,
+    ).set(sender.objects.count())
     if sender != Application:
         return
     if not created:  # pragma: no cover

View file

@@ -1,7 +1,10 @@
 """authentik events app"""
+from datetime import timedelta
 from importlib import import_module
 from django.apps import AppConfig
+from django.db import ProgrammingError
+from django.utils.timezone import datetime
 class AuthentikEventsConfig(AppConfig):
@@ -13,3 +16,12 @@ class AuthentikEventsConfig(AppConfig):
     def ready(self):
         import_module("authentik.events.signals")
+        try:
+            from authentik.events.models import Event
+            date_from = datetime.now() - timedelta(days=1)
+            for event in Event.objects.filter(created__gte=date_from):
+                event._set_prom_metrics()
+        except ProgrammingError:
+            pass

View file

@@ -11,6 +11,7 @@ from django.http import HttpRequest
 from django.utils.timezone import now
 from django.utils.translation import gettext as _
 from geoip2.errors import GeoIP2Error
+from prometheus_client import Gauge
 from requests import RequestException, post
 from structlog.stdlib import get_logger
@@ -28,6 +29,11 @@ from authentik.policies.models import PolicyBindingModel
 from authentik.stages.email.utils import TemplateEmailMessage
 LOGGER = get_logger("authentik.events")
+GAUGE_EVENTS = Gauge(
+    "authentik_events",
+    "Events in authentik",
+    ["action", "user_username", "app", "client_ip"],
+)
 def default_event_duration():
@@ -169,6 +175,14 @@ class Event(ExpiringModel):
         except GeoIP2Error as exc:
             LOGGER.warning("Failed to add geoIP Data to event", exc=exc)
+    def _set_prom_metrics(self):
+        GAUGE_EVENTS.labels(
+            action=self.action,
+            user_username=self.user.get("username"),
+            app=self.app,
+            client_ip=self.client_ip,
+        ).set(self.created.timestamp())
     def save(self, *args, **kwargs):
         if self._state.adding:
             LOGGER.debug(
@@ -178,7 +192,8 @@ class Event(ExpiringModel):
                 client_ip=self.client_ip,
                 user=self.user,
             )
-        return super().save(*args, **kwargs)
+        super().save(*args, **kwargs)
+        self._set_prom_metrics()
     @property
     def summary(self) -> str:

View file

@@ -2,14 +2,22 @@
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
+from timeit import default_timer
 from traceback import format_tb
 from typing import Any, Optional
 from celery import Task
 from django.core.cache import cache
+from prometheus_client import Gauge
 from authentik.events.models import Event, EventAction
+GAUGE_TASKS = Gauge(
+    "authentik_system_tasks",
+    "System tasks and their status",
+    ["task_name", "task_uid", "status"],
+)
 class TaskResultStatus(Enum):
     """Possible states of tasks"""
@@ -43,7 +51,9 @@ class TaskInfo:
     """Info about a task run"""
     task_name: str
-    finish_timestamp: datetime
+    start_timestamp: float
+    finish_timestamp: float
+    finish_time: datetime
     result: TaskResult
@@ -73,12 +83,25 @@ class TaskInfo:
         """Delete task info from cache"""
         return cache.delete(f"task_{self.task_name}")
+    def set_prom_metrics(self):
+        """Update prometheus metrics"""
+        start = default_timer()
+        if hasattr(self, "start_timestamp"):
+            start = self.start_timestamp
+        duration = max(self.finish_timestamp - start, 0)
+        GAUGE_TASKS.labels(
+            task_name=self.task_name,
+            task_uid=self.result.uid or "",
+            status=self.result.status,
+        ).set(duration)
     def save(self, timeout_hours=6):
         """Save task into cache"""
         key = f"task_{self.task_name}"
         if self.result.uid:
             key += f"_{self.result.uid}"
             self.task_name += f"_{self.result.uid}"
+        self.set_prom_metrics()
         cache.set(key, self, timeout=timeout_hours * 60 * 60)
@@ -98,6 +121,7 @@ class MonitoredTask(Task):
         self._uid = None
         self._result = TaskResult(status=TaskResultStatus.ERROR, messages=[])
         self.result_timeout_hours = 6
+        self.start = default_timer()
     def set_uid(self, uid: str):
         """Set UID, so in the case of an unexpected error its saved correctly"""
@@ -117,7 +141,9 @@ class MonitoredTask(Task):
         TaskInfo(
             task_name=self.__name__,
             task_description=self.__doc__,
-            finish_timestamp=datetime.now(),
+            start_timestamp=self.start,
+            finish_timestamp=default_timer(),
+            finish_time=datetime.now(),
             result=self._result,
             task_call_module=self.__module__,
             task_call_func=self.__name__,
@@ -133,7 +159,9 @@ class MonitoredTask(Task):
         TaskInfo(
             task_name=self.__name__,
             task_description=self.__doc__,
-            finish_timestamp=datetime.now(),
+            start_timestamp=self.start,
+            finish_timestamp=default_timer(),
+            finish_time=datetime.now(),
             result=self._result,
             task_call_module=self.__module__,
             task_call_func=self.__name__,
@@ -151,3 +179,7 @@ class MonitoredTask(Task):
     def run(self, *args, **kwargs):
         raise NotImplementedError
+for task in TaskInfo.all().values():
+    task.set_prom_metrics()
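
For context on how the gauge above gets populated: tasks subclass MonitoredTask and report a TaskResult, and TaskInfo.set_prom_metrics() converts the recorded start/finish timers into a per-task duration. A minimal sketch of that pattern, with a hypothetical task name (the real tasks in this PR, such as update_latest_version, follow the same shape):

from authentik.events.monitored_tasks import MonitoredTask, TaskResult, TaskResultStatus
from authentik.root.celery import CELERY_APP

@CELERY_APP.task(bind=True, base=MonitoredTask)
def example_task(self: MonitoredTask):
    """Hypothetical monitored task"""
    # ... the actual work happens here ...
    self.set_status(TaskResult(TaskResultStatus.SUCCESSFUL, ["done"]))
    # after_return/on_failure store a TaskInfo, whose set_prom_metrics() sets
    # authentik_system_tasks{task_name=..., task_uid=..., status=...} to the run duration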

View file

@@ -4,6 +4,7 @@ from typing import Any, Optional
 from django.core.cache import cache
 from django.http import HttpRequest
+from prometheus_client import Histogram
 from sentry_sdk.hub import Hub
 from sentry_sdk.tracing import Span
 from structlog.stdlib import BoundLogger, get_logger
@@ -14,6 +15,7 @@ from authentik.flows.exceptions import EmptyFlowException, FlowNonApplicableExce
 from authentik.flows.markers import ReevaluateMarker, StageMarker
 from authentik.flows.models import Flow, FlowStageBinding, Stage
 from authentik.policies.engine import PolicyEngine
+from authentik.root.monitoring import UpdatingGauge
 LOGGER = get_logger()
 PLAN_CONTEXT_PENDING_USER = "pending_user"
@@ -21,6 +23,16 @@ PLAN_CONTEXT_SSO = "is_sso"
 PLAN_CONTEXT_REDIRECT = "redirect"
 PLAN_CONTEXT_APPLICATION = "application"
 PLAN_CONTEXT_SOURCE = "source"
+GAUGE_FLOWS_CACHED = UpdatingGauge(
+    "authentik_flows_cached",
+    "Cached flows",
+    update_func=lambda: len(cache.keys("flow_*")),
+)
+HIST_FLOWS_PLAN_TIME = Histogram(
+    "authentik_flows_plan_time",
+    "Duration to build a plan for a flow",
+    ["flow_slug"],
+)
 def cache_key(flow: Flow, user: Optional[User] = None) -> str:
@@ -146,6 +158,7 @@ class FlowPlanner:
             )
             plan = self._build_plan(user, request, default_context)
             cache.set(cache_key(self.flow, user), plan)
+            GAUGE_FLOWS_CACHED.update()
             if not plan.stages and not self.allow_empty_flows:
                 raise EmptyFlowException()
             return plan
@@ -158,7 +171,9 @@ class FlowPlanner:
     ) -> FlowPlan:
         """Build flow plan by checking each stage in their respective
         order and checking the applied policies"""
-        with Hub.current.start_span(op="flow.planner.build_plan") as span:
+        with Hub.current.start_span(
+            op="flow.planner.build_plan"
+        ) as span, HIST_FLOWS_PLAN_TIME.labels(flow_slug=self.flow.slug).time():
             span: Span
             span.set_data("flow", self.flow)
             span.set_data("user", user)
@@ -202,6 +217,7 @@ class FlowPlanner:
                     marker = ReevaluateMarker(binding=binding, user=user)
                 if stage:
                     plan.append(stage, marker)
+            HIST_FLOWS_PLAN_TIME.labels(flow_slug=self.flow.slug)
            self._logger.debug(
                "f(plan): finished building",
            )

View file

@@ -8,11 +8,21 @@ from channels.exceptions import DenyConnection
 from dacite import from_dict
 from dacite.data import Data
 from guardian.shortcuts import get_objects_for_user
+from prometheus_client import Gauge
 from structlog.stdlib import get_logger
 from authentik.core.channels import AuthJsonConsumer
 from authentik.outposts.models import OUTPOST_HELLO_INTERVAL, Outpost, OutpostState
+GAUGE_OUTPOSTS_CONNECTED = Gauge(
+    "authentik_outposts_connected", "Currently connected outposts", ["outpost", "uid"]
+)
+GAUGE_OUTPOSTS_LAST_UPDATE = Gauge(
+    "authentik_outposts_last_update",
+    "Last update from any outpost",
+    ["outpost", "uid", "version"],
+)
 LOGGER = get_logger()
@@ -44,6 +54,8 @@ class OutpostConsumer(AuthJsonConsumer):
     last_uid: Optional[str] = None
+    first_msg = False
     def connect(self):
         super().connect()
         uuid = self.scope["url_route"]["kwargs"]["pk"]
@@ -68,6 +80,10 @@ class OutpostConsumer(AuthJsonConsumer):
         if self.channel_name in state.channel_ids:
             state.channel_ids.remove(self.channel_name)
             state.save()
+        GAUGE_OUTPOSTS_CONNECTED.labels(
+            outpost=self.outpost.name,
+            uid=self.last_uid,
+        ).dec()
         LOGGER.debug(
             "removed outpost instance from cache",
             outpost=self.outpost,
@@ -78,15 +94,29 @@ class OutpostConsumer(AuthJsonConsumer):
         msg = from_dict(WebsocketMessage, content)
         uid = msg.args.get("uuid", self.channel_name)
         self.last_uid = uid
         state = OutpostState.for_instance_uid(self.outpost, uid)
         if self.channel_name not in state.channel_ids:
             state.channel_ids.append(self.channel_name)
         state.last_seen = datetime.now()
+        if not self.first_msg:
+            GAUGE_OUTPOSTS_CONNECTED.labels(
+                outpost=self.outpost.name,
+                uid=self.last_uid,
+            ).inc()
+            self.first_msg = True
         if msg.instruction == WebsocketMessageInstruction.HELLO:
             state.version = msg.args.get("version", None)
             state.build_hash = msg.args.get("buildHash", "")
         elif msg.instruction == WebsocketMessageInstruction.ACK:
             return
+        GAUGE_OUTPOSTS_LAST_UPDATE.labels(
+            outpost=self.outpost.name,
+            uid=self.last_uid or "",
+            version=state.version or "",
+        ).set_to_current_time()
         state.save(timeout=OUTPOST_HELLO_INTERVAL * 1.5)
         response = WebsocketMessage(instruction=WebsocketMessageInstruction.ACK)

View file

@@ -5,6 +5,7 @@ from typing import Iterator, Optional
 from django.core.cache import cache
 from django.http import HttpRequest
+from prometheus_client import Histogram
 from sentry_sdk.hub import Hub
 from sentry_sdk.tracing import Span
 from structlog.stdlib import BoundLogger, get_logger
@@ -18,8 +19,19 @@ from authentik.policies.models import (
 )
 from authentik.policies.process import PolicyProcess, cache_key
 from authentik.policies.types import PolicyRequest, PolicyResult
+from authentik.root.monitoring import UpdatingGauge
 CURRENT_PROCESS = current_process()
+GAUGE_POLICIES_CACHED = UpdatingGauge(
+    "authentik_policies_cached",
+    "Cached Policies",
+    update_func=lambda: len(cache.keys("policy_*")),
+)
+HIST_POLICIES_BUILD_TIME = Histogram(
+    "authentik_policies_build_time",
+    "Execution times complete policy result to an object",
+    ["object_name", "object_type", "user"],
+)
 class PolicyProcessInfo:
@@ -92,7 +104,13 @@ class PolicyEngine:
     def build(self) -> "PolicyEngine":
         """Build wrapper which monitors performance"""
-        with Hub.current.start_span(op="policy.engine.build") as span:
+        with Hub.current.start_span(
+            op="policy.engine.build"
+        ) as span, HIST_POLICIES_BUILD_TIME.labels(
+            object_name=self.__pbm,
+            object_type=f"{self.__pbm._meta.app_label}.{self.__pbm._meta.model_name}",
+            user=self.request.user,
+        ).time():
             span: Span
             span.set_data("pbm", self.__pbm)
             span.set_data("request", self.request)

View file

@@ -111,14 +111,30 @@ class PolicyBinding(SerializerModel):
         return PolicyBindingSerializer
-    def __str__(self) -> str:
-        suffix = ""
+    @property
+    def target_type(self) -> str:
+        """Get the target type this binding is applied to"""
         if self.policy:
-            suffix = f"Policy {self.policy.name}"
+            return "policy"
         if self.group:
-            suffix = f"Group {self.group.name}"
+            return "group"
         if self.user:
-            suffix = f"User {self.user.name}"
+            return "user"
+        return "invalid"
+    @property
+    def target_name(self) -> str:
+        """Get the target name this binding is applied to"""
+        if self.policy:
+            return self.policy.name
+        if self.group:
+            return self.group.name
+        if self.user:
+            return self.user.name
+        return "invalid"
+    def __str__(self) -> str:
+        suffix = f"{self.target_type.title()} {self.target_name}"
         try:
             return f"Binding from {self.target} #{self.order} to {suffix}"
         except PolicyBinding.target.RelatedObjectDoesNotExist:  # pylint: disable=no-member

View file

@@ -5,6 +5,7 @@ from traceback import format_tb
 from typing import Optional
 from django.core.cache import cache
+from prometheus_client import Histogram
 from sentry_sdk.hub import Hub
 from sentry_sdk.tracing import Span
 from structlog.stdlib import get_logger
@@ -19,6 +20,18 @@ TRACEBACK_HEADER = "Traceback (most recent call last):\n"
 FORK_CTX = get_context("fork")
 PROCESS_CLASS = FORK_CTX.Process
+HIST_POLICIES_EXECUTION_TIME = Histogram(
+    "authentik_policies_execution_time",
+    "Execution times for single policies",
+    [
+        "binding_order",
+        "binding_target_type",
+        "binding_target_name",
+        "object_name",
+        "object_type",
+        "user",
+    ],
+)
 def cache_key(binding: PolicyBinding, request: PolicyRequest) -> str:
@@ -121,7 +134,14 @@ class PolicyProcess(PROCESS_CLASS):
         """Task wrapper to run policy checking"""
         with Hub.current.start_span(
             op="policy.process.execute",
-        ) as span:
+        ) as span, HIST_POLICIES_EXECUTION_TIME.labels(
+            binding_order=self.binding.order,
+            binding_target_type=self.binding.target_type,
+            binding_target_name=self.binding.target_name,
+            object_name=self.request.obj,
+            object_type=f"{self.request.obj._meta.app_label}.{self.request.obj._meta.model_name}",
+            user=str(self.request.user),
+        ).time():
             span: Span
             span.set_data("policy", self.binding.policy)
             span.set_data("request", self.request)

View file

@@ -1,5 +1,6 @@
 """Metrics view"""
 from base64 import b64encode
+from typing import Callable
 from django.conf import settings
 from django.db import connections
@@ -8,8 +9,30 @@ from django.http import HttpRequest, HttpResponse
 from django.views import View
 from django_prometheus.exports import ExportToDjangoView
 from django_redis import get_redis_connection
+from prometheus_client import Gauge
 from redis.exceptions import RedisError
+from authentik.admin.api.workers import GAUGE_WORKERS
+from authentik.events.monitored_tasks import TaskInfo
+from authentik.root.celery import CELERY_APP
+class UpdatingGauge(Gauge):
+    """Gauge which fetches its own value from an update function.
+    Update function is called on instantiate"""
+    def __init__(self, *args, update_func: Callable, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._update_func = update_func
+        self.update()
+    def update(self):
+        """Set value from update function"""
+        val = self._update_func()
+        if val:
+            self.set(val)
 class MetricsView(View):
     """Wrapper around ExportToDjangoView, using http-basic auth"""
@@ -20,12 +43,18 @@ class MetricsView(View):
         auth_type, _, given_credentials = auth_header.partition(" ")
         credentials = f"monitor:{settings.SECRET_KEY}"
         expected = b64encode(str.encode(credentials)).decode()
-        if auth_type != "Basic" or given_credentials != expected:
+        authed = auth_type == "Basic" and given_credentials == expected
+        if not authed and not settings.DEBUG:
             response = HttpResponse(status=401)
             response["WWW-Authenticate"] = 'Basic realm="authentik-monitoring"'
             return response
+        count = len(CELERY_APP.control.ping(timeout=0.5))
+        GAUGE_WORKERS.set(count)
+        for task in TaskInfo.all().values():
+            task.set_prom_metrics()
         return ExportToDjangoView(request)
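
With this change the metrics view refreshes the worker and task gauges on every export, and only skips basic auth in debug mode. A minimal scrape sketch (the /metrics/ path, host, and port are assumptions; use whatever URL the view is routed at, with the monitor user and the Django SECRET_KEY as the password):

import requests

# Assumed URL; the actual route for MetricsView is defined in the URL config.
resp = requests.get(
    "http://localhost:8000/metrics/",
    auth=("monitor", "<SECRET_KEY>"),  # matches credentials = f"monitor:{settings.SECRET_KEY}"
    timeout=5,
)
resp.raise_for_status()
print(resp.text)  # Prometheus text exposition format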

View file

@@ -256,7 +256,7 @@ CHANNEL_LAYERS = {
 DATABASES = {
     "default": {
-        "ENGINE": "django.db.backends.postgresql",
+        "ENGINE": "django_prometheus.db.backends.postgresql",
         "HOST": CONFIG.y("postgresql.host"),
         "NAME": CONFIG.y("postgresql.name"),
         "USER": CONFIG.y("postgresql.user"),
@@ -334,6 +334,10 @@ CELERY_RESULT_BACKEND = (
 DBBACKUP_STORAGE = "django.core.files.storage.FileSystemStorage"
 DBBACKUP_STORAGE_OPTIONS = {"location": "./backups" if DEBUG else "/backups"}
 DBBACKUP_FILENAME_TEMPLATE = "authentik-backup-{datetime}.sql"
+DBBACKUP_CONNECTOR_MAPPING = {
+    "django_prometheus.db.backends.postgresql": "dbbackup.db.postgresql.PgDumpConnector",
+}
 if CONFIG.y("postgresql.s3_backup"):
     DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
     DBBACKUP_STORAGE_OPTIONS = {

View file

@@ -1,7 +1,7 @@
 openapi: 3.0.3
 info:
   title: authentik
-  version: 2021.5.3
+  version: 2021.5.4
   description: Making authentication simple.
   contact:
     email: hello@beryju.org