Prometheus metrics (#914)
* admin: add worker metrics Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* admin: add version metrics Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* events: add gauge for system tasks Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* outposts: add gauge for last hello and connection status Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* root: re-add prometheus metrics to database Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* root: allow access to metrics without credentials when debug is on Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* root: add UpdatingGauge to auto-set value on load Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* flows: add metrics for cache and building Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* policies: add metrics for policy engine Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* events: add histogram for task durations Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* events: revert to gauge because values are updated on export view Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* core: add gauge to count all models Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
* events: add metrics for events Signed-off-by: Jens Langhammer <jens.langhammer@beryju.org>
This commit is contained in:
parent
a5cd9fa141
commit
53e2b2c784
|
@ -22,7 +22,7 @@ class TaskSerializer(PassiveSerializer):
|
||||||
|
|
||||||
task_name = CharField()
|
task_name = CharField()
|
||||||
task_description = CharField()
|
task_description = CharField()
|
||||||
task_finish_timestamp = DateTimeField(source="finish_timestamp")
|
task_finish_timestamp = DateTimeField(source="finish_time")
|
||||||
|
|
||||||
status = ChoiceField(
|
status = ChoiceField(
|
||||||
source="result.status.name",
|
source="result.status.name",
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
"""authentik administration overview"""
|
"""authentik administration overview"""
|
||||||
from drf_spectacular.utils import extend_schema, inline_serializer
|
from drf_spectacular.utils import extend_schema, inline_serializer
|
||||||
|
from prometheus_client import Gauge
|
||||||
from rest_framework.fields import IntegerField
|
from rest_framework.fields import IntegerField
|
||||||
from rest_framework.permissions import IsAdminUser
|
from rest_framework.permissions import IsAdminUser
|
||||||
from rest_framework.request import Request
|
from rest_framework.request import Request
|
||||||
|
@ -8,6 +9,8 @@ from rest_framework.views import APIView
|
||||||
|
|
||||||
from authentik.root.celery import CELERY_APP
|
from authentik.root.celery import CELERY_APP
|
||||||
|
|
||||||
|
GAUGE_WORKERS = Gauge("authentik_admin_workers", "Currently connected workers")
|
||||||
|
|
||||||
|
|
||||||
class WorkerView(APIView):
|
class WorkerView(APIView):
|
||||||
"""Get currently connected worker count."""
|
"""Get currently connected worker count."""
|
||||||
|
@ -19,4 +22,5 @@ class WorkerView(APIView):
|
||||||
)
|
)
|
||||||
def get(self, request: Request) -> Response:
|
def get(self, request: Request) -> Response:
|
||||||
"""Get currently connected worker count."""
|
"""Get currently connected worker count."""
|
||||||
return Response({"count": len(CELERY_APP.control.ping(timeout=0.5))})
|
count = len(CELERY_APP.control.ping(timeout=0.5))
|
||||||
|
return Response({"count": count})
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
"""authentik admin tasks"""
|
"""authentik admin tasks"""
|
||||||
import re
|
import re
|
||||||
|
from os import environ
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
from django.core.validators import URLValidator
|
from django.core.validators import URLValidator
|
||||||
from packaging.version import parse
|
from packaging.version import parse
|
||||||
|
from prometheus_client import Info
|
||||||
from requests import RequestException, get
|
from requests import RequestException, get
|
||||||
from structlog.stdlib import get_logger
|
from structlog.stdlib import get_logger
|
||||||
|
|
||||||
from authentik import __version__
|
from authentik import ENV_GIT_HASH_KEY, __version__
|
||||||
from authentik.events.models import Event, EventAction
|
from authentik.events.models import Event, EventAction
|
||||||
from authentik.events.monitored_tasks import MonitoredTask, TaskResult, TaskResultStatus
|
from authentik.events.monitored_tasks import MonitoredTask, TaskResult, TaskResultStatus
|
||||||
from authentik.root.celery import CELERY_APP
|
from authentik.root.celery import CELERY_APP
|
||||||
|
@ -17,6 +19,18 @@ VERSION_CACHE_KEY = "authentik_latest_version"
|
||||||
VERSION_CACHE_TIMEOUT = 8 * 60 * 60 # 8 hours
|
VERSION_CACHE_TIMEOUT = 8 * 60 * 60 # 8 hours
|
||||||
# Chop off the first ^ because we want to search the entire string
|
# Chop off the first ^ because we want to search the entire string
|
||||||
URL_FINDER = URLValidator.regex.pattern[1:]
|
URL_FINDER = URLValidator.regex.pattern[1:]
|
||||||
|
PROM_INFO = Info("authentik_version", "Currently running authentik version")
|
||||||
|
|
||||||
|
|
||||||
|
def _set_prom_info():
    """Publish version details through the ``authentik_version`` info metric.

    Reports the running version, the latest version stored in the cache
    (empty string when nothing is cached) and the build hash read from the
    environment (empty string when the variable is unset)."""
    version_labels = {
        "version": __version__,
        "latest": cache.get(VERSION_CACHE_KEY, ""),
        "build_hash": environ.get(ENV_GIT_HASH_KEY, ""),
    }
    PROM_INFO.info(version_labels)
|
||||||
|
|
||||||
|
|
||||||
@CELERY_APP.task(bind=True, base=MonitoredTask)
|
@CELERY_APP.task(bind=True, base=MonitoredTask)
|
||||||
|
@ -36,6 +50,7 @@ def update_latest_version(self: MonitoredTask):
|
||||||
TaskResultStatus.SUCCESSFUL, ["Successfully updated latest Version"]
|
TaskResultStatus.SUCCESSFUL, ["Successfully updated latest Version"]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
_set_prom_info()
|
||||||
# Check if upstream version is newer than what we're running,
|
# Check if upstream version is newer than what we're running,
|
||||||
# and if no event exists yet, create one.
|
# and if no event exists yet, create one.
|
||||||
local_version = parse(__version__)
|
local_version = parse(__version__)
|
||||||
|
@ -53,3 +68,6 @@ def update_latest_version(self: MonitoredTask):
|
||||||
except (RequestException, IndexError) as exc:
|
except (RequestException, IndexError) as exc:
|
||||||
cache.set(VERSION_CACHE_KEY, "0.0.0", VERSION_CACHE_TIMEOUT)
|
cache.set(VERSION_CACHE_KEY, "0.0.0", VERSION_CACHE_TIMEOUT)
|
||||||
self.set_status(TaskResult(TaskResultStatus.ERROR).with_error(exc))
|
self.set_status(TaskResult(TaskResultStatus.ERROR).with_error(exc))
|
||||||
|
|
||||||
|
|
||||||
|
_set_prom_info()
|
||||||
|
|
|
@ -2,6 +2,10 @@
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
|
from django.db import ProgrammingError
|
||||||
|
|
||||||
|
from authentik.core.signals import GAUGE_MODELS
|
||||||
|
from authentik.lib.utils.reflection import get_apps
|
||||||
|
|
||||||
|
|
||||||
class AuthentikCoreConfig(AppConfig):
|
class AuthentikCoreConfig(AppConfig):
|
||||||
|
@ -15,3 +19,12 @@ class AuthentikCoreConfig(AppConfig):
|
||||||
def ready(self):
|
def ready(self):
|
||||||
import_module("authentik.core.signals")
|
import_module("authentik.core.signals")
|
||||||
import_module("authentik.core.managed")
|
import_module("authentik.core.managed")
|
||||||
|
try:
|
||||||
|
for app in get_apps():
|
||||||
|
for model in app.get_models():
|
||||||
|
GAUGE_MODELS.labels(
|
||||||
|
model_name=model._meta.model_name,
|
||||||
|
app=model._meta.app_label,
|
||||||
|
).set(model.objects.count())
|
||||||
|
except ProgrammingError:
|
||||||
|
pass
|
||||||
|
|
|
@ -1,20 +1,31 @@
|
||||||
"""authentik core signals"""
|
"""authentik core signals"""
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
from django.core.signals import Signal
|
from django.core.signals import Signal
|
||||||
|
from django.db.models import Model
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
|
from prometheus_client import Gauge
|
||||||
|
|
||||||
# Arguments: user: User, password: str
|
# Arguments: user: User, password: str
|
||||||
password_changed = Signal()
|
password_changed = Signal()
|
||||||
|
|
||||||
|
GAUGE_MODELS = Gauge(
|
||||||
|
"authentik_models", "Count of various objects", ["model_name", "app"]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@receiver(post_save)
|
@receiver(post_save)
|
||||||
# pylint: disable=unused-argument
|
# pylint: disable=unused-argument
|
||||||
def post_save_application(sender, instance, created: bool, **_):
|
def post_save_application(sender: type[Model], instance, created: bool, **_):
|
||||||
"""Clear user's application cache upon application creation"""
|
"""Clear user's application cache upon application creation"""
|
||||||
from authentik.core.api.applications import user_app_cache_key
|
from authentik.core.api.applications import user_app_cache_key
|
||||||
from authentik.core.models import Application
|
from authentik.core.models import Application
|
||||||
|
|
||||||
|
GAUGE_MODELS.labels(
|
||||||
|
model_name=sender._meta.model_name,
|
||||||
|
app=sender._meta.app_label,
|
||||||
|
).set(sender.objects.count())
|
||||||
|
|
||||||
if sender != Application:
|
if sender != Application:
|
||||||
return
|
return
|
||||||
if not created: # pragma: no cover
|
if not created: # pragma: no cover
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
"""authentik events app"""
|
"""authentik events app"""
|
||||||
|
from datetime import timedelta
|
||||||
from importlib import import_module
|
from importlib import import_module
|
||||||
|
|
||||||
from django.apps import AppConfig
|
from django.apps import AppConfig
|
||||||
|
from django.db import ProgrammingError
|
||||||
|
from django.utils.timezone import datetime
|
||||||
|
|
||||||
|
|
||||||
class AuthentikEventsConfig(AppConfig):
|
class AuthentikEventsConfig(AppConfig):
|
||||||
|
@ -13,3 +16,12 @@ class AuthentikEventsConfig(AppConfig):
|
||||||
|
|
||||||
def ready(self):
|
def ready(self):
|
||||||
import_module("authentik.events.signals")
|
import_module("authentik.events.signals")
|
||||||
|
try:
|
||||||
|
from authentik.events.models import Event
|
||||||
|
|
||||||
|
date_from = datetime.now() - timedelta(days=1)
|
||||||
|
|
||||||
|
for event in Event.objects.filter(created__gte=date_from):
|
||||||
|
event._set_prom_metrics()
|
||||||
|
except ProgrammingError:
|
||||||
|
pass
|
||||||
|
|
|
@ -11,6 +11,7 @@ from django.http import HttpRequest
|
||||||
from django.utils.timezone import now
|
from django.utils.timezone import now
|
||||||
from django.utils.translation import gettext as _
|
from django.utils.translation import gettext as _
|
||||||
from geoip2.errors import GeoIP2Error
|
from geoip2.errors import GeoIP2Error
|
||||||
|
from prometheus_client import Gauge
|
||||||
from requests import RequestException, post
|
from requests import RequestException, post
|
||||||
from structlog.stdlib import get_logger
|
from structlog.stdlib import get_logger
|
||||||
|
|
||||||
|
@ -28,6 +29,11 @@ from authentik.policies.models import PolicyBindingModel
|
||||||
from authentik.stages.email.utils import TemplateEmailMessage
|
from authentik.stages.email.utils import TemplateEmailMessage
|
||||||
|
|
||||||
LOGGER = get_logger("authentik.events")
|
LOGGER = get_logger("authentik.events")
|
||||||
|
GAUGE_EVENTS = Gauge(
|
||||||
|
"authentik_events",
|
||||||
|
"Events in authentik",
|
||||||
|
["action", "user_username", "app", "client_ip"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def default_event_duration():
|
def default_event_duration():
|
||||||
|
@ -169,6 +175,14 @@ class Event(ExpiringModel):
|
||||||
except GeoIP2Error as exc:
|
except GeoIP2Error as exc:
|
||||||
LOGGER.warning("Failed to add geoIP Data to event", exc=exc)
|
LOGGER.warning("Failed to add geoIP Data to event", exc=exc)
|
||||||
|
|
||||||
|
def _set_prom_metrics(self):
    """Write this event's creation timestamp to the events gauge.

    The gauge child is selected by the event's action, the username stored
    in the ``user`` mapping, the app and the client IP."""
    label_values = {
        "action": self.action,
        "user_username": self.user.get("username"),
        "app": self.app,
        "client_ip": self.client_ip,
    }
    created_ts = self.created.timestamp()
    GAUGE_EVENTS.labels(**label_values).set(created_ts)
|
||||||
|
|
||||||
def save(self, *args, **kwargs):
|
def save(self, *args, **kwargs):
|
||||||
if self._state.adding:
|
if self._state.adding:
|
||||||
LOGGER.debug(
|
LOGGER.debug(
|
||||||
|
@ -178,7 +192,8 @@ class Event(ExpiringModel):
|
||||||
client_ip=self.client_ip,
|
client_ip=self.client_ip,
|
||||||
user=self.user,
|
user=self.user,
|
||||||
)
|
)
|
||||||
return super().save(*args, **kwargs)
|
super().save(*args, **kwargs)
|
||||||
|
self._set_prom_metrics()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def summary(self) -> str:
|
def summary(self) -> str:
|
||||||
|
|
|
@ -2,14 +2,22 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from timeit import default_timer
|
||||||
from traceback import format_tb
|
from traceback import format_tb
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
from celery import Task
|
from celery import Task
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
from prometheus_client import Gauge
|
||||||
|
|
||||||
from authentik.events.models import Event, EventAction
|
from authentik.events.models import Event, EventAction
|
||||||
|
|
||||||
|
GAUGE_TASKS = Gauge(
|
||||||
|
"authentik_system_tasks",
|
||||||
|
"System tasks and their status",
|
||||||
|
["task_name", "task_uid", "status"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TaskResultStatus(Enum):
|
class TaskResultStatus(Enum):
|
||||||
"""Possible states of tasks"""
|
"""Possible states of tasks"""
|
||||||
|
@ -43,7 +51,9 @@ class TaskInfo:
|
||||||
"""Info about a task run"""
|
"""Info about a task run"""
|
||||||
|
|
||||||
task_name: str
|
task_name: str
|
||||||
finish_timestamp: datetime
|
start_timestamp: float
|
||||||
|
finish_timestamp: float
|
||||||
|
finish_time: datetime
|
||||||
|
|
||||||
result: TaskResult
|
result: TaskResult
|
||||||
|
|
||||||
|
@ -73,12 +83,25 @@ class TaskInfo:
|
||||||
"""Delete task info from cache"""
|
"""Delete task info from cache"""
|
||||||
return cache.delete(f"task_{self.task_name}")
|
return cache.delete(f"task_{self.task_name}")
|
||||||
|
|
||||||
|
def set_prom_metrics(self):
    """Publish this task run's duration to the system-tasks gauge.

    The duration is ``finish_timestamp`` minus the start reading, floored
    at zero, and is labelled with the task name, result UID and status."""
    # Instances lacking ``start_timestamp`` fall back to the current timer
    # reading (presumably objects cached before the field existed -- verify).
    began = (
        self.start_timestamp
        if hasattr(self, "start_timestamp")
        else default_timer()
    )
    elapsed = max(self.finish_timestamp - began, 0)
    gauge_child = GAUGE_TASKS.labels(
        task_name=self.task_name,
        task_uid=self.result.uid or "",
        status=self.result.status,
    )
    gauge_child.set(elapsed)
|
||||||
|
|
||||||
def save(self, timeout_hours=6):
|
def save(self, timeout_hours=6):
|
||||||
"""Save task into cache"""
|
"""Save task into cache"""
|
||||||
key = f"task_{self.task_name}"
|
key = f"task_{self.task_name}"
|
||||||
if self.result.uid:
|
if self.result.uid:
|
||||||
key += f"_{self.result.uid}"
|
key += f"_{self.result.uid}"
|
||||||
self.task_name += f"_{self.result.uid}"
|
self.task_name += f"_{self.result.uid}"
|
||||||
|
self.set_prom_metrics()
|
||||||
cache.set(key, self, timeout=timeout_hours * 60 * 60)
|
cache.set(key, self, timeout=timeout_hours * 60 * 60)
|
||||||
|
|
||||||
|
|
||||||
|
@ -98,6 +121,7 @@ class MonitoredTask(Task):
|
||||||
self._uid = None
|
self._uid = None
|
||||||
self._result = TaskResult(status=TaskResultStatus.ERROR, messages=[])
|
self._result = TaskResult(status=TaskResultStatus.ERROR, messages=[])
|
||||||
self.result_timeout_hours = 6
|
self.result_timeout_hours = 6
|
||||||
|
self.start = default_timer()
|
||||||
|
|
||||||
def set_uid(self, uid: str):
|
def set_uid(self, uid: str):
|
||||||
"""Set UID, so in the case of an unexpected error its saved correctly"""
|
"""Set UID, so in the case of an unexpected error its saved correctly"""
|
||||||
|
@ -117,7 +141,9 @@ class MonitoredTask(Task):
|
||||||
TaskInfo(
|
TaskInfo(
|
||||||
task_name=self.__name__,
|
task_name=self.__name__,
|
||||||
task_description=self.__doc__,
|
task_description=self.__doc__,
|
||||||
finish_timestamp=datetime.now(),
|
start_timestamp=self.start,
|
||||||
|
finish_timestamp=default_timer(),
|
||||||
|
finish_time=datetime.now(),
|
||||||
result=self._result,
|
result=self._result,
|
||||||
task_call_module=self.__module__,
|
task_call_module=self.__module__,
|
||||||
task_call_func=self.__name__,
|
task_call_func=self.__name__,
|
||||||
|
@ -133,7 +159,9 @@ class MonitoredTask(Task):
|
||||||
TaskInfo(
|
TaskInfo(
|
||||||
task_name=self.__name__,
|
task_name=self.__name__,
|
||||||
task_description=self.__doc__,
|
task_description=self.__doc__,
|
||||||
finish_timestamp=datetime.now(),
|
start_timestamp=self.start,
|
||||||
|
finish_timestamp=default_timer(),
|
||||||
|
finish_time=datetime.now(),
|
||||||
result=self._result,
|
result=self._result,
|
||||||
task_call_module=self.__module__,
|
task_call_module=self.__module__,
|
||||||
task_call_func=self.__name__,
|
task_call_func=self.__name__,
|
||||||
|
@ -151,3 +179,7 @@ class MonitoredTask(Task):
|
||||||
|
|
||||||
def run(self, *args, **kwargs):
|
def run(self, *args, **kwargs):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
for task in TaskInfo.all().values():
|
||||||
|
task.set_prom_metrics()
|
||||||
|
|
|
@ -4,6 +4,7 @@ from typing import Any, Optional
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
from django.http import HttpRequest
|
from django.http import HttpRequest
|
||||||
|
from prometheus_client import Histogram
|
||||||
from sentry_sdk.hub import Hub
|
from sentry_sdk.hub import Hub
|
||||||
from sentry_sdk.tracing import Span
|
from sentry_sdk.tracing import Span
|
||||||
from structlog.stdlib import BoundLogger, get_logger
|
from structlog.stdlib import BoundLogger, get_logger
|
||||||
|
@ -14,6 +15,7 @@ from authentik.flows.exceptions import EmptyFlowException, FlowNonApplicableExce
|
||||||
from authentik.flows.markers import ReevaluateMarker, StageMarker
|
from authentik.flows.markers import ReevaluateMarker, StageMarker
|
||||||
from authentik.flows.models import Flow, FlowStageBinding, Stage
|
from authentik.flows.models import Flow, FlowStageBinding, Stage
|
||||||
from authentik.policies.engine import PolicyEngine
|
from authentik.policies.engine import PolicyEngine
|
||||||
|
from authentik.root.monitoring import UpdatingGauge
|
||||||
|
|
||||||
LOGGER = get_logger()
|
LOGGER = get_logger()
|
||||||
PLAN_CONTEXT_PENDING_USER = "pending_user"
|
PLAN_CONTEXT_PENDING_USER = "pending_user"
|
||||||
|
@ -21,6 +23,16 @@ PLAN_CONTEXT_SSO = "is_sso"
|
||||||
PLAN_CONTEXT_REDIRECT = "redirect"
|
PLAN_CONTEXT_REDIRECT = "redirect"
|
||||||
PLAN_CONTEXT_APPLICATION = "application"
|
PLAN_CONTEXT_APPLICATION = "application"
|
||||||
PLAN_CONTEXT_SOURCE = "source"
|
PLAN_CONTEXT_SOURCE = "source"
|
||||||
|
GAUGE_FLOWS_CACHED = UpdatingGauge(
|
||||||
|
"authentik_flows_cached",
|
||||||
|
"Cached flows",
|
||||||
|
update_func=lambda: len(cache.keys("flow_*")),
|
||||||
|
)
|
||||||
|
HIST_FLOWS_PLAN_TIME = Histogram(
|
||||||
|
"authentik_flows_plan_time",
|
||||||
|
"Duration to build a plan for a flow",
|
||||||
|
["flow_slug"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def cache_key(flow: Flow, user: Optional[User] = None) -> str:
|
def cache_key(flow: Flow, user: Optional[User] = None) -> str:
|
||||||
|
@ -146,6 +158,7 @@ class FlowPlanner:
|
||||||
)
|
)
|
||||||
plan = self._build_plan(user, request, default_context)
|
plan = self._build_plan(user, request, default_context)
|
||||||
cache.set(cache_key(self.flow, user), plan)
|
cache.set(cache_key(self.flow, user), plan)
|
||||||
|
GAUGE_FLOWS_CACHED.update()
|
||||||
if not plan.stages and not self.allow_empty_flows:
|
if not plan.stages and not self.allow_empty_flows:
|
||||||
raise EmptyFlowException()
|
raise EmptyFlowException()
|
||||||
return plan
|
return plan
|
||||||
|
@ -158,7 +171,9 @@ class FlowPlanner:
|
||||||
) -> FlowPlan:
|
) -> FlowPlan:
|
||||||
"""Build flow plan by checking each stage in their respective
|
"""Build flow plan by checking each stage in their respective
|
||||||
order and checking the applied policies"""
|
order and checking the applied policies"""
|
||||||
with Hub.current.start_span(op="flow.planner.build_plan") as span:
|
with Hub.current.start_span(
|
||||||
|
op="flow.planner.build_plan"
|
||||||
|
) as span, HIST_FLOWS_PLAN_TIME.labels(flow_slug=self.flow.slug).time():
|
||||||
span: Span
|
span: Span
|
||||||
span.set_data("flow", self.flow)
|
span.set_data("flow", self.flow)
|
||||||
span.set_data("user", user)
|
span.set_data("user", user)
|
||||||
|
@ -202,6 +217,7 @@ class FlowPlanner:
|
||||||
marker = ReevaluateMarker(binding=binding, user=user)
|
marker = ReevaluateMarker(binding=binding, user=user)
|
||||||
if stage:
|
if stage:
|
||||||
plan.append(stage, marker)
|
plan.append(stage, marker)
|
||||||
|
HIST_FLOWS_PLAN_TIME.labels(flow_slug=self.flow.slug)
|
||||||
self._logger.debug(
|
self._logger.debug(
|
||||||
"f(plan): finished building",
|
"f(plan): finished building",
|
||||||
)
|
)
|
||||||
|
|
|
@ -8,11 +8,21 @@ from channels.exceptions import DenyConnection
|
||||||
from dacite import from_dict
|
from dacite import from_dict
|
||||||
from dacite.data import Data
|
from dacite.data import Data
|
||||||
from guardian.shortcuts import get_objects_for_user
|
from guardian.shortcuts import get_objects_for_user
|
||||||
|
from prometheus_client import Gauge
|
||||||
from structlog.stdlib import get_logger
|
from structlog.stdlib import get_logger
|
||||||
|
|
||||||
from authentik.core.channels import AuthJsonConsumer
|
from authentik.core.channels import AuthJsonConsumer
|
||||||
from authentik.outposts.models import OUTPOST_HELLO_INTERVAL, Outpost, OutpostState
|
from authentik.outposts.models import OUTPOST_HELLO_INTERVAL, Outpost, OutpostState
|
||||||
|
|
||||||
|
GAUGE_OUTPOSTS_CONNECTED = Gauge(
|
||||||
|
"authentik_outposts_connected", "Currently connected outposts", ["outpost", "uid"]
|
||||||
|
)
|
||||||
|
GAUGE_OUTPOSTS_LAST_UPDATE = Gauge(
|
||||||
|
"authentik_outposts_last_update",
|
||||||
|
"Last update from any outpost",
|
||||||
|
["outpost", "uid", "version"],
|
||||||
|
)
|
||||||
|
|
||||||
LOGGER = get_logger()
|
LOGGER = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,6 +54,8 @@ class OutpostConsumer(AuthJsonConsumer):
|
||||||
|
|
||||||
last_uid: Optional[str] = None
|
last_uid: Optional[str] = None
|
||||||
|
|
||||||
|
first_msg = False
|
||||||
|
|
||||||
def connect(self):
|
def connect(self):
|
||||||
super().connect()
|
super().connect()
|
||||||
uuid = self.scope["url_route"]["kwargs"]["pk"]
|
uuid = self.scope["url_route"]["kwargs"]["pk"]
|
||||||
|
@ -68,6 +80,10 @@ class OutpostConsumer(AuthJsonConsumer):
|
||||||
if self.channel_name in state.channel_ids:
|
if self.channel_name in state.channel_ids:
|
||||||
state.channel_ids.remove(self.channel_name)
|
state.channel_ids.remove(self.channel_name)
|
||||||
state.save()
|
state.save()
|
||||||
|
GAUGE_OUTPOSTS_CONNECTED.labels(
|
||||||
|
outpost=self.outpost.name,
|
||||||
|
uid=self.last_uid,
|
||||||
|
).dec()
|
||||||
LOGGER.debug(
|
LOGGER.debug(
|
||||||
"removed outpost instance from cache",
|
"removed outpost instance from cache",
|
||||||
outpost=self.outpost,
|
outpost=self.outpost,
|
||||||
|
@ -78,15 +94,29 @@ class OutpostConsumer(AuthJsonConsumer):
|
||||||
msg = from_dict(WebsocketMessage, content)
|
msg = from_dict(WebsocketMessage, content)
|
||||||
uid = msg.args.get("uuid", self.channel_name)
|
uid = msg.args.get("uuid", self.channel_name)
|
||||||
self.last_uid = uid
|
self.last_uid = uid
|
||||||
|
|
||||||
state = OutpostState.for_instance_uid(self.outpost, uid)
|
state = OutpostState.for_instance_uid(self.outpost, uid)
|
||||||
if self.channel_name not in state.channel_ids:
|
if self.channel_name not in state.channel_ids:
|
||||||
state.channel_ids.append(self.channel_name)
|
state.channel_ids.append(self.channel_name)
|
||||||
state.last_seen = datetime.now()
|
state.last_seen = datetime.now()
|
||||||
|
|
||||||
|
if not self.first_msg:
|
||||||
|
GAUGE_OUTPOSTS_CONNECTED.labels(
|
||||||
|
outpost=self.outpost.name,
|
||||||
|
uid=self.last_uid,
|
||||||
|
).inc()
|
||||||
|
self.first_msg = True
|
||||||
|
|
||||||
if msg.instruction == WebsocketMessageInstruction.HELLO:
|
if msg.instruction == WebsocketMessageInstruction.HELLO:
|
||||||
state.version = msg.args.get("version", None)
|
state.version = msg.args.get("version", None)
|
||||||
state.build_hash = msg.args.get("buildHash", "")
|
state.build_hash = msg.args.get("buildHash", "")
|
||||||
elif msg.instruction == WebsocketMessageInstruction.ACK:
|
elif msg.instruction == WebsocketMessageInstruction.ACK:
|
||||||
return
|
return
|
||||||
|
GAUGE_OUTPOSTS_LAST_UPDATE.labels(
|
||||||
|
outpost=self.outpost.name,
|
||||||
|
uid=self.last_uid or "",
|
||||||
|
version=state.version or "",
|
||||||
|
).set_to_current_time()
|
||||||
state.save(timeout=OUTPOST_HELLO_INTERVAL * 1.5)
|
state.save(timeout=OUTPOST_HELLO_INTERVAL * 1.5)
|
||||||
|
|
||||||
response = WebsocketMessage(instruction=WebsocketMessageInstruction.ACK)
|
response = WebsocketMessage(instruction=WebsocketMessageInstruction.ACK)
|
||||||
|
|
|
@ -5,6 +5,7 @@ from typing import Iterator, Optional
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
from django.http import HttpRequest
|
from django.http import HttpRequest
|
||||||
|
from prometheus_client import Histogram
|
||||||
from sentry_sdk.hub import Hub
|
from sentry_sdk.hub import Hub
|
||||||
from sentry_sdk.tracing import Span
|
from sentry_sdk.tracing import Span
|
||||||
from structlog.stdlib import BoundLogger, get_logger
|
from structlog.stdlib import BoundLogger, get_logger
|
||||||
|
@ -18,8 +19,19 @@ from authentik.policies.models import (
|
||||||
)
|
)
|
||||||
from authentik.policies.process import PolicyProcess, cache_key
|
from authentik.policies.process import PolicyProcess, cache_key
|
||||||
from authentik.policies.types import PolicyRequest, PolicyResult
|
from authentik.policies.types import PolicyRequest, PolicyResult
|
||||||
|
from authentik.root.monitoring import UpdatingGauge
|
||||||
|
|
||||||
CURRENT_PROCESS = current_process()
|
CURRENT_PROCESS = current_process()
|
||||||
|
GAUGE_POLICIES_CACHED = UpdatingGauge(
|
||||||
|
"authentik_policies_cached",
|
||||||
|
"Cached Policies",
|
||||||
|
update_func=lambda: len(cache.keys("policy_*")),
|
||||||
|
)
|
||||||
|
HIST_POLICIES_BUILD_TIME = Histogram(
|
||||||
|
"authentik_policies_build_time",
|
||||||
|
"Execution times complete policy result to an object",
|
||||||
|
["object_name", "object_type", "user"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PolicyProcessInfo:
|
class PolicyProcessInfo:
|
||||||
|
@ -92,7 +104,13 @@ class PolicyEngine:
|
||||||
|
|
||||||
def build(self) -> "PolicyEngine":
|
def build(self) -> "PolicyEngine":
|
||||||
"""Build wrapper which monitors performance"""
|
"""Build wrapper which monitors performance"""
|
||||||
with Hub.current.start_span(op="policy.engine.build") as span:
|
with Hub.current.start_span(
|
||||||
|
op="policy.engine.build"
|
||||||
|
) as span, HIST_POLICIES_BUILD_TIME.labels(
|
||||||
|
object_name=self.__pbm,
|
||||||
|
object_type=f"{self.__pbm._meta.app_label}.{self.__pbm._meta.model_name}",
|
||||||
|
user=self.request.user,
|
||||||
|
).time():
|
||||||
span: Span
|
span: Span
|
||||||
span.set_data("pbm", self.__pbm)
|
span.set_data("pbm", self.__pbm)
|
||||||
span.set_data("request", self.request)
|
span.set_data("request", self.request)
|
||||||
|
|
|
@ -111,14 +111,30 @@ class PolicyBinding(SerializerModel):
|
||||||
|
|
||||||
return PolicyBindingSerializer
|
return PolicyBindingSerializer
|
||||||
|
|
||||||
def __str__(self) -> str:
|
@property
|
||||||
suffix = ""
|
def target_type(self) -> str:
|
||||||
|
"""Get the target type this binding is applied to"""
|
||||||
if self.policy:
|
if self.policy:
|
||||||
suffix = f"Policy {self.policy.name}"
|
return "policy"
|
||||||
if self.group:
|
if self.group:
|
||||||
suffix = f"Group {self.group.name}"
|
return "group"
|
||||||
if self.user:
|
if self.user:
|
||||||
suffix = f"User {self.user.name}"
|
return "user"
|
||||||
|
return "invalid"
|
||||||
|
|
||||||
|
@property
def target_name(self) -> str:
    """Return the name of whatever this binding points at.

    Checks policy, then group, then user, and returns the ``name`` of the
    first one that is set; returns ``"invalid"`` when none of them is."""
    # Attributes are looked up one at a time so later ones are only
    # touched when the earlier ones are unset.
    for attr in ("policy", "group", "user"):
        bound = getattr(self, attr)
        if bound:
            return bound.name
    return "invalid"
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
suffix = f"{self.target_type.title()} {self.target_name}"
|
||||||
try:
|
try:
|
||||||
return f"Binding from {self.target} #{self.order} to {suffix}"
|
return f"Binding from {self.target} #{self.order} to {suffix}"
|
||||||
except PolicyBinding.target.RelatedObjectDoesNotExist: # pylint: disable=no-member
|
except PolicyBinding.target.RelatedObjectDoesNotExist: # pylint: disable=no-member
|
||||||
|
|
|
@ -5,6 +5,7 @@ from traceback import format_tb
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
from prometheus_client import Histogram
|
||||||
from sentry_sdk.hub import Hub
|
from sentry_sdk.hub import Hub
|
||||||
from sentry_sdk.tracing import Span
|
from sentry_sdk.tracing import Span
|
||||||
from structlog.stdlib import get_logger
|
from structlog.stdlib import get_logger
|
||||||
|
@ -19,6 +20,18 @@ TRACEBACK_HEADER = "Traceback (most recent call last):\n"
|
||||||
|
|
||||||
FORK_CTX = get_context("fork")
|
FORK_CTX = get_context("fork")
|
||||||
PROCESS_CLASS = FORK_CTX.Process
|
PROCESS_CLASS = FORK_CTX.Process
|
||||||
|
HIST_POLICIES_EXECUTION_TIME = Histogram(
|
||||||
|
"authentik_policies_execution_time",
|
||||||
|
"Execution times for single policies",
|
||||||
|
[
|
||||||
|
"binding_order",
|
||||||
|
"binding_target_type",
|
||||||
|
"binding_target_name",
|
||||||
|
"object_name",
|
||||||
|
"object_type",
|
||||||
|
"user",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def cache_key(binding: PolicyBinding, request: PolicyRequest) -> str:
|
def cache_key(binding: PolicyBinding, request: PolicyRequest) -> str:
|
||||||
|
@ -121,7 +134,14 @@ class PolicyProcess(PROCESS_CLASS):
|
||||||
"""Task wrapper to run policy checking"""
|
"""Task wrapper to run policy checking"""
|
||||||
with Hub.current.start_span(
|
with Hub.current.start_span(
|
||||||
op="policy.process.execute",
|
op="policy.process.execute",
|
||||||
) as span:
|
) as span, HIST_POLICIES_EXECUTION_TIME.labels(
|
||||||
|
binding_order=self.binding.order,
|
||||||
|
binding_target_type=self.binding.target_type,
|
||||||
|
binding_target_name=self.binding.target_name,
|
||||||
|
object_name=self.request.obj,
|
||||||
|
object_type=f"{self.request.obj._meta.app_label}.{self.request.obj._meta.model_name}",
|
||||||
|
user=str(self.request.user),
|
||||||
|
).time():
|
||||||
span: Span
|
span: Span
|
||||||
span.set_data("policy", self.binding.policy)
|
span.set_data("policy", self.binding.policy)
|
||||||
span.set_data("request", self.request)
|
span.set_data("request", self.request)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
"""Metrics view"""
|
"""Metrics view"""
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import connections
|
from django.db import connections
|
||||||
|
@ -8,8 +9,30 @@ from django.http import HttpRequest, HttpResponse
|
||||||
from django.views import View
|
from django.views import View
|
||||||
from django_prometheus.exports import ExportToDjangoView
|
from django_prometheus.exports import ExportToDjangoView
|
||||||
from django_redis import get_redis_connection
|
from django_redis import get_redis_connection
|
||||||
|
from prometheus_client import Gauge
|
||||||
from redis.exceptions import RedisError
|
from redis.exceptions import RedisError
|
||||||
|
|
||||||
|
from authentik.admin.api.workers import GAUGE_WORKERS
|
||||||
|
from authentik.events.monitored_tasks import TaskInfo
|
||||||
|
from authentik.root.celery import CELERY_APP
|
||||||
|
|
||||||
|
|
||||||
|
class UpdatingGauge(Gauge):
    """Gauge which fetches its own value from an update function.

    The update function is called once on instantiation and again every
    time :meth:`update` is invoked.
    """

    def __init__(self, *args, update_func: Callable, **kwargs):
        """Create the gauge and populate it right away.

        ``update_func`` is called without arguments and its return value is
        used as the gauge value. All other positional and keyword arguments
        are forwarded unchanged to the parent ``Gauge`` constructor.
        """
        super().__init__(*args, **kwargs)
        self._update_func = update_func
        self.update()

    def update(self):
        """Set value from update function"""
        val = self._update_func()
        # Only ``None`` means "nothing to report". A plain truthiness check
        # would also discard a legitimate reading of 0, leaving the gauge
        # stuck at its previous non-zero value.
        if val is not None:
            self.set(val)
|
||||||
|
|
||||||
|
|
||||||
class MetricsView(View):
|
class MetricsView(View):
|
||||||
"""Wrapper around ExportToDjangoView, using http-basic auth"""
|
"""Wrapper around ExportToDjangoView, using http-basic auth"""
|
||||||
|
@ -20,12 +43,18 @@ class MetricsView(View):
|
||||||
auth_type, _, given_credentials = auth_header.partition(" ")
|
auth_type, _, given_credentials = auth_header.partition(" ")
|
||||||
credentials = f"monitor:{settings.SECRET_KEY}"
|
credentials = f"monitor:{settings.SECRET_KEY}"
|
||||||
expected = b64encode(str.encode(credentials)).decode()
|
expected = b64encode(str.encode(credentials)).decode()
|
||||||
|
authed = auth_type == "Basic" and given_credentials == expected
|
||||||
if auth_type != "Basic" or given_credentials != expected:
|
if not authed and not settings.DEBUG:
|
||||||
response = HttpResponse(status=401)
|
response = HttpResponse(status=401)
|
||||||
response["WWW-Authenticate"] = 'Basic realm="authentik-monitoring"'
|
response["WWW-Authenticate"] = 'Basic realm="authentik-monitoring"'
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
count = len(CELERY_APP.control.ping(timeout=0.5))
|
||||||
|
GAUGE_WORKERS.set(count)
|
||||||
|
|
||||||
|
for task in TaskInfo.all().values():
|
||||||
|
task.set_prom_metrics()
|
||||||
|
|
||||||
return ExportToDjangoView(request)
|
return ExportToDjangoView(request)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -256,7 +256,7 @@ CHANNEL_LAYERS = {
|
||||||
|
|
||||||
DATABASES = {
|
DATABASES = {
|
||||||
"default": {
|
"default": {
|
||||||
"ENGINE": "django.db.backends.postgresql",
|
"ENGINE": "django_prometheus.db.backends.postgresql",
|
||||||
"HOST": CONFIG.y("postgresql.host"),
|
"HOST": CONFIG.y("postgresql.host"),
|
||||||
"NAME": CONFIG.y("postgresql.name"),
|
"NAME": CONFIG.y("postgresql.name"),
|
||||||
"USER": CONFIG.y("postgresql.user"),
|
"USER": CONFIG.y("postgresql.user"),
|
||||||
|
@ -334,6 +334,10 @@ CELERY_RESULT_BACKEND = (
|
||||||
DBBACKUP_STORAGE = "django.core.files.storage.FileSystemStorage"
|
DBBACKUP_STORAGE = "django.core.files.storage.FileSystemStorage"
|
||||||
DBBACKUP_STORAGE_OPTIONS = {"location": "./backups" if DEBUG else "/backups"}
|
DBBACKUP_STORAGE_OPTIONS = {"location": "./backups" if DEBUG else "/backups"}
|
||||||
DBBACKUP_FILENAME_TEMPLATE = "authentik-backup-{datetime}.sql"
|
DBBACKUP_FILENAME_TEMPLATE = "authentik-backup-{datetime}.sql"
|
||||||
|
DBBACKUP_CONNECTOR_MAPPING = {
|
||||||
|
"django_prometheus.db.backends.postgresql": "dbbackup.db.postgresql.PgDumpConnector",
|
||||||
|
}
|
||||||
|
|
||||||
if CONFIG.y("postgresql.s3_backup"):
|
if CONFIG.y("postgresql.s3_backup"):
|
||||||
DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
|
DBBACKUP_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
|
||||||
DBBACKUP_STORAGE_OPTIONS = {
|
DBBACKUP_STORAGE_OPTIONS = {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
openapi: 3.0.3
|
openapi: 3.0.3
|
||||||
info:
|
info:
|
||||||
title: authentik
|
title: authentik
|
||||||
version: 2021.5.3
|
version: 2021.5.4
|
||||||
description: Making authentication simple.
|
description: Making authentication simple.
|
||||||
contact:
|
contact:
|
||||||
email: hello@beryju.org
|
email: hello@beryju.org
|
||||||
|
|
Reference in a new issue