From f6b8171624578bcb6a1bcae4847d4a5ec8fa7d8c Mon Sep 17 00:00:00 2001 From: Jens Langhammer Date: Fri, 16 Oct 2020 11:31:31 +0200 Subject: [PATCH] outposts: improve controller error handling --- passbook/outposts/controllers/base.py | 5 ++ passbook/outposts/controllers/docker.py | 75 +++++++++++---------- passbook/outposts/controllers/kubernetes.py | 30 +++++---- 3 files changed, 61 insertions(+), 49 deletions(-) diff --git a/passbook/outposts/controllers/base.py b/passbook/outposts/controllers/base.py index 568eb4469..e7c9e250c 100644 --- a/passbook/outposts/controllers/base.py +++ b/passbook/outposts/controllers/base.py @@ -3,9 +3,14 @@ from typing import Dict from structlog import get_logger +from passbook.lib.sentry import SentryIgnoredException from passbook.outposts.models import Outpost +class ControllerException(SentryIgnoredException): + """Exception raised when anything fails during controller run""" + + class BaseController: """Base Outpost deployment controller""" diff --git a/passbook/outposts/controllers/docker.py b/passbook/outposts/controllers/docker.py index fd133c360..b1b281f6d 100644 --- a/passbook/outposts/controllers/docker.py +++ b/passbook/outposts/controllers/docker.py @@ -2,12 +2,12 @@ from typing import Dict, Tuple from docker import DockerClient, from_env -from docker.errors import NotFound +from docker.errors import DockerException, NotFound from docker.models.containers import Container from yaml import safe_dump from passbook import __version__ -from passbook.outposts.controllers.base import BaseController +from passbook.outposts.controllers.base import BaseController, ControllerException class DockerController(BaseController): @@ -62,43 +62,46 @@ class DockerController(BaseController): ) def run(self): - container, has_been_created = self._get_container() - if has_been_created: - return None - # Check if the container is out of date, delete it and retry - if len(container.image.tags) > 0: - tag: str = container.image.tags[0] - _, _, 
version = tag.partition(":") - if version != __version__: - self.logger.info( - "Container has mismatched version, re-creating...", - has=version, - should=__version__, - ) + try: + container, has_been_created = self._get_container() + if has_been_created: + return None + # Check if the container is out of date, delete it and retry + if len(container.image.tags) > 0: + tag: str = container.image.tags[0] + _, _, version = tag.partition(":") + if version != __version__: + self.logger.info( + "Container has mismatched version, re-creating...", + has=version, + should=__version__, + ) + container.kill() + container.remove(force=True) + return self.run() + # Check that container values match our values + if self._comp_env(container): + self.logger.info("Container has outdated config, re-creating...") container.kill() container.remove(force=True) return self.run() - # Check that container values match our values - if self._comp_env(container): - self.logger.info("Container has outdated config, re-creating...") - container.kill() - container.remove(force=True) - return self.run() - # Check that container is healthy - if ( - container.status == "running" - and container.attrs.get("State", {}).get("Health", {}).get("Status", "") - != "healthy" - ): - # At this point we know the config is correct, but the container isn't healthy, - # so we just restart it with the same config - self.logger.info("Container is unhealthy, restarting...") - container.restart() - # Check that container is running - if container.status != "running": - self.logger.info("Container is not running, restarting...") - container.start() - return None + # Check that container is healthy + if ( + container.status == "running" + and container.attrs.get("State", {}).get("Health", {}).get("Status", "") + != "healthy" + ): + # At this point we know the config is correct, but the container isn't healthy, + # so we just restart it with the same config + self.logger.info("Container is unhealthy, restarting...") + 
container.restart() + # Check that container is running + if container.status != "running": + self.logger.info("Container is not running, restarting...") + container.start() + return None + except DockerException as exc: + raise ControllerException from exc def get_static_deployment(self) -> str: """Generate docker-compose yaml for proxy, version 3.5""" diff --git a/passbook/outposts/controllers/kubernetes.py b/passbook/outposts/controllers/kubernetes.py index ed28bd636..3cac744f8 100644 --- a/passbook/outposts/controllers/kubernetes.py +++ b/passbook/outposts/controllers/kubernetes.py @@ -1,11 +1,12 @@ """Kubernetes deployment controller""" from io import StringIO +from kubernetes.client import OpenApiException from kubernetes.config import load_incluster_config, load_kube_config from kubernetes.config.config_exception import ConfigException from yaml import dump_all -from passbook.outposts.controllers.base import BaseController +from passbook.outposts.controllers.base import BaseController, ControllerException from passbook.outposts.controllers.k8s.deployment import DeploymentReconciler from passbook.outposts.controllers.k8s.secret import SecretReconciler from passbook.outposts.controllers.k8s.service import ServiceReconciler @@ -23,21 +24,24 @@ class KubernetesController(BaseController): def run(self): """Called by scheduled task to reconcile deployment/service/etc""" - namespace = self.outpost.config.kubernetes_namespace + try: + namespace = self.outpost.config.kubernetes_namespace - secret_reconciler = SecretReconciler(self.outpost) - secret_reconciler.namespace = namespace - secret_reconciler.run() + secret_reconciler = SecretReconciler(self.outpost) + secret_reconciler.namespace = namespace + secret_reconciler.run() - deployment_reconciler = DeploymentReconciler(self.outpost) - deployment_reconciler.namespace = namespace - deployment_reconciler.deployment_ports = self.deployment_ports - deployment_reconciler.run() + deployment_reconciler = 
DeploymentReconciler(self.outpost) + deployment_reconciler.namespace = namespace + deployment_reconciler.deployment_ports = self.deployment_ports + deployment_reconciler.run() - service_reconciler = ServiceReconciler(self.outpost) - service_reconciler.namespace = namespace - service_reconciler.deployment_ports = self.deployment_ports - service_reconciler.run() + service_reconciler = ServiceReconciler(self.outpost) + service_reconciler.namespace = namespace + service_reconciler.deployment_ports = self.deployment_ports + service_reconciler.run() + except OpenApiException as exc: + raise ControllerException from exc def get_static_deployment(self) -> str: secret_reconciler = SecretReconciler(self.outpost)