# Skip to content
# Snippets Groups Projects
# terminate_router.py 16.13 KiB
"""A workflow that terminates a router.

To terminate a router, the workflow `terminate_router` is used. The operator is presented with an input form that
once again requires a trouble ticket number. On top of this, there are also the options whether this workflow should
remove all configuration on the router, and whether the iBGP mesh should be updated.

The workflow consists of the following steps:

- Remove the router from the iBGP mesh (if selected).
- Deprovision the loopback IPs from IPAM.
- Try to remove configuration from the router (if selected).
- Commit removal of configuration (if selected).
- For Nokia devices: remove interfaces from Netbox.
- Remove the device from LibreNMS.
- For PE routers: apply the archiving license in Kentik.
- Set the subscription status to `TERMINATED`.
"""

import ipaddress
import json
import logging

from orchestrator.forms import SubmitFormPage
from orchestrator.forms.validators import Label
from orchestrator.targets import Target
from orchestrator.types import SubscriptionLifecycle
from orchestrator.utils.errors import ProcessFailureError
from orchestrator.utils.json import json_dumps
from orchestrator.workflow import StepList, begin, conditional, done, step, workflow
from orchestrator.workflows.steps import (
    resync,
    set_status,
    store_process_subscription,
    unsync,
)
from orchestrator.workflows.utils import wrap_modify_initial_input_form
from pydantic_forms.types import FormGenerator, State, UUIDstr
from requests import HTTPError

from gso.products.product_blocks.router import RouterRole
from gso.products.product_types.router import Router
from gso.services import infoblox
from gso.services.kentik_client import KentikClient
from gso.services.librenms_client import LibreNMSClient
from gso.services.lso_client import LSOState, lso_interaction
from gso.services.mailer import send_mail
from gso.services.netbox_client import NetboxClient
from gso.settings import load_oss_params
from gso.utils.helpers import generate_inventory_for_routers
from gso.utils.shared_enums import Vendor
from gso.utils.types.tt_number import TTNumber

logger = logging.getLogger(__name__)


def initial_input_form_generator(subscription_id: UUIDstr) -> FormGenerator:
    """Ask the operator for a ticket number and which optional clean-up actions should run.

    The form asks for a trouble ticket number, whether configuration should be removed from the router, and whether
    the iBGP mesh should be updated. Both options default to ``True``. Two extra warning labels are added to the form
    when the subscription is still in the ``INITIAL`` lifecycle state.

    The submitted form data is returned together with two derived values that the workflow uses for its conditional
    steps: ``router_is_nokia`` and ``router_role``.
    """
    router = Router.from_subscription(subscription_id)
    show_initial_warning = router.status == SubscriptionLifecycle.INITIAL

    class TerminateForm(SubmitFormPage):
        # The warning labels are only declared for subscriptions that are still in the INITIAL state.
        if show_initial_warning:
            info_label_2: Label = (
                "This will immediately mark the subscription as terminated, preventing any other workflows from "
                "interacting with this product subscription."
            )
            info_label_3: Label = "ONLY EXECUTE THIS WORKFLOW WHEN YOU ARE ABSOLUTELY SURE WHAT YOU ARE DOING."

        tt_number: TTNumber
        termination_label: Label = "Please confirm whether configuration should get removed from the router."
        remove_configuration: bool = True
        update_ibgp_mesh_label: Label = "Please confirm whether the iBGP mesh should get updated."
        update_ibgp_mesh: bool = True

    submitted_form = yield TerminateForm

    form_state = submitted_form.model_dump()
    router_block = router.router
    derived_state = {
        "router_is_nokia": router_block.vendor == Vendor.NOKIA,
        "router_role": router_block.router_role,
    }
    return form_state | derived_state


@step("Deprovision loopback IPs from IPAM")
def deprovision_loopback_ips(subscription: Router) -> dict:
    """Delete the host record of the router's loopback address from Infoblox.

    The host is looked up by the router's IPv4 loopback address. The subscription itself is returned unchanged.
    """
    loopback_ipv4 = ipaddress.IPv4Address(subscription.router.router_lo_ipv4_address)
    infoblox.delete_host_by_ip(loopback_ipv4)

    return {"subscription": subscription}


@step("[DRY RUN] Remove configuration from router")
def remove_config_from_router_dry(subscription: Router, process_id: UUIDstr, tt_number: str) -> LSOState:
    """Prepare a dry run of the base configuration playbook with the ``terminate`` verb.

    Returns the playbook name, an inventory that contains only this router, and the extra variables for the playbook:
    the serialised subscription, the dry run flag, the verb, and a commit comment that references the process ID and
    the trouble ticket number.
    """
    router_fqdn = subscription.router.router_fqdn
    # Round-trip through the orchestrator's JSON encoder to get a plain, JSON-compatible representation.
    router_json = json.loads(json_dumps(subscription))
    commit_comment = f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Terminating {router_fqdn}"

    return {
        "playbook_name": "gap_ansible/playbooks/base_config.yaml",
        "inventory": {"all": {"hosts": {router_fqdn: None}}},
        "extra_vars": {
            "wfo_router_json": router_json,
            "dry_run": True,
            "verb": "terminate",
            "commit_comment": commit_comment,
        },
    }


@step("[FOR REAL] Remove configuration from router")
def remove_config_from_router_real(subscription: Router, process_id: UUIDstr, tt_number: str) -> LSOState:
    """Prepare the real run of the base configuration playbook with the ``terminate`` verb.

    Identical to the dry run, except that ``dry_run`` is set to ``False``. Returns the playbook name, an inventory
    that contains only this router, and the extra variables for the playbook.
    """
    router_fqdn = subscription.router.router_fqdn
    # Round-trip through the orchestrator's JSON encoder to get a plain, JSON-compatible representation.
    router_json = json.loads(json_dumps(subscription))
    commit_comment = f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Terminating {router_fqdn}"

    return {
        "playbook_name": "gap_ansible/playbooks/base_config.yaml",
        "inventory": {"all": {"hosts": {router_fqdn: None}}},
        "extra_vars": {
            "wfo_router_json": router_json,
            "dry_run": False,
            "verb": "terminate",
            "commit_comment": commit_comment,
        },
    }


@step("Remove Device from Netbox")
def remove_device_from_netbox(subscription: Router) -> dict[str, Router]:
    """Delete the device that matches the router's FQDN from Netbox, and return the subscription unchanged."""
    netbox = NetboxClient()
    netbox.delete_device(subscription.router.router_fqdn)

    return {"subscription": subscription}


@step("[DRY RUN] Remove P router from all the PE routers")
def remove_p_from_all_pe_dry(subscription: Router, tt_number: str, process_id: UUIDstr) -> LSOState:
    """Prepare a dry run of the iBGP mesh playbook that removes the terminated P router from all PE routers.

    Returns the playbook name, the inventory that ``generate_inventory_for_routers`` produces for the PE role, and
    the extra variables for the playbook, using the ``remove_p_from_pe`` verb.
    """
    router_fqdn = subscription.router.router_fqdn
    subscription_json = json.loads(json_dumps(subscription))
    commit_comment = (
        f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Remove {router_fqdn} from all the PE routers"
    )
    pe_inventory = generate_inventory_for_routers(RouterRole.PE)

    return {
        "playbook_name": "gap_ansible/playbooks/update_ibgp_mesh.yaml",
        "inventory": pe_inventory,
        "extra_vars": {
            "dry_run": True,
            "subscription": subscription_json,
            "commit_comment": commit_comment,
            "verb": "remove_p_from_pe",
        },
    }


@step("[REAL RUN] Remove P router from all the PE routers")
def remove_p_from_all_pe_real(subscription: Router, tt_number: str, process_id: UUIDstr) -> LSOState:
    """Prepare the real run of the iBGP mesh playbook that removes the terminated P router from all PE routers.

    Identical to the dry run, except that ``dry_run`` is set to ``False``. Returns the playbook name, the inventory
    that ``generate_inventory_for_routers`` produces for the PE role, and the extra variables for the playbook.
    """
    router_fqdn = subscription.router.router_fqdn
    subscription_json = json.loads(json_dumps(subscription))
    commit_comment = (
        f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Remove {router_fqdn} from all the PE routers"
    )
    pe_inventory = generate_inventory_for_routers(RouterRole.PE)

    return {
        "playbook_name": "gap_ansible/playbooks/update_ibgp_mesh.yaml",
        "inventory": pe_inventory,
        "extra_vars": {
            "dry_run": False,
            "subscription": subscription_json,
            "commit_comment": commit_comment,
            "verb": "remove_p_from_pe",
        },
    }


@step("[DRY RUN] Remove PE router from all the PE routers")
def remove_pe_from_all_pe_dry(subscription: Router, tt_number: str, process_id: UUIDstr) -> LSOState:
    """Prepare a dry run of the iBGP mesh playbook that removes the terminated PE router from the PE router mesh.

    The inventory is generated for the PE role, with the terminated router itself passed as an excluded router.
    Returns the playbook name, that inventory, and the extra variables for the playbook, using the
    ``remove_pe_from_pe`` verb.
    """
    router_fqdn = subscription.router.router_fqdn
    subscription_json = json.loads(json_dumps(subscription))
    commit_comment = (
        f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Remove {router_fqdn} from all the PE routers"
    )
    # The router that is being terminated is left out of the list of hosts the playbook runs against.
    other_pe_inventory = generate_inventory_for_routers(RouterRole.PE, exclude_routers=[router_fqdn])

    return {
        "playbook_name": "gap_ansible/playbooks/update_ibgp_mesh.yaml",
        "inventory": other_pe_inventory,
        "extra_vars": {
            "dry_run": True,
            "subscription": subscription_json,
            "commit_comment": commit_comment,
            "verb": "remove_pe_from_pe",
        },
    }


@step("[REAL RUN] Remove all PE routers from all the PE routers")
def remove_pe_from_all_pe_real(subscription: Router, tt_number: str, process_id: UUIDstr) -> LSOState:
    """Prepare the real run of the iBGP mesh playbook that removes the terminated PE router from the PE router mesh.

    The inventory is generated for the PE role, with the terminated router itself passed as an excluded router.
    Returns the playbook name, that inventory, and the extra variables for the playbook, using the
    ``remove_pe_from_pe`` verb with ``dry_run`` set to ``False``.
    """
    # NOTE(review): the step name and the commit comment are worded differently from the matching dry run step
    # ("Remove all PE routers ..." and "... from iBGP mesh") - confirm whether that is intentional.
    router_fqdn = subscription.router.router_fqdn
    subscription_json = json.loads(json_dumps(subscription))
    commit_comment = f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Remove {router_fqdn} from iBGP mesh"
    # The router that is being terminated is left out of the list of hosts the playbook runs against.
    other_pe_inventory = generate_inventory_for_routers(RouterRole.PE, exclude_routers=[router_fqdn])

    return {
        "playbook_name": "gap_ansible/playbooks/update_ibgp_mesh.yaml",
        "inventory": other_pe_inventory,
        "extra_vars": {
            "dry_run": False,
            "subscription": subscription_json,
            "commit_comment": commit_comment,
            "verb": "remove_pe_from_pe",
        },
    }


@step("[DRY RUN] Remove PE router from all the P routers")
def remove_pe_from_all_p_dry(subscription: Router, tt_number: str, process_id: UUIDstr) -> LSOState:
    """Prepare a dry run of the iBGP mesh playbook that removes the terminated PE router from all P routers.

    Returns the playbook name, the inventory that ``generate_inventory_for_routers`` produces for the P role, and
    the extra variables for the playbook, using the ``remove_pe_from_p`` verb.
    """
    router_fqdn = subscription.router.router_fqdn
    subscription_json = json.loads(json_dumps(subscription))
    commit_comment = (
        f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Remove {router_fqdn} from all the P routers"
    )
    p_inventory = generate_inventory_for_routers(RouterRole.P)

    return {
        "playbook_name": "gap_ansible/playbooks/update_ibgp_mesh.yaml",
        "inventory": p_inventory,
        "extra_vars": {
            "dry_run": True,
            "subscription": subscription_json,
            "commit_comment": commit_comment,
            "verb": "remove_pe_from_p",
        },
    }


@step("[REAL RUN] Remove PE router from all P routers")
def remove_pe_from_all_p_real(subscription: Router, tt_number: str, process_id: UUIDstr) -> LSOState:
    """Prepare the real run of the iBGP mesh playbook that removes the terminated PE router from all P routers.

    Identical to the dry run, except that ``dry_run`` is set to ``False``. Returns the playbook name, the inventory
    that ``generate_inventory_for_routers`` produces for the P role, and the extra variables for the playbook.
    """
    router_fqdn = subscription.router.router_fqdn
    subscription_json = json.loads(json_dumps(subscription))
    commit_comment = (
        f"GSO_PROCESS_ID: {process_id} - TT_NUMBER: {tt_number} - Remove {router_fqdn} from all the P routers"
    )
    p_inventory = generate_inventory_for_routers(RouterRole.P)

    return {
        "playbook_name": "gap_ansible/playbooks/update_ibgp_mesh.yaml",
        "inventory": p_inventory,
        "extra_vars": {
            "dry_run": False,
            "subscription": subscription_json,
            "commit_comment": commit_comment,
            "verb": "remove_pe_from_p",
        },
    }


@step("Remove Device from Librenms")
def remove_device_from_librenms(subscription: Router) -> State:
    """Remove the device from LibreNMS.

    The device is removed by its FQDN. When LibreNMS answers with an HTTP error whose message says that the device
    was not found, the step does not fail: the error is stored in the state under ``librenms_error`` and the operator
    is asked to confirm that nothing needs to be removed. Any other HTTP error fails the process.

    Returns a state update with the key ``librenms_device`` describing the outcome, plus ``librenms_error`` when the
    device was not found.

    Raises ``ProcessFailureError`` for every HTTP error other than "device not found", with the error body from
    LibreNMS as details.
    """
    router_fqdn = subscription.router.router_fqdn

    try:
        LibreNMSClient().remove_device(router_fqdn)
    except HTTPError as e:
        # The error body is not guaranteed to be a JSON object with a "message" key. Parse it defensively, so that
        # an unexpected body is reported as a process failure instead of a KeyError or a JSON decoding error.
        if e.response is None:
            response = {"message": str(e)}
        else:
            try:
                response = e.response.json()
            except ValueError:
                # Not JSON at all; keep the raw body so it still ends up in the failure details.
                response = {"raw_body": e.response.text}

        error_message = response.get("message") if isinstance(response, dict) else None
        if error_message == f"Device {router_fqdn} not found":
            return {
                "librenms_error": str(response),
                "librenms_device": "Device not found, please confirm nothing needs to be removed from LibreNMS",
            }
        raise ProcessFailureError(message="LibreNMS error", details=response) from None

    return {"librenms_device": "Device removed from LibreNMS"}


@step("Apply the archiving license in Kentik")
def kentik_apply_archive_license(subscription: Router, process_id: UUIDstr) -> State:
    """Apply the archiving license to a PE router in Kentik, and report the outcome by email.

    The device is looked up in Kentik by its FQDN. For Juniper devices a second lookup is attempted with the dots in
    the FQDN replaced by underscores. When the device is found and the archiving plan still has room, the device is
    moved to that plan with a sample rate of 1 and its BGP type set to `none`.

    An email is sent to the configured Kentik email destinations in every case: when the device cannot be found, when
    all archiving licenses are already in use, and when the license has been applied. In the first two cases nothing
    is updated in Kentik.

    Returns a state update with the key ``kentik_device``: the updated device as returned by Kentik on success, or a
    message explaining why nothing was updated.
    """
    kentik = KentikClient()
    params = load_oss_params()
    router_fqdn = subscription.router.router_fqdn

    def _workflow_footer() -> str:
        """Build the closing lines shared by all emails, including a link to this workflow run."""
        return (
            f"For reference, the workflow run can be found at: "
            f"{params.GENERAL.public_hostname}/workflows/{process_id}\n\nRegards, the GÉANT Automation Platform."
        )

    def _notify(subject: str, body: str) -> None:
        """Send an email to the configured Kentik email destinations."""
        send_mail(subject, body, destination=params.EMAIL.kentik_email_destinations)

    #  First attempt: look the device up by its FQDN as-is.
    device = kentik.get_device_by_name(router_fqdn)
    if "id" not in device and subscription.router.vendor == Vendor.JUNIPER:
        #  If the device is a Juniper, there is a chance that the FQDN is written with underscores as delimiter.
        #  We try again when fetching the device was unsuccessful the first time.
        device = kentik.get_device_by_name(router_fqdn.replace(".", "_"))

    #  If still unsuccessful, we give up and alert externally by email.
    if "id" not in device:
        _notify(
            "[GSO][Kentik] Failed to terminate router",
            f"During the execution of a router termination workflow in GSO, we were unable to find the device "
            f"{router_fqdn}.\nPlease update this device manually in Kentik.\n\n{_workflow_footer()}",
        )
        return {"kentik_device": f"Device {router_fqdn} not found in Kentik, no license applied!"}

    #  Send an email if we are out of archiving licenses.
    archive_plan = kentik.get_plan_by_name(params.KENTIK.archive_license_key)
    licenses_in_use = len(archive_plan["devices"])
    license_limit = archive_plan["max_devices"]
    if licenses_in_use >= license_limit:
        _notify(
            "[GSO][Kentik] Failed to apply historical license",
            f"During the execution of a router termination workflow on GSO, we were unable to apply a historical "
            f"license to device {router_fqdn}.\nNo changes have been made, please update this device manually.\n"
            f"It appears we have run out of available historical licenses, all {license_limit} licenses are "
            f"currently in use.\n\n{_workflow_footer()}",
        )
        return {"kentik_device": "No more archiving licenses available. Nothing is updated in Kentik."}

    archive_settings = {
        "device": {
            "plan_id": archive_plan["id"],
            "device_sample_rate": 1,
            "device_bgp_type": "none",
        }
    }
    device = kentik.update_device(device["id"], archive_settings)

    #  The plan was fetched before the update, so the device that was just moved is counted on top of it.
    _notify(
        "[GSO][Kentik] Historical license has been applied",
        f"A historical license has been applied to device {router_fqdn}.\n"
        f"Currently, {licenses_in_use + 1} out of {license_limit} historical licenses are in use.\n\n"
        f"{_workflow_footer()}",
    )

    return {"kentik_device": device}


@workflow(
    "Terminate router",
    initial_input_form=wrap_modify_initial_input_form(initial_input_form_generator),
    target=Target.TERMINATE,
)
def terminate_router() -> StepList:
    """Terminate a router subscription.

    The steps run in this order:

    * Store the subscription on the process and `unsync` it.
    * If selected by the operator, update the iBGP mesh. A P router is removed from all PE routers. A PE router is
      removed from the PE routers and from all P routers. Every removal is a dry run followed by a real run.
    * Deprovision the loopback IPs from IPAM.
    * If selected by the operator, remove configuration from the router, as a dry run followed by a real run.
    * For Nokia routers, remove the device from Netbox.
    * Remove the device from LibreNMS.
    * For PE routers, apply the archiving license in Kentik.
    * Set the subscription status to `TERMINATED` and `resync` it.
    """
    #  Guards that read the values placed in the state by the initial input form.
    if_remove_config = conditional(lambda state: state["remove_configuration"])
    if_update_mesh = conditional(lambda state: state["update_ibgp_mesh"])
    if_nokia = conditional(lambda state: state["router_is_nokia"])
    if_pe = conditional(lambda state: state["router_role"] == RouterRole.PE)
    if_p = conditional(lambda state: state["router_role"] == RouterRole.P)

    pipeline = begin >> store_process_subscription(Target.TERMINATE) >> unsync

    #  Every iBGP mesh step is guarded twice: by the operator's choice, and by the role of the router.
    ibgp_mesh_steps = (
        (if_p, remove_p_from_all_pe_dry),
        (if_p, remove_p_from_all_pe_real),
        (if_pe, remove_pe_from_all_pe_dry),
        (if_pe, remove_pe_from_all_pe_real),
        (if_pe, remove_pe_from_all_p_dry),
        (if_pe, remove_pe_from_all_p_real),
    )
    for role_guard, mesh_step in ibgp_mesh_steps:
        pipeline = pipeline >> if_update_mesh(role_guard(lso_interaction(mesh_step)))

    return (
        pipeline
        >> deprovision_loopback_ips
        >> if_remove_config(lso_interaction(remove_config_from_router_dry))
        >> if_remove_config(lso_interaction(remove_config_from_router_real))
        >> if_nokia(remove_device_from_netbox)
        >> remove_device_from_librenms
        >> if_pe(kentik_apply_archive_license)
        >> set_status(SubscriptionLifecycle.TERMINATED)
        >> resync
        >> done
    )