You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

163 lines
5.6 KiB
Python

2 years ago
"""A basic kernel monitor with autorestarting.
This watches a kernel's state using KernelManager.is_alive and auto
restarts the kernel if it dies.
It is an incomplete base class, and must be subclassed.
"""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import time
from traitlets import Bool
from traitlets import default
from traitlets import Dict
from traitlets import Float
from traitlets import Instance
from traitlets import Integer
from traitlets.config.configurable import LoggingConfigurable
class KernelRestarter(LoggingConfigurable):
"""Monitor and autorestart a kernel."""
kernel_manager = Instance("jupyter_client.KernelManager")
debug = Bool(
False,
config=True,
help="""Whether to include every poll event in debugging output.
Has to be set explicitly, because there will be *a lot* of output.
""",
)
time_to_dead = Float(3.0, config=True, help="""Kernel heartbeat interval in seconds.""")
stable_start_time = Float(
10.0,
config=True,
help="""The time in seconds to consider the kernel to have completed a stable start up.""",
)
restart_limit = Integer(
5,
config=True,
help="""The number of consecutive autorestarts before the kernel is presumed dead.""",
)
random_ports_until_alive = Bool(
True,
config=True,
help="""Whether to choose new random ports when restarting before the kernel is alive.""",
)
_restarting = Bool(False)
_restart_count = Integer(0)
_initial_startup = Bool(True)
_last_dead = Float()
@default("_last_dead")
def _default_last_dead(self):
return time.time()
callbacks = Dict()
def _callbacks_default(self):
return dict(restart=[], dead=[])
def start(self):
"""Start the polling of the kernel."""
raise NotImplementedError("Must be implemented in a subclass")
def stop(self):
"""Stop the kernel polling."""
raise NotImplementedError("Must be implemented in a subclass")
def add_callback(self, f, event="restart"):
"""register a callback to fire on a particular event
Possible values for event:
'restart' (default): kernel has died, and will be restarted.
'dead': restart has failed, kernel will be left dead.
"""
self.callbacks[event].append(f)
def remove_callback(self, f, event="restart"):
"""unregister a callback to fire on a particular event
Possible values for event:
'restart' (default): kernel has died, and will be restarted.
'dead': restart has failed, kernel will be left dead.
"""
try:
self.callbacks[event].remove(f)
except ValueError:
pass
def _fire_callbacks(self, event):
"""fire our callbacks for a particular event"""
for callback in self.callbacks[event]:
try:
callback()
except Exception:
self.log.error(
"KernelRestarter: %s callback %r failed",
event,
callback,
exc_info=True,
)
def poll(self):
if self.debug:
self.log.debug("Polling kernel...")
if self.kernel_manager.shutting_down:
self.log.debug("Kernel shutdown in progress...")
return
now = time.time()
if not self.kernel_manager.is_alive():
self._last_dead = now
if self._restarting:
self._restart_count += 1
else:
self._restart_count = 1
if self._restart_count > self.restart_limit:
self.log.warning("KernelRestarter: restart failed")
self._fire_callbacks("dead")
self._restarting = False
self._restart_count = 0
self.stop()
else:
newports = self.random_ports_until_alive and self._initial_startup
self.log.info(
"KernelRestarter: restarting kernel (%i/%i), %s random ports",
self._restart_count,
self.restart_limit,
"new" if newports else "keep",
)
self._fire_callbacks("restart")
self.kernel_manager.restart_kernel(now=True, newports=newports)
self._restarting = True
else:
# Since `is_alive` only tests that the kernel process is alive, it does not
# indicate that the kernel has successfully completed startup. To solve this
# correctly, we would need to wait for a kernel info reply, but it is not
# necessarily appropriate to start a kernel client + channels in the
# restarter. Therefore, we use "has been alive continuously for X time" as a
# heuristic for a stable start up.
# See https://github.com/jupyter/jupyter_client/pull/717 for details.
stable_start_time = self.stable_start_time
if self.kernel_manager.provisioner:
stable_start_time = self.kernel_manager.provisioner.get_stable_start_time(
recommended=stable_start_time
)
if self._initial_startup and now - self._last_dead >= stable_start_time:
self._initial_startup = False
if self._restarting and now - self._last_dead >= stable_start_time:
self.log.debug("KernelRestarter: restart apparently succeeded")
self._restarting = False