You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
443 lines
16 KiB
Python
443 lines
16 KiB
Python
"""
|
|
Emit structured, discrete events when various actions happen.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import copy
|
|
import json
|
|
import logging
|
|
import typing as t
|
|
import warnings
|
|
from datetime import datetime, timezone
|
|
from importlib.metadata import version
|
|
|
|
from jsonschema import ValidationError
|
|
from packaging.version import parse
|
|
from traitlets import Dict, Instance, Set, default
|
|
from traitlets.config import Config, LoggingConfigurable
|
|
|
|
from .schema import SchemaType
|
|
from .schema_registry import SchemaRegistry
|
|
from .traits import Handlers
|
|
from .validators import JUPYTER_EVENTS_CORE_VALIDATOR
|
|
|
|
# Check if the version is greater than 3.1.0
|
|
version_info = version("python-json-logger")
|
|
if parse(version_info) >= parse("3.1.0"):
|
|
from pythonjsonlogger.json import JsonFormatter
|
|
else:
|
|
from pythonjsonlogger.jsonlogger import JsonFormatter # type: ignore[attr-defined]
|
|
|
|
# Increment this version when the metadata included with each event
|
|
# changes.
|
|
EVENTS_METADATA_VERSION = 1
|
|
|
|
|
|
class SchemaNotRegistered(Warning):
|
|
"""A warning to raise when an event is given to the logger
|
|
but its schema has not be registered with the EventLogger
|
|
"""
|
|
|
|
|
|
class ModifierError(Exception):
|
|
"""An exception to raise when a modifier does not
|
|
show the proper signature.
|
|
"""
|
|
|
|
|
|
class CoreMetadataError(Exception):
|
|
"""An exception raised when event core metadata is not valid."""
|
|
|
|
|
|
# Only show this warning on the first instance
|
|
# of each event type that fails to emit.
|
|
warnings.simplefilter("once", SchemaNotRegistered)
|
|
|
|
|
|
class ListenerError(Exception):
|
|
"""An exception to raise when a listener does not
|
|
show the proper signature.
|
|
"""
|
|
|
|
|
|
class EventLogger(LoggingConfigurable):
|
|
"""
|
|
An Event logger for emitting structured events.
|
|
|
|
Event schemas must be registered with the
|
|
EventLogger using the `register_schema` or
|
|
`register_schema_file` methods. Every schema
|
|
will be validated against Jupyter Event's metaschema.
|
|
"""
|
|
|
|
handlers = Handlers(
|
|
default_value=None,
|
|
allow_none=True,
|
|
help="""A list of logging.Handler instances to send events to.
|
|
|
|
When set to None (the default), all events are discarded.
|
|
""",
|
|
).tag(config=True)
|
|
|
|
schemas = Instance(
|
|
SchemaRegistry,
|
|
help="""The SchemaRegistry for caching validated schemas
|
|
and their jsonschema validators.
|
|
""",
|
|
)
|
|
|
|
_modifiers = Dict({}, help="A mapping of schemas to their list of modifiers.")
|
|
|
|
_modified_listeners = Dict({}, help="A mapping of schemas to the listeners of modified events.")
|
|
|
|
_unmodified_listeners = Dict(
|
|
{}, help="A mapping of schemas to the listeners of unmodified/raw events."
|
|
)
|
|
|
|
_active_listeners: set[asyncio.Task[t.Any]] = Set() # type:ignore[assignment]
|
|
|
|
async def gather_listeners(self) -> list[t.Any]:
|
|
"""Gather all of the active listeners."""
|
|
return await asyncio.gather(*self._active_listeners, return_exceptions=True)
|
|
|
|
@default("schemas")
|
|
def _default_schemas(self) -> SchemaRegistry:
|
|
return SchemaRegistry()
|
|
|
|
def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
|
|
"""Initialize the logger."""
|
|
# We need to initialize the configurable before
|
|
# adding the logging handlers.
|
|
super().__init__(*args, **kwargs)
|
|
# Use a unique name for the logger so that multiple instances of EventLog do not write
|
|
# to each other's handlers.
|
|
log_name = __name__ + "." + str(id(self))
|
|
self._logger = logging.getLogger(log_name)
|
|
# We don't want events to show up in the default logs
|
|
self._logger.propagate = False
|
|
# We will use log.info to emit
|
|
self._logger.setLevel(logging.INFO)
|
|
# Add each handler to the logger and format the handlers.
|
|
if self.handlers:
|
|
for handler in self.handlers:
|
|
self.register_handler(handler)
|
|
|
|
def _load_config(
|
|
self,
|
|
cfg: Config,
|
|
section_names: list[str] | None = None, # noqa: ARG002
|
|
traits: list[str] | None = None, # type:ignore[override] # noqa: ARG002
|
|
) -> None:
|
|
"""Load EventLogger traits from a Config object, patching the
|
|
handlers trait in the Config object to avoid deepcopy errors.
|
|
"""
|
|
my_cfg = self._find_my_config(cfg)
|
|
handlers: list[logging.Handler] = my_cfg.pop("handlers", [])
|
|
|
|
# Turn handlers list into a pickeable function
|
|
def get_handlers() -> list[logging.Handler]:
|
|
return handlers
|
|
|
|
my_cfg["handlers"] = get_handlers
|
|
|
|
# Build a new eventlog config object.
|
|
eventlogger_cfg = Config({"EventLogger": my_cfg})
|
|
super()._load_config(eventlogger_cfg, section_names=None, traits=None)
|
|
|
|
def register_event_schema(self, schema: SchemaType) -> None:
|
|
"""Register this schema with the schema registry.
|
|
|
|
Get this registered schema using the EventLogger.schema.get() method.
|
|
"""
|
|
event_schema = self.schemas.register(schema) # type:ignore[arg-type]
|
|
key = event_schema.id
|
|
# It's possible that listeners and modifiers have been added for this
|
|
# schema before the schema is registered.
|
|
if key not in self._modifiers:
|
|
self._modifiers[key] = set()
|
|
if key not in self._modified_listeners:
|
|
self._modified_listeners[key] = set()
|
|
if key not in self._unmodified_listeners:
|
|
self._unmodified_listeners[key] = set()
|
|
|
|
def register_handler(self, handler: logging.Handler) -> None:
|
|
"""Register a new logging handler to the Event Logger.
|
|
|
|
All outgoing messages will be formatted as a JSON string.
|
|
"""
|
|
|
|
def _handle_message_field(record: t.Any, **kwargs: t.Any) -> str:
|
|
"""Python's logger always emits the "message" field with
|
|
the value as "null" unless it's present in the schema/data.
|
|
Message happens to be a common field for event logs,
|
|
so special case it here and only emit it if "message"
|
|
is found the in the schema's property list.
|
|
"""
|
|
schema = self.schemas.get(record["__schema__"])
|
|
if "message" not in schema.properties:
|
|
del record["message"]
|
|
return json.dumps(record, **kwargs)
|
|
|
|
formatter = JsonFormatter(
|
|
json_serializer=_handle_message_field,
|
|
)
|
|
handler.setFormatter(formatter)
|
|
self._logger.addHandler(handler)
|
|
if handler not in self.handlers:
|
|
self.handlers.append(handler)
|
|
|
|
def remove_handler(self, handler: logging.Handler) -> None:
|
|
"""Remove a logging handler from the logger and list of handlers."""
|
|
self._logger.removeHandler(handler)
|
|
if handler in self.handlers:
|
|
self.handlers.remove(handler)
|
|
|
|
def add_modifier(
|
|
self,
|
|
*,
|
|
schema_id: str | None = None,
|
|
modifier: t.Callable[[str, dict[str, t.Any]], dict[str, t.Any]],
|
|
) -> None:
|
|
"""Add a modifier (callable) to a registered event.
|
|
|
|
Parameters
|
|
----------
|
|
modifier: Callable
|
|
A callable function/method that executes when the named event occurs.
|
|
This method enforces a string signature for modifiers:
|
|
|
|
(schema_id: str, data: dict) -> dict:
|
|
"""
|
|
# Ensure that this is a callable function/method
|
|
if not callable(modifier):
|
|
msg = "`modifier` must be a callable" # type:ignore[unreachable]
|
|
raise TypeError(msg)
|
|
|
|
# If the schema ID and version is given, only add
|
|
# this modifier to that schema
|
|
if schema_id:
|
|
# If the schema hasn't been added yet,
|
|
# start a placeholder set.
|
|
modifiers = self._modifiers.get(schema_id, set())
|
|
modifiers.add(modifier)
|
|
self._modifiers[schema_id] = modifiers
|
|
return
|
|
for id_ in self._modifiers:
|
|
if schema_id is None or id_ == schema_id:
|
|
self._modifiers[id_].add(modifier)
|
|
|
|
def remove_modifier(
|
|
self,
|
|
*,
|
|
schema_id: str | None = None,
|
|
modifier: t.Callable[[str, dict[str, t.Any]], dict[str, t.Any]],
|
|
) -> None:
|
|
"""Remove a modifier from an event or all events.
|
|
|
|
Parameters
|
|
----------
|
|
schema_id: str
|
|
If given, remove this modifier only for a specific event type.
|
|
modifier: Callable[[str, dict], dict]
|
|
|
|
The modifier to remove.
|
|
"""
|
|
# If schema_id is given remove the modifier from this schema.
|
|
if schema_id:
|
|
self._modifiers[schema_id].discard(modifier)
|
|
# If no schema_id is given, remove the modifier from all events.
|
|
else:
|
|
for schema_id in self.schemas.schema_ids:
|
|
# Remove the modifier if it is found in the list.
|
|
self._modifiers[schema_id].discard(modifier)
|
|
self._modifiers[schema_id].discard(modifier)
|
|
|
|
def add_listener(
|
|
self,
|
|
*,
|
|
modified: bool = True,
|
|
schema_id: str | None = None,
|
|
listener: t.Callable[[EventLogger, str, dict[str, t.Any]], t.Coroutine[t.Any, t.Any, None]],
|
|
) -> None:
|
|
"""Add a listener (callable) to a registered event.
|
|
|
|
Parameters
|
|
----------
|
|
modified: bool
|
|
If True (default), listens to the data after it has been mutated/modified
|
|
by the list of modifiers.
|
|
schema_id: str
|
|
$id of the schema
|
|
listener: Callable
|
|
A callable function/method that executes when the named event occurs.
|
|
"""
|
|
if not callable(listener):
|
|
msg = "`listener` must be a callable" # type:ignore[unreachable]
|
|
raise TypeError(msg)
|
|
|
|
# If the schema ID and version is given, only add
|
|
# this modifier to that schema
|
|
if schema_id:
|
|
if modified:
|
|
# If the schema hasn't been added yet,
|
|
# start a placeholder set.
|
|
listeners = self._modified_listeners.get(schema_id, set())
|
|
listeners.add(listener)
|
|
self._modified_listeners[schema_id] = listeners
|
|
return
|
|
listeners = self._unmodified_listeners.get(schema_id, set())
|
|
listeners.add(listener)
|
|
self._unmodified_listeners[schema_id] = listeners
|
|
return
|
|
for id_ in self.schemas.schema_ids:
|
|
if schema_id is None or id_ == schema_id:
|
|
if modified:
|
|
self._modified_listeners[id_].add(listener)
|
|
else:
|
|
self._unmodified_listeners[id_].add(listener)
|
|
|
|
def remove_listener(
|
|
self,
|
|
*,
|
|
schema_id: str | None = None,
|
|
listener: t.Callable[[EventLogger, str, dict[str, t.Any]], t.Coroutine[t.Any, t.Any, None]],
|
|
) -> None:
|
|
"""Remove a listener from an event or all events.
|
|
|
|
Parameters
|
|
----------
|
|
schema_id: str
|
|
If given, remove this modifier only for a specific event type.
|
|
|
|
listener: Callable[[EventLogger, str, dict], dict]
|
|
The modifier to remove.
|
|
"""
|
|
# If schema_id is given remove the listener from this schema.
|
|
if schema_id:
|
|
self._modified_listeners[schema_id].discard(listener)
|
|
self._unmodified_listeners[schema_id].discard(listener)
|
|
# If no schema_id is given, remove the listener from all events.
|
|
else:
|
|
for schema_id in self.schemas.schema_ids:
|
|
# Remove the listener if it is found in the list.
|
|
self._modified_listeners[schema_id].discard(listener)
|
|
self._unmodified_listeners[schema_id].discard(listener)
|
|
|
|
def emit(
|
|
self, *, schema_id: str, data: dict[str, t.Any], timestamp_override: datetime | None = None
|
|
) -> dict[str, t.Any] | None:
|
|
"""
|
|
Record given event with schema has occurred.
|
|
|
|
Parameters
|
|
----------
|
|
schema_id: str
|
|
$id of the schema
|
|
data: dict
|
|
The event to record
|
|
timestamp_override: datetime, optional
|
|
Optionally override the event timestamp. By default it is set to the current timestamp.
|
|
|
|
Returns
|
|
-------
|
|
dict
|
|
The recorded event data
|
|
"""
|
|
# If no handlers are routing these events, there's no need to proceed.
|
|
if (
|
|
not self.handlers
|
|
and not self._modified_listeners.get(schema_id)
|
|
and not self._unmodified_listeners.get(schema_id)
|
|
):
|
|
return None
|
|
|
|
# If the schema hasn't been registered, raise a warning to make sure
|
|
# this was intended.
|
|
if schema_id not in self.schemas:
|
|
warnings.warn(
|
|
f"{schema_id} has not been registered yet. If "
|
|
"this was not intentional, please register the schema using the "
|
|
"`register_event_schema` method.",
|
|
SchemaNotRegistered,
|
|
stacklevel=2,
|
|
)
|
|
return None
|
|
|
|
schema = self.schemas.get(schema_id)
|
|
|
|
# Deep copy the data and modify the copy.
|
|
modified_data = copy.deepcopy(data)
|
|
for modifier in self._modifiers[schema.id]:
|
|
modified_data = modifier(schema_id=schema_id, data=modified_data)
|
|
|
|
if self._unmodified_listeners[schema.id]:
|
|
# Process this event, i.e. validate and modify (in place)
|
|
self.schemas.validate_event(schema_id, data)
|
|
|
|
# Validate the modified data.
|
|
self.schemas.validate_event(schema_id, modified_data)
|
|
|
|
# Generate the empty event capsule.
|
|
timestamp = (
|
|
datetime.now(tz=timezone.utc) if timestamp_override is None else timestamp_override
|
|
)
|
|
capsule = {
|
|
"__timestamp__": timestamp.isoformat() + "Z",
|
|
"__schema__": schema_id,
|
|
"__schema_version__": schema.version,
|
|
"__metadata_version__": EVENTS_METADATA_VERSION,
|
|
}
|
|
try:
|
|
JUPYTER_EVENTS_CORE_VALIDATOR.validate(capsule)
|
|
except ValidationError as err:
|
|
raise CoreMetadataError from err
|
|
|
|
capsule.update(modified_data)
|
|
|
|
self._logger.info(capsule)
|
|
|
|
# callback for removing from finished listeners
|
|
# from active listeners set.
|
|
def _listener_task_done(task: asyncio.Task[t.Any]) -> None:
|
|
# If an exception happens, log it to the main
|
|
# applications logger
|
|
err = task.exception()
|
|
if err:
|
|
self.log.error(err)
|
|
self._active_listeners.discard(task)
|
|
|
|
# Loop over listeners and execute them.
|
|
for listener in self._modified_listeners[schema_id]:
|
|
# Schedule this listener as a task and add
|
|
# it to the list of active listeners
|
|
task = asyncio.create_task(
|
|
listener(
|
|
logger=self,
|
|
schema_id=schema_id,
|
|
data=modified_data,
|
|
)
|
|
)
|
|
self._active_listeners.add(task)
|
|
|
|
# Adds the task and cleans it up later if needed.
|
|
task.add_done_callback(_listener_task_done)
|
|
|
|
for listener in self._unmodified_listeners[schema_id]:
|
|
task = asyncio.create_task(listener(logger=self, schema_id=schema_id, data=data))
|
|
self._active_listeners.add(task)
|
|
|
|
# Remove task from active listeners once its finished.
|
|
def _listener_task_done(task: asyncio.Task[t.Any]) -> None:
|
|
# If an exception happens, log it to the main
|
|
# applications logger
|
|
err = task.exception()
|
|
if err:
|
|
self.log.error(err)
|
|
self._active_listeners.discard(task)
|
|
|
|
# Adds the task and cleans it up later if needed.
|
|
task.add_done_callback(_listener_task_done)
|
|
|
|
return capsule
|