Source code for getml.pipeline.scores_container

# Copyright 2022 The SQLNet Company GmbH
#
# This file is licensed under the Elastic License 2.0 (ELv2).
# Refer to the LICENSE.txt file in the root of the repository
# for details.
#


"""
A container for storing a pipeline's scoring history.
"""

from __future__ import annotations

from typing import Callable, Dict, List, Sequence, Union

from getml.utilities.formatting import _Formatter

from .helpers import _unlist_maybe
from .metrics import _all_metrics, accuracy, auc, cross_entropy, mae, rmse, rsquared
from .score import ClassificationScore, Score


class Scores:
    """
    Container which holds the history of all scores associated with a given
    pipeline. The container supports slicing and is sort- and filterable.

    Args:
        data (sequence of :class:`Score`):
            The individual scoring runs held by the container.

        latest (dict):
            The scores of the latest scoring run. It is indexed by the metric
            names imported from the ``metrics`` module and is shared, not
            copied, with every container derived from this one.

    Attributes:
        is_classification (bool): True when every element of ``data`` is a
            ``ClassificationScore``. Note that this is also True for an
            empty container.
        is_regression (bool): The negation of ``is_classification``.
        data: The scores as passed to the constructor.
        sets_used (list): The ``set_used`` attribute of every score in
            ``data``, in the same order.
    """

    # ----------------------------------------------------------------

    def __init__(self, data: Sequence[Score], latest: Dict[str, List[float]]) -> None:
        self._latest = latest

        # all() over an empty sequence is True, so an empty container is
        # reported as a classification container.
        self.is_classification = all(
            isinstance(score, ClassificationScore) for score in data
        )

        self.is_regression = not self.is_classification

        self.data = data

        self.sets_used = [score.set_used for score in data]

    # ----------------------------------------------------------------

    def __getitem__(
        self, key: Union[int, slice, str]
    ) -> Union[Score, Scores, List[float]]:
        """
        Retrieves a single score, a sub-container or the latest values of a
        metric.

        Args:
            key (int, slice or str):
                An int returns the score at that position. A slice returns
                a new container over the sliced scores. A str that is a
                known metric name returns the latest values of that metric;
                any other str returns a new container holding the scores
                whose ``set_used`` equals the key.

        Raises:
            TypeError: If ``key`` is neither an int, a slice nor a str.
        """
        if isinstance(key, int):
            return self.data[key]

        if isinstance(key, slice):
            return Scores(self.data[key], self._latest)

        if isinstance(key, str):
            # allow to access latest scores via their name for backward
            # compatibility
            if key in _all_metrics:
                return self._latest[key]

            scores_subset = [score for score in self.data if score.set_used == key]
            return Scores(scores_subset, self._latest)

        raise TypeError(
            f"Scores can only be indexed by: int, slices, or str, not {type(key).__name__}"
        )

    # ----------------------------------------------------------------

    def __iter__(self):
        yield from self.data

    # ----------------------------------------------------------------

    def __len__(self) -> int:
        return len(self.data)

    # ------------------------------------------------------------

    def __repr__(self) -> str:
        return self._format()._render_string()

    # ------------------------------------------------------------

    def _repr_html_(self) -> str:
        return self._format()._render_html()

    # ------------------------------------------------------------

    def _format(self) -> _Formatter:
        """
        Builds the formatter used to render the container as text or HTML.
        """
        headers = ["date time", "set used", "target"]

        # is_regression is defined as the negation of is_classification, so
        # exactly one group of metric columns is appended.
        if self.is_classification:
            headers += ["accuracy", "auc", "cross entropy"]
        else:
            headers += ["mae", "rmse", "rsquared"]

        # One row per score, made of the score's instance attributes in
        # their definition order (presumably matching the headers above;
        # verify against the Score classes).
        rows = [list(vars(score).values()) for score in self.data]

        return _Formatter([headers], rows)

    # ----------------------------------------------------------------

    @property
    def accuracy(self) -> Union[float, List[float]]:
        """
        A convenience wrapper to retrieve the `accuracy` from the latest
        scoring run.
        """
        return _unlist_maybe(self._latest[accuracy])

    # ----------------------------------------------------------------

    @property
    def auc(self) -> Union[float, List[float]]:
        """
        A convenience wrapper to retrieve the `auc` from the latest scoring
        run.
        """
        return _unlist_maybe(self._latest[auc])

    # ----------------------------------------------------------------

    @property
    def cross_entropy(self) -> Union[float, List[float]]:
        """
        A convenience wrapper to retrieve the `cross entropy` from the
        latest scoring run.
        """
        return _unlist_maybe(self._latest[cross_entropy])

    # ----------------------------------------------------------------

    def filter(self, conditional: Callable[[Score], bool]) -> Scores:
        """
        Filters the scores container.

        Args:
            conditional (callable):
                A callable that evaluates to a boolean for a given item.

        Returns:
            :class:`getml.pipeline.Scores`:
                A container of filtered scores.

        Example:
            .. code-block:: python

                from datetime import datetime, timedelta
                one_week_ago = datetime.today() - timedelta(days=7)
                scores_last_week = pipe.scores.filter(lambda score: score.date_time >= one_week_ago)
        """
        scores_filtered = [score for score in self.data if conditional(score)]
        return Scores(scores_filtered, self._latest)

    # ----------------------------------------------------------------

    @property
    def mae(self) -> Union[float, List[float]]:
        """
        A convenience wrapper to retrieve the `mae` from the latest scoring
        run.
        """
        return _unlist_maybe(self._latest[mae])

    # ----------------------------------------------------------------

    @property
    def rmse(self) -> Union[float, List[float]]:
        """
        A convenience wrapper to retrieve the `rmse` from the latest scoring
        run.
        """
        return _unlist_maybe(self._latest[rmse])

    # ----------------------------------------------------------------

    @property
    def rsquared(self) -> Union[float, List[float]]:
        """
        A convenience wrapper to retrieve the `rsquared` from the latest
        scoring run.
        """
        return _unlist_maybe(self._latest[rsquared])

    # ----------------------------------------------------------------

    def sort(
        self, key: Callable[[Score], Union[float, int, str]], descending: bool = False
    ) -> Scores:
        """
        Sorts the scores container.

        Args:
            key (callable):
                A callable that evaluates to a sort key for a given item.

            descending (bool, optional):
                Whether to sort in descending order.

        Returns:
            :class:`getml.pipeline.Scores`:
                A container of sorted scores.

        Example:
            .. code-block:: python

                by_auc = pipe.scores.sort(key=lambda score: score.auc)
                most_recent_first = pipe.scores.sort(key=lambda score: score.date_time, descending=True)
        """
        scores_sorted = sorted(self.data, key=key, reverse=descending)
        return Scores(scores_sorted, self._latest)