Source code for getml.pipeline.metrics

# Copyright 2021 The SQLNet Company GmbH

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

"""
Custom class for handling the metrics of a pipeline.
"""

import json

import numpy as np

import getml.communication as comm


[docs]class Metrics: """ Custom class for handling the metrics generated by the pipeline. Example: .. code-block:: python recall, precision = my_pipeline.metrics.precision_recall_curve() fpr, tpr = my_pipeline.metrics.roc_curve() """ # ---------------------------------------------------------------- def __init__(self, name): if not isinstance(name, str): raise ValueError("'name' must be a str.") self.name = name # ------------------------------------------------------------
[docs] def lift_curve(self, target_num=0): """ Returns the data for the lift curve, as displayed in the getML monitor. This requires that you call :meth:`~getml.pipeline.Pipeline.score` first. The data used for the curve will always be the data from the *last* time you called :meth:`~getml.pipeline.Pipeline.score`. Args: target_num (int): Indicates for which target you want to plot the lift curve. (Pipelines can have more than one target.) Return: (:class:`numpy.ndarray`, :class:`numpy.ndarray`): - The first array is the proportion of samples, usually displayed on the x-axis. - The second array is the lift, usually displayed on the y-axis. """ # ------------------------------------------------------------ cmd = dict() cmd["type_"] = "Pipeline.lift_curve" cmd["name_"] = self.name cmd["target_num_"] = target_num # ------------------------------------------------------------ sock = comm.send_and_receive_socket(cmd) msg = comm.recv_string(sock) if msg != "Success!": comm.engine_exception_handler(msg) # ------------------------------------------------------------ msg = comm.recv_string(sock) json_obj = json.loads(msg) # ------------------------------------------------------------ return (np.asarray(json_obj["proportion_"]), np.asarray(json_obj["lift_"]))
# ------------------------------------------------------------
[docs] def precision_recall_curve(self, target_num=0): """ Returns the data for the precision-recall curve, as displayed in the getML monitor. This requires that you call :meth:`~getml.pipeline.Pipeline.score` first. The data used for the curve will always be the data from the *last* time you called :meth:`~getml.pipeline.Pipeline.score`. Args: target_num (int): Indicates for which target you want to plot the lift curve. (Pipelines can have more than one target.) Return: (:class:`numpy.ndarray`, :class:`numpy.ndarray`): - The first array is the recall (a.k.a. true postive rate), usually displayed on the x-axis. - The second array is the precision, usually displayed on the y-axis. """ # ------------------------------------------------------------ cmd = dict() cmd["type_"] = "Pipeline.precision_recall_curve" cmd["name_"] = self.name cmd["target_num_"] = target_num # ------------------------------------------------------------ sock = comm.send_and_receive_socket(cmd) msg = comm.recv_string(sock) if msg != "Success!": comm.engine_exception_handler(msg) # ------------------------------------------------------------ msg = comm.recv_string(sock) json_obj = json.loads(msg) # ------------------------------------------------------------ return (np.asarray(json_obj["tpr_"]), np.asarray(json_obj["precision_"]))
# ------------------------------------------------------------
[docs] def roc_curve(self, target_num=0): """ Returns the data for the ROC curve, as displayed in the getML monitor. This requires that you call :meth:`~getml.pipeline.Pipeline.score` first. The data used for the curve will always be the data from the *last* time you called :meth:`~getml.pipeline.Pipeline.score`. Args: target_num (int): Indicates for which target you want to plot the lift curve. (Pipelines can have more than one target.) Return: (:class:`numpy.ndarray`, :class:`numpy.ndarray`): - The first array is the false positive rate, usually displayed on the x-axis. - The second array is the true positive rate, usually displayed on the y-axis. """ # ------------------------------------------------------------ cmd = dict() cmd["type_"] = "Pipeline.roc_curve" cmd["name_"] = self.name cmd["target_num_"] = target_num # ------------------------------------------------------------ sock = comm.send_and_receive_socket(cmd) msg = comm.recv_string(sock) if msg != "Success!": comm.engine_exception_handler(msg) # ------------------------------------------------------------ msg = comm.recv_string(sock) json_obj = json.loads(msg) # ------------------------------------------------------------ return (np.asarray(json_obj["fpr_"]), np.asarray(json_obj["tpr_"]))
# ----------------------------------------------------------------