# Source code for getml.pipeline.metrics

# Copyright 2021 The SQLNet Company GmbH

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

"""
Custom class for handling the metrics of a pipeline.
"""

import json

import numpy as np

import getml.communication as comm


class Metrics:
    """
    Custom class for handling the
    metrics generated by the pipeline.

    All curve methods send a command to the getML engine and return
    the data of the requested curve as a pair of numpy arrays.

    Args:
        name (str):
            Name of the pipeline the metrics belong to. It is sent
            to the engine as ``name_`` with every command.

    Raises:
        ValueError: If `name` is not a str.

    Example:

        .. code-block:: python

            recall, precision = my_pipeline.metrics.precision_recall_curve()

            fpr, tpr = my_pipeline.metrics.roc_curve()
    """

    # ----------------------------------------------------------------

    def __init__(self, name):

        if not isinstance(name, str):
            raise ValueError("'name' must be a str.")

        self.name = name

    # ------------------------------------------------------------

    def _fetch_curve(self, type_, target_num, x_key, y_key):
        """
        Sends a curve command to the engine and extracts two arrays
        from the JSON reply.

        Shared implementation of :meth:`lift_curve`,
        :meth:`precision_recall_curve` and :meth:`roc_curve`, which
        only differ in the command type and in the JSON keys they read.

        Args:
            type_ (str):
                Value of the ``type_`` field of the command.

            target_num (int):
                Value of the ``target_num_`` field of the command.

            x_key (str):
                Key of the JSON reply holding the first array.

            y_key (str):
                Key of the JSON reply holding the second array.

        Return:
            (:class:`numpy.ndarray`, :class:`numpy.ndarray`):
                The values stored under `x_key` and `y_key`,
                in that order.
        """
        cmd = {
            "type_": type_,
            "name_": self.name,
            "target_num_": target_num,
        }

        sock = comm.send_and_receive_socket(cmd)

        # Release the connection on every path, including the one on
        # which the exception handler is invoked - the original code
        # never closed it.
        # NOTE(review): assumes the object returned by
        # send_and_receive_socket has a close() method (i.e. is a
        # socket) - confirm against getml.communication.
        try:
            # The first message is a status string.
            status = comm.recv_string(sock)

            if status != "Success!":
                # Presumably raises an exception built from the
                # message of the engine - verify in getml.communication.
                comm.engine_exception_handler(status)

            # The second message is the JSON-encoded payload.
            payload = comm.recv_string(sock)
        finally:
            sock.close()

        json_obj = json.loads(payload)

        return (np.asarray(json_obj[x_key]), np.asarray(json_obj[y_key]))

    # ------------------------------------------------------------

    def lift_curve(self, target_num=0):
        """
        Returns the data for the lift curve, as displayed in the getML monitor.

        This requires that you call
        :meth:`~getml.pipeline.Pipeline.score` first. The data used
        for the curve will always be the data from the *last* time
        you called :meth:`~getml.pipeline.Pipeline.score`.

        Args:
            target_num (int):
                Indicates for which target you want to plot the lift
                curve. (Pipelines can have more than one target.)

        Return:
            (:class:`numpy.ndarray`, :class:`numpy.ndarray`):
                - The first array is the proportion of samples, usually
                  displayed on the x-axis.
                - The second array is the lift, usually
                  displayed on the y-axis.
        """
        return self._fetch_curve(
            "Pipeline.lift_curve", target_num, "proportion_", "lift_"
        )

    # ------------------------------------------------------------

    def precision_recall_curve(self, target_num=0):
        """
        Returns the data for the precision-recall curve, as displayed in the getML monitor.

        This requires that you call
        :meth:`~getml.pipeline.Pipeline.score` first. The data used
        for the curve will always be the data from the *last* time
        you called :meth:`~getml.pipeline.Pipeline.score`.

        Args:
            target_num (int):
                Indicates for which target you want to plot the
                precision-recall curve. (Pipelines can have more
                than one target.)

        Return:
            (:class:`numpy.ndarray`, :class:`numpy.ndarray`):
                - The first array is the recall (a.k.a. true positive rate),
                  usually displayed on the x-axis.
                - The second array is the precision, usually
                  displayed on the y-axis.
        """
        return self._fetch_curve(
            "Pipeline.precision_recall_curve", target_num, "tpr_", "precision_"
        )

    # ------------------------------------------------------------

    def roc_curve(self, target_num=0):
        """
        Returns the data for the ROC curve, as displayed in the getML monitor.

        This requires that you call
        :meth:`~getml.pipeline.Pipeline.score` first. The data used
        for the curve will always be the data from the *last* time
        you called :meth:`~getml.pipeline.Pipeline.score`.

        Args:
            target_num (int):
                Indicates for which target you want to plot the ROC
                curve. (Pipelines can have more than one target.)

        Return:
            (:class:`numpy.ndarray`, :class:`numpy.ndarray`):
                - The first array is the false positive rate, usually
                  displayed on the x-axis.
                - The second array is the true positive rate, usually
                  displayed on the y-axis.
        """
        return self._fetch_curve("Pipeline.roc_curve", target_num, "fpr_", "tpr_")

    # ----------------------------------------------------------------