Source code for

# Copyright 2021 The SQLNet Company GmbH

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.


"""Contains the actual columns."""

import numbers

import numpy as np

import getml.communication as comm
import getml.constants as constants

from .aggregation import Aggregation
from .collect_footer_data import _collect_footer_data
from .column import _Column
from .constants import (
from .format import _format
from .get_scalar import _get_scalar
from .last_change_from_col import _last_change_from_col
from .length import _length
from .length_property import _length_property
from .repr import _repr
from .repr_html import _repr_html
from .subroles import _subroles
from .to_numpy import _to_numpy
from .to_numpy_categorical import _to_numpy_categorical
from .unit import _unit
from .unique import _unique
from .unique_categorical import _unique_categorical

# -----------------------------------------------------------------------------

def arange(start=0, stop=None, step=1):
    """
    Returns evenly spaced variables, within a given interval.

    When only one positional value is given (``stop`` is None), that value
    is used as the end of the interval and the beginning is set to 0.

    Args:
        start (float, optional):
            The beginning of the interval. Defaults to 0.

        stop (float):
            The end of the interval.

        step (float, optional):
            The step taken. Defaults to 1. Passing None also yields 1.

    Raises:
        TypeError: If `start`, `stop` or `step` is not a real number.

    Returns:
        A FloatColumnView using the "arange" operator, whose command
        carries `start_`, `stop_` and `step_` as floats.
    """
    # Single-value form: the given value is the end, the beginning is 0.
    if stop is None:
        start, stop = 0, start

    if step is None:
        step = 1

    # Validate in the same order the arguments are declared.
    for label, value in (("start", start), ("stop", stop), ("step", step)):
        if not isinstance(value, numbers.Real):
            raise TypeError("'" + label + "' must be a real number")

    view = FloatColumnView(operator="arange", operand1=None, operand2=None)

    view.cmd.update(
        start_=float(start),
        stop_=float(stop),
        step_=float(step),
    )

    return view
# -----------------------------------------------------------------------------
def rowid():
    """
    Get the row numbers of the table.

    Returns:
        :class:`FloatColumnView`:
            (numerical) column containing the row id, starting with 0
    """
    row_numbers = FloatColumnView(operator="rowid", operand1=None, operand2=None)
    return row_numbers
# -----------------------------------------------------------------------------


def _make_slicing_operand(column, slice):
    """
    Translates a Python slice into a column view usable as a subselection.

    Args:
        column: The column or view being sliced. Only its `length`
            attribute is read here.

        slice (slice): The slice passed to `__getitem__`.

    Returns:
        Either an `arange(...)` view of row numbers or, when the length of
        the column is reported as a string, a boolean view built on
        `rowid()`.
    """
    # A missing start means "from the first row". Normalise once so that
    # None never reaches the column operators below.
    start = slice.start or 0

    # Only a missing stop is open-ended; an explicit stop of 0 is a valid
    # (empty) upper bound and must not be treated as "no stop".
    if slice.stop is None:
        # NOTE(review): a string-valued length presumably signals that the
        # length is not known up front - confirm against _length_property.
        if isinstance(column.length, str):
            step = slice.step or 1
            # ">=" keeps the start row itself, matching the arange branch.
            return (rowid() >= start) & ((rowid() - start) % step == 0)
        return arange(start, column.length, slice.step)

    return arange(start, slice.stop, slice.step)


# -----------------------------------------------------------------------------


def _value_to_cmd(val):
    """
    Wraps a constant Python value into a "const" command dictionary.

    Args:
        val: A bool, str, number or numpy.datetime64.

    Raises:
        TypeError: If `val` is of none of the supported types.

    Returns:
        dict: Command with the keys `operator_`, `value_` and `type_`.
    """
    cmd = dict()

    cmd["operator_"] = "const"
    cmd["value_"] = val

    # bool must be tested before numbers.Number, because bool is a Number.
    if isinstance(val, bool):
        cmd["type_"] = BOOLEAN_COLUMN_VIEW
        return cmd

    if isinstance(val, str):
        cmd["type_"] = STRING_COLUMN_VIEW
        return cmd

    if isinstance(val, numbers.Number):
        cmd["type_"] = FLOAT_COLUMN_VIEW
        cmd["value_"] = float(val)
        return cmd

    if isinstance(val, np.datetime64):
        # Reduce to second resolution and then to a plain float, which is
        # handled by the numbers.Number branch above.
        return _value_to_cmd(np.datetime64(val).astype("datetime64[s]").astype(float))

    # An explicit raise instead of `assert False`: assertions are removed
    # under `python -O`, which would silently return None here.
    raise TypeError("Calling _value_to_cmd on an unknown type")


# -----------------------------------------------------------------------------
class BooleanColumnView:
    """
    Handle for a lazily evaluated boolean column view.

    Column views do not actually exist - they will be lazily
    evaluated when necessary.

    They can be used to take subselection of the data frame
    or to update other columns.

    Args:
        operator (str): Name of the operator this view represents.

        operand1: First operand (a constant or an object with a `cmd`
            attribute), or None.

        operand2: Second operand, or None.

    Example:

    .. code-block:: python

        import numpy as np

        import getml.data as data
        import getml.engine as engine
        import getml.data.roles as roles

        # ----------------

        engine.set_project("examples")

        # ----------------
        # Create a data frame from a JSON string

        json_str = \"\"\"{
            "names": ["patrick", "alex", "phil", "ulrike"],
            "column_01": [2.4, 3.0, 1.2, 1.4],
            "join_key": ["0", "1", "2", "3"],
            "time_stamp": ["2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04"]
        }\"\"\"

        my_df = data.DataFrame(
            "MY DF",
            roles={
                "unused_string": ["names", "join_key", "time_stamp"],
                "unused_float": ["column_01"]}
        ).read_json(
            json_str
        )

        # ----------------

        names = my_df["names"]

        # This is a virtual boolean column.
        a_or_p_in_names = names.contains("p") | names.contains("a")

        # Creates a view containing
        # only those entries, where "names" contains a or p.
        my_view = my_df[a_or_p_in_names]

        # ----------------

        # Returns a new column, where all names
        # containing "rick" are replaced by "Patrick".
        # Again, columns are immutable - this returns an updated
        # version, but leaves the original column unchanged.
        new_names = names.update(names.contains("rick"), "Patrick")

        my_df["new_names"] = new_names

        # ----------------

        # Boolean columns can also be used to
        # create binary target variables.
        target = (names == "phil")

        my_df["target"] = target
        my_df.set_role("target", roles.target)

        # By the way, instead of using the
        # __setitem__ operator and .set_role(...)
        # you can just use .add(...).
        my_df.add(target, "target", roles.target)
    """

    def __init__(self, operator, operand1, operand2):
        self.cmd = dict()

        self.cmd["type_"] = BOOLEAN_COLUMN_VIEW

        # The operator must be stored before the operands are parsed,
        # because _parse_operand reads it for its type checks.
        self.cmd["operator_"] = operator

        if operand1 is not None:
            self.cmd["operand1_"] = self._parse_operand(operand1)

        if operand2 is not None:
            self.cmd["operand2_"] = self._parse_operand(operand2)

    # -----------------------------------------------------------------------------

    def __and__(self, other):
        """Element-wise logical AND, returned as a new view."""
        return BooleanColumnView(
            operator="and",
            operand1=self,
            operand2=other,
        )

    # -----------------------------------------------------------------------------

    def __eq__(self, other):
        """Element-wise equality, returned as a new view (not a bool)."""
        return BooleanColumnView(
            operator="equal_to",
            operand1=self,
            operand2=other,
        )

    # -----------------------------------------------------------------------------

    def __invert__(self):
        """Implements `~view` by delegating to :meth:`is_false`."""
        return self.is_false()

    # -----------------------------------------------------------------------------

    def __or__(self, other):
        """Element-wise logical OR, returned as a new view."""
        return BooleanColumnView(
            operator="or",
            operand1=self,
            operand2=other,
        )

    # -----------------------------------------------------------------------------

    def __ne__(self, other):
        """Element-wise inequality, returned as a new view (not a bool)."""
        return BooleanColumnView(
            operator="not_equal_to",
            operand1=self,
            operand2=other,
        )

    # -----------------------------------------------------------------------------

    def __xor__(self, other):
        """Element-wise logical XOR, returned as a new view."""
        return BooleanColumnView(
            operator="xor",
            operand1=self,
            operand2=other,
        )

    # -----------------------------------------------------------------------------

    def _parse_operand(self, operand):
        """
        Converts an operand into the command dictionary stored in `cmd`.

        Raises:
            TypeError: If the operand is neither a supported constant nor
                an object with a `cmd` attribute, or if a logical operator
                is applied to something other than a boolean view.
        """
        if isinstance(operand, (bool, str, numbers.Number, np.datetime64)):
            return _value_to_cmd(operand)

        if not hasattr(operand, "cmd"):
            raise TypeError(
                "Operand for a BooleanColumnView must be a boolean, string, "
                "a number, a numpy.datetime64 or a column!"
            )

        # The logical operators are only defined on boolean operands.
        if self.cmd["operator_"] in ["and", "or", "not", "xor"]:
            if operand.cmd["type_"] != BOOLEAN_COLUMN_VIEW:
                raise TypeError("This operator can only be applied to a BooleanColumn!")

        return operand.cmd

    # -----------------------------------------------------------------------------

    def to_numpy(self):
        """
        Transform column to numpy array

        Returns:
            The result of `comm.recv_boolean_column`, flattened with
            `ravel()`.
        """

        # -------------------------------------------
        # Build command string

        cmd = dict()

        cmd["name_"] = ""
        cmd["type_"] = "BooleanColumn.get"

        cmd["col_"] = self.cmd

        # -------------------------------------------
        # Send command to engine

        sock = comm.send_and_get_socket(cmd)

        # -------------------------------------------
        # Make sure everything went well and receive data.
        # The socket is closed in `finally`, so it is released
        # even if receiving or the exception handler raises.

        try:
            msg = comm.recv_string(sock)

            if msg != "Found!":
                comm.engine_exception_handler(msg)

            mat = comm.recv_boolean_column(sock)
        finally:
            sock.close()

        # -------------------------------------------

        return mat.ravel()

    # -----------------------------------------------------------------------------

    def is_false(self):
        """Whether an entry is False - effectively inverts the Boolean column."""
        return BooleanColumnView(
            operator="not",
            operand1=self,
            operand2=None,
        )

    # -----------------------------------------------------------------------------

    def as_num(self):
        """Transforms the boolean column into a numerical column"""
        return FloatColumnView(
            operator="boolean_as_num",
            operand1=self,
            operand2=None,
        )
# -----------------------------------------------------------------------------
class StringColumn(_Column):
    """Handle for categorical data that is kept in the getML engine

    Args:
        name (str, optional): Name of the categorical column. When left
            empty, a name is generated from the column type and a
            class-wide counter.

        role (str, optional): Role that the column plays.

        df_name (str, optional):
            ``name`` instance variable of the
            :class:`~getml.DataFrame` containing this column.

    Examples:

    .. code-block:: python

        import numpy as np

        import getml.data as data
        import getml.engine as engine
        import getml.data.roles as roles

        # ----------------

        engine.set_project("examples")

        # ----------------
        # Create a data frame from a JSON string

        json_str = \"\"\"{
            "names": ["patrick", "alex", "phil", "ulrike"],
            "column_01": [2.4, 3.0, 1.2, 1.4],
            "join_key": ["0", "1", "2", "3"],
            "time_stamp": ["2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04"]
        }\"\"\"

        my_df = data.DataFrame(
            "MY DF",
            roles={
                "unused_string": ["names", "join_key", "time_stamp"],
                "unused_float": ["column_01"]}
        ).read_json(
            json_str
        )

        # ----------------

        col1 = my_df["names"]

        # ----------------

        col2 = col1.substr(4, 3)

        my_df.add(col2, "short_names", roles.categorical)

        # ----------------
        # If you do not explicitly set a role,
        # the assigned role will be
        # roles.unused_string.

        col3 = "user-" + col1 + "-" + col2

        my_df["new_names"] = col3
        my_df.set_role("new_names", roles.categorical)
    """

    # Number of StringColumn instances created so far; used for default names.
    _num_columns = 0

    def __init__(self, name="", role="categorical", df_name=""):
        super().__init__()

        StringColumn._num_columns += 1

        column_name = name
        if column_name == "":
            column_name = " ".join((STRING_COLUMN, str(StringColumn._num_columns)))

        self.cmd = {
            "df_name_": df_name,
            "name_": column_name,
            "role_": role,
            "type_": STRING_COLUMN,
        }
# -----------------------------------------------------------------------------
class StringColumnView:
    """
    Lazily evaluated view on a :class:`StringColumn`.

    Columns views do not actually exist - they will be lazily
    evaluated when necessary.

    Examples:

    .. code-block:: python

        import numpy as np

        import getml.data as data
        import getml.engine as engine
        import getml.data.roles as roles

        # ----------------

        engine.set_project("examples")

        # ----------------
        # Create a data frame from a JSON string

        json_str = \"\"\"{
            "names": ["patrick", "alex", "phil", "ulrike"],
            "column_01": [2.4, 3.0, 1.2, 1.4],
            "join_key": ["0", "1", "2", "3"],
            "time_stamp": ["2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04"]
        }\"\"\"

        my_df = data.DataFrame(
            "MY DF",
            roles={
                "unused_string": ["names", "join_key", "time_stamp"],
                "unused_float": ["column_01"]}
        ).read_json(
            json_str
        )

        # ----------------

        col1 = my_df["names"]

        # ----------------

        # col2 is a virtual column.
        # The substring operation is not
        # executed yet.
        col2 = col1.substr(4, 3)

        # This is where the engine executes
        # the substring operation.
        my_df.add(col2, "short_names", roles.categorical)

        # ----------------
        # If you do not explicitly set a role,
        # the assigned role will be
        # roles.unused_string.

        # col3 is a virtual column.
        # The operation is not
        # executed yet.
        col3 = "user-" + col1 + "-" + col2

        # This is where the operation is
        # executed.
        my_df["new_names"] = col3
        my_df.set_role("new_names", roles.categorical)
    """

    def __init__(self, operator, operand1, operand2):
        # "operator_" has to be in place before any operand is parsed,
        # since _parse_operand looks it up.
        self.cmd = {"type_": STRING_COLUMN_VIEW, "operator_": operator}

        for key, operand in (("operand1_", operand1), ("operand2_", operand2)):
            if operand is not None:
                self.cmd[key] = self._parse_operand(operand)

    # -----------------------------------------------------------------------------

    def _parse_operand(self, operand):
        """
        Turns an operand into its command dictionary.

        Strings become constants; everything else must carry a `cmd`
        attribute whose `type_` is allowed for the current operator.

        Raises:
            TypeError: On an unsupported operand or operand type.
        """
        if isinstance(operand, str):
            return _value_to_cmd(operand)

        if not hasattr(operand, "cmd"):
            raise TypeError(
                """Operand for a StringColumnView must be a string or a column!"""
            )

        operator = self.cmd["operator_"]

        # Select the allowed operand types and the matching error message.
        if operator == "as_str":
            allowed = (FLOAT_COLUMN, FLOAT_COLUMN_VIEW, BOOLEAN_COLUMN_VIEW)
            message = (
                "This operator can only be applied to a FloatColumn or a BooleanColumn!"
            )
        elif operator == "subselection":
            allowed = (
                STRING_COLUMN,
                STRING_COLUMN_VIEW,
                FLOAT_COLUMN,
                FLOAT_COLUMN_VIEW,
                BOOLEAN_COLUMN_VIEW,
            )
            message = (
                "Columns or Views can only be subset by StringColumn) or a BooleanColumn!"
            )
        else:
            allowed = (STRING_COLUMN, STRING_COLUMN_VIEW)
            message = "This operator can only be applied to a StringColumn!"

        if operand.cmd["type_"] not in allowed:
            raise TypeError(message)

        return operand.cmd
# -----------------------------------------------------------------------------
class FloatColumn(_Column):
    """Handle for numerical data in the engine.

    This is a handler for all numerical data in the getML engine,
    including time stamps.

    Args:
        name (str, optional): Name of the numerical column. When left
            empty, a name is generated from the column type and a
            class-wide counter.

        role (str, optional): Role that the column plays.

        df_name (str, optional):
            ``name`` instance variable of the
            :class:`~getml.DataFrame` containing this column.

    Examples:

    .. code-block:: python

        import numpy as np

        import getml.data as data
        import getml.engine as engine
        import getml.data.roles as roles

        # ----------------

        engine.set_project("examples")

        # ----------------
        # Create a data frame from a JSON string

        json_str = \"\"\"{
            "names": ["patrick", "alex", "phil", "ulrike"],
            "column_01": [2.4, 3.0, 1.2, 1.4],
            "join_key": ["0", "1", "2", "3"],
            "time_stamp": ["2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04"]
        }\"\"\"

        my_df = data.DataFrame(
            "MY DF",
            roles={
                "unused_string": ["names", "join_key", "time_stamp"],
                "unused_float": ["column_01"]}
        ).read_json(
            json_str
        )

        # ----------------

        col1 = my_df["column_01"]

        # ----------------

        col2 = 2.0 - col1

        my_df.add(col2, "name", roles.numerical)

        # ----------------
        # If you do not explicitly set a role,
        # the assigned role will be
        # roles.unused_float.

        col3 = (col1 + 2.0*col2) / 3.0

        my_df["column_03"] = col3
        my_df.set_role("column_03", roles.numerical)
    """

    # Number of FloatColumn instances created so far; used for default names.
    _num_columns = 0

    def __init__(self, name="", role="numerical", df_name=""):
        super().__init__()

        FloatColumn._num_columns += 1

        column_name = name
        if column_name == "":
            column_name = " ".join((FLOAT_COLUMN, str(FloatColumn._num_columns)))

        self.cmd = {
            "df_name_": df_name,
            "name_": column_name,
            "role_": role,
            "type_": FLOAT_COLUMN,
        }
# -----------------------------------------------------------------------------
class FloatColumnView:
    """
    Lazily evaluated view on a :class:`FloatColumn`.

    Column views do not actually exist - they will be lazily
    evaluated when necessary.

    Args:
        operator (str): Name of the operator this view represents.

        operand1: First operand (a number, a numpy.datetime64 or an
            object with a `cmd` attribute), or None.

        operand2: Second operand, or None.
    """

    def __init__(self, operator, operand1, operand2):
        # "operator_" has to be in place before any operand is parsed,
        # since _parse_operand looks it up.
        self.cmd = {"type_": FLOAT_COLUMN_VIEW, "operator_": operator}

        for key, operand in (("operand1_", operand1), ("operand2_", operand2)):
            if operand is not None:
                self.cmd[key] = self._parse_operand(operand)

    # -----------------------------------------------------------------------------

    def _parse_operand(self, operand):
        """
        Turns an operand into its command dictionary.

        Numbers and numpy.datetime64 values become constants; everything
        else must carry a `cmd` attribute whose `type_` is allowed for
        the current operator.

        Raises:
            TypeError: On an unsupported operand or operand type.
        """
        if isinstance(operand, (numbers.Number, np.datetime64)):
            return _value_to_cmd(operand)

        if not hasattr(operand, "cmd"):
            raise TypeError(
                """Operand for a FloatColumnView must be a number or a column!"""
            )

        operator = self.cmd["operator_"]

        # Exactly one rule applies per operator, so pick it up front.
        if operator in ("as_num", "as_ts"):
            allowed = (STRING_COLUMN, STRING_COLUMN_VIEW)
            message = "This operator can only be applied to a StringColumn!"
        elif operator == "boolean_as_num":
            allowed = (BOOLEAN_COLUMN_VIEW,)
            message = "This operator can only be applied to a BooleanColumn!"
        elif operator == "subselection":
            allowed = (
                STRING_COLUMN,
                STRING_COLUMN_VIEW,
                BOOLEAN_COLUMN_VIEW,
                FLOAT_COLUMN,
                FLOAT_COLUMN_VIEW,
            )
            message = "The subselection operator can only be applied to FloatColumn!"
        else:
            allowed = (FLOAT_COLUMN, FLOAT_COLUMN_VIEW)
            message = "This operator can only be applied to a FloatColumn!"

        if operand.cmd["type_"] not in allowed:
            raise TypeError(message)

        return operand.cmd
# -----------------------------------------------------------------------------


def _abs(self):
    """Compute absolute value."""
    return FloatColumnView(operator="abs", operand1=self, operand2=None)


FloatColumn.abs = _abs  # type: ignore
FloatColumnView.abs = _abs  # type: ignore

# -----------------------------------------------------------------------------


def _acos(self):
    """Compute arc cosine."""
    return FloatColumnView(operator="acos", operand1=self, operand2=None)


FloatColumn.acos = _acos  # type: ignore
FloatColumnView.acos = _acos  # type: ignore

# -----------------------------------------------------------------------------


def _add(self, other):
    """Implements `self + other`; string operands lead to a concatenation."""
    if isinstance(other, (StringColumn, StringColumnView, str)):
        return self.as_str() + other
    return FloatColumnView(operator="plus", operand1=self, operand2=other)


def _radd(self, other):
    """Implements `other + self`; string operands lead to a concatenation."""
    if isinstance(other, (StringColumn, StringColumnView, str)):
        return other + self.as_str()
    return FloatColumnView(operator="plus", operand1=other, operand2=self)


FloatColumn.__add__ = _add  # type: ignore
FloatColumn.__radd__ = _radd  # type: ignore

FloatColumnView.__add__ = _add  # type: ignore
FloatColumnView.__radd__ = _radd  # type: ignore

# -----------------------------------------------------------------------------


def _assert_equal(self, alias="new_column"):
    """
    ASSERT EQUAL aggregation.

    Throws an exception unless all values inserted into the aggregation
    are equal.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "assert_equal").get()


FloatColumn.assert_equal = _assert_equal  # type: ignore
FloatColumnView.assert_equal = _assert_equal  # type: ignore

# -----------------------------------------------------------------------------


def _asin(self):
    """Compute arc sine."""
    return FloatColumnView(operator="asin", operand1=self, operand2=None)


FloatColumn.asin = _asin  # type: ignore
FloatColumnView.asin = _asin  # type: ignore

# -----------------------------------------------------------------------------


def _atan(self):
    """Compute arc tangent."""
    return FloatColumnView(operator="atan", operand1=self, operand2=None)


FloatColumn.atan = _atan  # type: ignore
FloatColumnView.atan = _atan  # type: ignore

# -----------------------------------------------------------------------------


def _avg(self, alias="new_column"):
    """
    AVG aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "avg").get()


FloatColumn.avg = _avg  # type: ignore
FloatColumnView.avg = _avg  # type: ignore

# -----------------------------------------------------------------------------


def _cbrt(self):
    """Compute cube root."""
    return FloatColumnView(operator="cbrt", operand1=self, operand2=None)


FloatColumn.cbrt = _cbrt  # type: ignore
FloatColumnView.cbrt = _cbrt  # type: ignore

# -----------------------------------------------------------------------------


def _ceil(self):
    """Round up value."""
    return FloatColumnView(operator="ceil", operand1=self, operand2=None)


FloatColumn.ceil = _ceil  # type: ignore
FloatColumnView.ceil = _ceil  # type: ignore

# -----------------------------------------------------------------------------

FloatColumnView._collect_footer_data = _collect_footer_data  # type: ignore
BooleanColumnView._collect_footer_data = _collect_footer_data  # type: ignore
StringColumnView._collect_footer_data = _collect_footer_data  # type: ignore
FloatColumn._collect_footer_data = _collect_footer_data  # type: ignore
StringColumn._collect_footer_data = _collect_footer_data  # type: ignore

# -----------------------------------------------------------------------------


def _to_str(col):
    """
    Makes `col` usable as an operand of a string concatenation.

    String columns and views pass through, float columns and views are
    converted through `as_str()`, anything else through `str()`.
    """
    if isinstance(col, (StringColumn, StringColumnView)):
        return col
    # NOTE(review): the original listed StringColumnView here a second
    # time, which was unreachable after the check above. BooleanColumnView
    # may have been intended - confirm that it offers as_str() before
    # adding it.
    if isinstance(col, (FloatColumn, FloatColumnView)):
        return col.as_str()
    return str(col)


def _concat(self, other):
    """Implements `self + other` for string columns and views."""
    return StringColumnView(
        operator="concat",
        operand1=self,
        operand2=_to_str(other),
    )


def _rconcat(self, other):
    """Implements `other + self` for string columns and views."""
    return StringColumnView(
        operator="concat",
        operand1=_to_str(other),
        operand2=self,
    )


StringColumn.__add__ = _concat  # type: ignore
StringColumn.__radd__ = _rconcat  # type: ignore

StringColumnView.__add__ = _concat  # type: ignore
StringColumnView.__radd__ = _rconcat  # type: ignore

# -----------------------------------------------------------------------------


def _contains(self, other):
    """
    Returns a boolean column indicating whether a
    string or column entry is contained in the corresponding
    entry of the other column.
    """
    return BooleanColumnView(
        operator="contains",
        operand1=self,
        operand2=other,
    )


StringColumn.contains = _contains  # type: ignore
StringColumnView.contains = _contains  # type: ignore

# -----------------------------------------------------------------------------


def _cos(self):
    """Compute cosine."""
    return FloatColumnView(operator="cos", operand1=self, operand2=None)


FloatColumn.cos = _cos  # type: ignore
FloatColumnView.cos = _cos  # type: ignore

# -----------------------------------------------------------------------------


def _count(self, alias="new_column"):
    """
    COUNT aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "count").get()


FloatColumn.count = _count  # type: ignore
FloatColumnView.count = _count  # type: ignore

# -----------------------------------------------------------------------------


def _count_categorical(self, alias="new_column"):
    """
    COUNT aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "count_categorical").get()


StringColumn.count = _count_categorical  # type: ignore
StringColumnView.count = _count_categorical  # type: ignore

# -----------------------------------------------------------------------------


def _count_distinct(self, alias="new_column"):
    """
    COUNT DISTINCT aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "count_distinct").get()


StringColumn.count_distinct = _count_distinct  # type: ignore
StringColumnView.count_distinct = _count_distinct  # type: ignore

# -----------------------------------------------------------------------------


def _day(self):
    """Extract day (of the month) from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    return FloatColumnView(operator="day", operand1=self, operand2=None)


# Attached the same way as hour, minute, month and second.
FloatColumn.day = _day  # type: ignore
FloatColumnView.day = _day  # type: ignore

# -----------------------------------------------------------------------------


def _eq(self, other):
    """Implements `self == other` as an element-wise boolean view."""
    return BooleanColumnView(
        operator="equal_to",
        operand1=self,
        operand2=other,
    )


FloatColumn.__eq__ = _eq  # type: ignore
FloatColumnView.__eq__ = _eq  # type: ignore

StringColumn.__eq__ = _eq  # type: ignore
StringColumnView.__eq__ = _eq  # type: ignore

# -----------------------------------------------------------------------------


def _erf(self):
    """Compute error function."""
    return FloatColumnView(operator="erf", operand1=self, operand2=None)


FloatColumn.erf = _erf  # type: ignore
FloatColumnView.erf = _erf  # type: ignore

# -----------------------------------------------------------------------------


def _exp(self):
    """Compute exponential function."""
    return FloatColumnView(operator="exp", operand1=self, operand2=None)


FloatColumn.exp = _exp  # type: ignore
FloatColumnView.exp = _exp  # type: ignore

# -----------------------------------------------------------------------------


def _floor(self):
    """Round down value."""
    return FloatColumnView(operator="floor", operand1=self, operand2=None)


FloatColumn.floor = _floor  # type: ignore
FloatColumnView.floor = _floor  # type: ignore

# -----------------------------------------------------------------------------

FloatColumnView._format = _format  # type: ignore
BooleanColumnView._format = _format  # type: ignore
StringColumnView._format = _format  # type: ignore
FloatColumn._format = _format  # type: ignore
StringColumn._format = _format  # type: ignore

# -----------------------------------------------------------------------------


def _gamma(self):
    """Compute gamma function."""
    return FloatColumnView(
        operator="tgamma",
        operand1=self,
        operand2=None,
    )


FloatColumn.gamma = _gamma  # type: ignore
FloatColumnView.gamma = _gamma  # type: ignore

# -----------------------------------------------------------------------------


def _ge(self, other):
    """Implements `self >= other` as an element-wise boolean view."""
    return BooleanColumnView(
        operator="greater_equal",
        operand1=self,
        operand2=other,
    )


FloatColumn.__ge__ = _ge  # type: ignore
FloatColumnView.__ge__ = _ge  # type: ignore

# -----------------------------------------------------------------------------


def _gt(self, other):
    """Implements `self > other` as an element-wise boolean view."""
    return BooleanColumnView(
        operator="greater",
        operand1=self,
        operand2=other,
    )


FloatColumn.__gt__ = _gt  # type: ignore
FloatColumnView.__gt__ = _gt  # type: ignore

# -----------------------------------------------------------------------------


def _hour(self):
    """Extract hour (of the day) from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    return FloatColumnView(operator="hour", operand1=self, operand2=None)


FloatColumn.hour = _hour  # type: ignore
FloatColumnView.hour = _hour  # type: ignore

# -----------------------------------------------------------------------------


def _is_inf(self):
    """Determine whether the value is infinite."""
    return BooleanColumnView(
        operator="is_inf",
        operand1=self,
        operand2=None,
    )


FloatColumn.is_inf = _is_inf  # type: ignore
FloatColumnView.is_inf = _is_inf  # type: ignore

# -----------------------------------------------------------------------------


def _is_nan(self):
    """Determine whether the value is nan."""
    return BooleanColumnView(
        operator="is_nan",
        operand1=self,
        operand2=None,
    )


FloatColumn.is_nan = _is_nan  # type: ignore
FloatColumnView.is_nan = _is_nan  # type: ignore

# For float columns, is_null is an alias of is_nan.
FloatColumn.is_null = _is_nan  # type: ignore
FloatColumnView.is_null = _is_nan  # type: ignore

# -----------------------------------------------------------------------------


def _is_null(self):
    """Determine whether the value is NULL."""
    return self == "NULL"


StringColumn.is_null = _is_null  # type: ignore
StringColumnView.is_null = _is_null  # type: ignore

# -----------------------------------------------------------------------------

FloatColumn.last_change = _last_change_from_col  # type: ignore
FloatColumnView.last_change = _last_change_from_col  # type: ignore
StringColumn.last_change = _last_change_from_col  # type: ignore
StringColumnView.last_change = _last_change_from_col  # type: ignore
BooleanColumnView.last_change = _last_change_from_col  # type: ignore

# -----------------------------------------------------------------------------


def _le(self, other):
    """Implements `self <= other` as an element-wise boolean view."""
    return BooleanColumnView(
        operator="less_equal",
        operand1=self,
        operand2=other,
    )


FloatColumn.__le__ = _le  # type: ignore
FloatColumnView.__le__ = _le  # type: ignore

# -----------------------------------------------------------------------------

BooleanColumnView.__len__ = _length  # type: ignore
FloatColumnView.__len__ = _length  # type: ignore
StringColumnView.__len__ = _length  # type: ignore
FloatColumn.__len__ = _length  # type: ignore
StringColumn.__len__ = _length  # type: ignore

# -----------------------------------------------------------------------------

BooleanColumnView.length = _length_property  # type: ignore
FloatColumnView.length = _length_property  # type: ignore
StringColumnView.length = _length_property  # type: ignore
FloatColumn.length = _length_property  # type: ignore
StringColumn.length = _length_property  # type: ignore

# -----------------------------------------------------------------------------


def _lgamma(self):
    """Compute log-gamma function."""
    view = FloatColumnView(operator="lgamma", operand1=self, operand2=None)
    return view


FloatColumn.lgamma = _lgamma  # type: ignore
FloatColumnView.lgamma = _lgamma  # type: ignore

# -----------------------------------------------------------------------------


def _log(self):
    """Compute natural logarithm."""
    view = FloatColumnView(operator="log", operand1=self, operand2=None)
    return view


FloatColumn.log = _log  # type: ignore
FloatColumnView.log = _log  # type: ignore

# -----------------------------------------------------------------------------


def _lt(self, other):
    """Builds the boolean view behind `self < other`."""
    return BooleanColumnView(
        operator="less",
        operand1=self,
        operand2=other,
    )


FloatColumn.__lt__ = _lt  # type: ignore
FloatColumnView.__lt__ = _lt  # type: ignore

# -----------------------------------------------------------------------------


def _max(self, alias="new_column"):
    """
    MAX aggregation.

    Args:
        alias (str): Name for the new column.
    """
    aggregation = Aggregation(alias, self, "max")
    return aggregation.get()


FloatColumn.max = _max  # type: ignore
FloatColumnView.max = _max  # type: ignore

# -----------------------------------------------------------------------------


def _median(self, alias="new_column"):
    """
    MEDIAN aggregation.

    Args:
        alias (str): Name for the new column.
    """
    aggregation = Aggregation(alias, self, "median")
    return aggregation.get()


FloatColumn.median = _median  # type: ignore
FloatColumnView.median = _median  # type: ignore

# -----------------------------------------------------------------------------


def _min(self, alias="new_column"):
    """
    MIN aggregation.

    Args:
        alias (str): Name for the new column.
    """
    aggregation = Aggregation(alias, self, "min")
    return aggregation.get()


FloatColumn.min = _min  # type: ignore
FloatColumnView.min = _min  # type: ignore

# -----------------------------------------------------------------------------


def _minute(self):
    """Extract minute (of the hour) from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    view = FloatColumnView(operator="minute", operand1=self, operand2=None)
    return view


FloatColumn.minute = _minute  # type: ignore
FloatColumnView.minute = _minute  # type: ignore

# -----------------------------------------------------------------------------


def _mod(self, other):
    """Builds the view behind `self % other`."""
    return FloatColumnView(
        operator="fmod",
        operand1=self,
        operand2=other,
    )


def _rmod(self, other):
    """Builds the view behind `other % self`."""
    return FloatColumnView(
        operator="fmod",
        operand1=other,
        operand2=self,
    )


FloatColumn.__mod__ = _mod  # type: ignore
FloatColumn.__rmod__ = _rmod  # type: ignore

FloatColumnView.__mod__ = _mod  # type: ignore
FloatColumnView.__rmod__ = _rmod  # type: ignore

# -----------------------------------------------------------------------------


def _month(self):
    """
    Extract month from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    view = FloatColumnView(operator="month", operand1=self, operand2=None)
    return view


FloatColumn.month = _month  # type: ignore
FloatColumnView.month = _month  # type: ignore

# -----------------------------------------------------------------------------


def _mul(self, other):
    """Builds the view behind `self * other`; also used for `other * self`."""
    return FloatColumnView(operator="multiplies", operand1=self, operand2=other)


FloatColumn.__mul__ = _mul  # type: ignore
FloatColumn.__rmul__ = _mul  # type: ignore

FloatColumnView.__mul__ = _mul  # type: ignore
FloatColumnView.__rmul__ = _mul  # type: ignore

# -----------------------------------------------------------------------------


def _ne(self, other):
    """Builds the boolean view behind `self != other`."""
    return BooleanColumnView(operator="not_equal_to", operand1=self, operand2=other)


FloatColumn.__ne__ = _ne  # type: ignore
FloatColumnView.__ne__ = _ne  # type: ignore

StringColumn.__ne__ = _ne  # type: ignore
StringColumnView.__ne__ = _ne  # type: ignore

# -----------------------------------------------------------------------------


def _neg(self):
    """Builds the view behind `-self` as a multiplication by -1.0."""
    return FloatColumnView(operator="multiplies", operand1=self, operand2=-1.0)


FloatColumn.__neg__ = _neg  # type: ignore
FloatColumnView.__neg__ = _neg  # type: ignore

# -----------------------------------------------------------------------------


def _pow(self, other):
    """Builds the view behind `self ** other`."""
    return FloatColumnView(
        operator="pow",
        operand1=self,
        operand2=other,
    )


def _rpow(self, other):
    """Builds the view behind `other ** self`."""
    return FloatColumnView(
        operator="pow",
        operand1=other,
        operand2=self,
    )


FloatColumn.__pow__ = _pow  # type: ignore
FloatColumn.__rpow__ = _rpow  # type: ignore

FloatColumnView.__pow__ = _pow  # type: ignore
FloatColumnView.__rpow__ = _rpow  # type: ignore

# -----------------------------------------------------------------------------

FloatColumn.__repr__ = _repr  # type: ignore
FloatColumnView.__repr__ = _repr  # type: ignore
StringColumn.__repr__ = _repr  # type: ignore
StringColumnView.__repr__ = _repr  # type: ignore
BooleanColumnView.__repr__ = _repr  # type: ignore

# -----------------------------------------------------------------------------
FloatColumn._repr_html_ = _repr_html  # type: ignore
FloatColumnView._repr_html_ = _repr_html  # type: ignore
StringColumn._repr_html_ = _repr_html  # type: ignore
StringColumnView._repr_html_ = _repr_html  # type: ignore
BooleanColumnView._repr_html_ = _repr_html  # type: ignore

# -----------------------------------------------------------------------------


def _round(self):
    """Round to nearest."""
    return FloatColumnView(operator="round", operand1=self, operand2=None)


FloatColumn.round = _round  # type: ignore
FloatColumnView.round = _round  # type: ignore

# -----------------------------------------------------------------------------


def _second(self):
    """Extract second (of the minute) from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    return FloatColumnView(
        operator="second",
        operand1=self,
        operand2=None,
    )


FloatColumn.second = _second  # type: ignore
FloatColumnView.second = _second  # type: ignore

# -----------------------------------------------------------------------------


def _sin(self):
    """Compute sine."""
    return FloatColumnView(operator="sin", operand1=self, operand2=None)


FloatColumn.sin = _sin  # type: ignore
FloatColumnView.sin = _sin  # type: ignore

# -----------------------------------------------------------------------------


def _sqrt(self):
    """Compute square root."""
    return FloatColumnView(operator="sqrt", operand1=self, operand2=None)


FloatColumn.sqrt = _sqrt  # type: ignore
FloatColumnView.sqrt = _sqrt  # type: ignore

# -----------------------------------------------------------------------------


def _stddev(self, alias="new_column"):
    """
    STDDEV aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "stddev").get()


FloatColumn.stddev = _stddev  # type: ignore
FloatColumnView.stddev = _stddev  # type: ignore

# -----------------------------------------------------------------------------


def _sub(self, other):
    """Implements ``self - other`` as a lazy "minus" view."""
    return FloatColumnView(
        operator="minus",
        operand1=self,
        operand2=other,
    )


def _rsub(self, other):
    """Implements ``other - self`` (reflected operands) as a lazy "minus" view."""
    return FloatColumnView(
        operator="minus",
        operand1=other,
        operand2=self,
    )


FloatColumn.__sub__ = _sub  # type: ignore
FloatColumn.__rsub__ = _rsub  # type: ignore

FloatColumnView.__sub__ = _sub  # type: ignore
FloatColumnView.__rsub__ = _rsub  # type: ignore

# -------------------------------------------------------------------------

FloatColumnView.subroles = _subroles  # type: ignore
StringColumnView.subroles = _subroles  # type: ignore
FloatColumn.subroles = _subroles  # type: ignore
StringColumn.subroles = _subroles  # type: ignore

# -----------------------------------------------------------------------------


def _subselection_bool(self, indices):
    """Implements ``self[indices]`` for Boolean column views.

    An integral index is resolved to a single value via ``_get_scalar``.
    A slice is first translated with ``_make_slicing_operand``. Any other
    kind of index is passed on unchanged as the second operand of a
    "subselection" view.
    """
    if isinstance(indices, numbers.Integral):
        return _get_scalar(self, indices)

    if isinstance(indices, slice):
        indices = _make_slicing_operand(self, indices)

    return BooleanColumnView(
        operator="subselection",
        operand1=self,
        operand2=indices,
    )


BooleanColumnView.__getitem__ = _subselection_bool  # type: ignore

# -----------------------------------------------------------------------------


def _subselection_float(self, indices):
    """Implements ``self[indices]`` for float columns and float column views.

    An integral index is resolved to a single value via ``_get_scalar``.
    A slice is first translated with ``_make_slicing_operand``. Any other
    kind of index is passed on unchanged as the second operand of a
    "subselection" view.
    """
    if isinstance(indices, numbers.Integral):
        return _get_scalar(self, indices)

    if isinstance(indices, slice):
        indices = _make_slicing_operand(self, indices)

    return FloatColumnView(
        operator="subselection",
        operand1=self,
        operand2=indices,
    )


FloatColumnView.__getitem__ = _subselection_float  # type: ignore
FloatColumn.__getitem__ = _subselection_float  # type: ignore

# -----------------------------------------------------------------------------


def _subselection_string(self, indices):
    """Implements ``self[indices]`` for string columns and string column views.

    An integral index is resolved to a single value via ``_get_scalar``.
    A slice is first translated with ``_make_slicing_operand``. Any other
    kind of index is passed on unchanged as the second operand of a
    "subselection" view.
    """
    if isinstance(indices, numbers.Integral):
        return _get_scalar(self, indices)

    if isinstance(indices, slice):
        indices = _make_slicing_operand(self, indices)

    return StringColumnView(
        operator="subselection",
        operand1=self,
        operand2=indices,
    )


StringColumnView.__getitem__ = _subselection_string  # type: ignore
StringColumn.__getitem__ = _subselection_string  # type: ignore

# -----------------------------------------------------------------------------


def _substr(self, begin, length):
    """
    Return a substring for every element in the column.

    Args:
        begin (int): First position of the original string.

        length (int): Length of the extracted string.
    """
    col = StringColumnView(
        operator="substr",
        operand1=self,
        operand2=None,
    )
    # The substring bounds are carried as extra fields of the command
    # rather than as operands.
    col.cmd["begin_"] = begin
    col.cmd["len_"] = length
    return col


StringColumn.substr = _substr  # type: ignore
StringColumnView.substr = _substr  # type: ignore

# -----------------------------------------------------------------------------


def _sum(self, alias="new_column"):
    """
    SUM aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "sum").get()


FloatColumn.sum = _sum  # type: ignore
FloatColumnView.sum = _sum  # type: ignore

# -----------------------------------------------------------------------------


def _tan(self):
    """Compute tangent."""
    return FloatColumnView(operator="tan", operand1=self, operand2=None)


FloatColumn.tan = _tan  # type: ignore
FloatColumnView.tan = _tan  # type: ignore

# -----------------------------------------------------------------------------


def _as_num(self):
    """Transforms a categorical column to a numerical column."""
    return FloatColumnView(
        operator="as_num",
        operand1=self,
        operand2=None,
    )


StringColumn.as_num = _as_num  # type: ignore
StringColumnView.as_num = _as_num  # type: ignore

# -----------------------------------------------------------------------------

FloatColumn.to_numpy = _to_numpy  # type: ignore
FloatColumnView.to_numpy = _to_numpy  # type: ignore

# -----------------------------------------------------------------------------

StringColumn.to_numpy = _to_numpy_categorical  # type: ignore
StringColumnView.to_numpy = _to_numpy_categorical  # type: ignore

# -----------------------------------------------------------------------------


def _as_str(self):
    """Transforms column to a string."""
    return StringColumnView(
        operator="as_str",
        operand1=self,
        operand2=None,
    )


FloatColumn.as_str = _as_str  # type: ignore
FloatColumnView.as_str = _as_str  # type: ignore
BooleanColumnView.as_str = _as_str  # type: ignore

# -----------------------------------------------------------------------------


def _as_ts(self, time_formats=None):
    """
    Transforms a categorical column to a time stamp.

    Args:
        time_formats (optional): Formats to be used to parse the time
            stamps. Any falsy value (including the default None) falls
            back to `constants.TIME_FORMATS`.
            NOTE(review): previously documented as `str`, but the plural
            name suggests a list of format strings - confirm.
    """
    time_formats = time_formats or constants.TIME_FORMATS
    col = FloatColumnView(operator="as_ts", operand1=self, operand2=None)
    col.cmd["time_formats_"] = time_formats
    return col


StringColumn.as_ts = _as_ts  # type: ignore
StringColumnView.as_ts = _as_ts  # type: ignore

# -----------------------------------------------------------------------------


def _truediv(self, other):
    """Implements ``self / other`` as a lazy "divides" view."""
    return FloatColumnView(
        operator="divides",
        operand1=self,
        operand2=other,
    )


def _rtruediv(self, other):
    """Implements ``other / self`` (reflected operands) as a lazy "divides" view."""
    return FloatColumnView(
        operator="divides",
        operand1=other,
        operand2=self,
    )


FloatColumn.__truediv__ = _truediv  # type: ignore
FloatColumn.__rtruediv__ = _rtruediv  # type: ignore

FloatColumnView.__truediv__ = _truediv  # type: ignore
FloatColumnView.__rtruediv__ = _rtruediv  # type: ignore

# -----------------------------------------------------------------------------

FloatColumn.unique = _unique  # type: ignore
FloatColumnView.unique = _unique  # type: ignore

# -----------------------------------------------------------------------------

StringColumn.unique = _unique_categorical  # type: ignore
StringColumnView.unique = _unique_categorical  # type: ignore

# -------------------------------------------------------------------------

FloatColumnView.unit = _unit  # type: ignore
StringColumnView.unit = _unit  # type: ignore
FloatColumn.unit = _unit  # type: ignore
StringColumn.unit = _unit  # type: ignore

# -----------------------------------------------------------------------------


def _check_update_condition(condition):
    """Raises a TypeError unless *condition* is tagged as a Boolean column view.

    Objects that have no usable ``cmd`` mapping (i.e. that are not columns
    at all) are rejected with the same TypeError instead of leaking an
    AttributeError or KeyError to the caller.
    """
    try:
        condition_type = condition.cmd["type_"]
    except (AttributeError, KeyError, TypeError):
        raise TypeError(
            "Condition for an update must be a Boolean column."
        ) from None

    if condition_type != BOOLEAN_COLUMN_VIEW:
        raise TypeError("Condition for an update must be a Boolean column.")


def _update(self, condition, values):
    """
    Returns an updated version of this column.

    All entries for which the corresponding **condition** is True,
    are updated using the corresponding entry in **values**.

    Args:
        condition (Boolean column): Condition according to which the
            update is done

        values: Values to update with

    Raises:
        TypeError: If **condition** is not a Boolean column.
    """
    # Validate before building the view, so that invalid input fails fast.
    _check_update_condition(condition)

    col = FloatColumnView(
        operator="update",
        operand1=self,
        operand2=values,
    )
    col.cmd["condition_"] = condition.cmd
    return col


FloatColumn.update = _update  # type: ignore
FloatColumnView.update = _update  # type: ignore

# -----------------------------------------------------------------------------


def _update_categorical(self, condition, values):
    """
    Returns an updated version of this column.

    All entries for which the corresponding **condition** is True,
    are updated using the corresponding entry in **values**.

    Args:
        condition (Boolean column): Condition according to which the
            update is done

        values: Values to update with

    Raises:
        TypeError: If **condition** is not a Boolean column.
    """
    # Validate before building the view, so that invalid input fails fast.
    _check_update_condition(condition)

    col = StringColumnView(
        operator="update",
        operand1=self,
        operand2=values,
    )
    col.cmd["condition_"] = condition.cmd
    return col


StringColumn.update = _update_categorical  # type: ignore
StringColumnView.update = _update_categorical  # type: ignore

# -----------------------------------------------------------------------------


def _var(self, alias="new_column"):
    """
    VAR aggregation.

    Args:
        alias (str): Name for the new column.
    """
    return Aggregation(alias, self, "var").get()


FloatColumn.var = _var  # type: ignore
FloatColumnView.var = _var  # type: ignore

# -----------------------------------------------------------------------------


def _weekday(self):
    """Extract day of the week from a time stamp, Sunday being 0.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    return FloatColumnView(
        operator="weekday",
        operand1=self,
        operand2=None,
    )


FloatColumn.weekday = _weekday  # type: ignore
FloatColumnView.weekday = _weekday  # type: ignore

# -----------------------------------------------------------------------------


def _combine_subroles(col, subroles, append):
    """Validates the arguments of `with_subroles` and returns the subroles to send.

    A single str is wrapped into a list. When *append* is True, the new
    subroles are appended to ``col.subroles``; otherwise they replace them.
    """
    if isinstance(subroles, str):
        subroles = [subroles]

    # The element check enforces what the error message has always promised.
    if not isinstance(subroles, list) or not all(
        isinstance(subrole, str) for subrole in subroles
    ):
        raise TypeError("'subroles' must be a str or a list of str.")

    if not isinstance(append, bool):
        raise TypeError("'append' must be a bool.")

    return col.subroles + subroles if append else subroles


def _with_subroles_float(self, subroles, append=True):
    """
    Returns a new column with new subroles.

    Args:
        subroles (str or List[str]): The subroles to be assigned.

        append (bool, optional): Whether you want to append the
            new subroles to the existing subroles.

    Raises:
        TypeError: If **subroles** is neither a str nor a list of str,
            or if **append** is not a bool.
    """
    new_subroles = _combine_subroles(self, subroles, append)

    col = FloatColumnView(
        operator="with_subroles",
        operand1=self,
        operand2=None,
    )
    col.cmd["subroles_"] = new_subroles
    return col


FloatColumn.with_subroles = _with_subroles_float  # type: ignore
FloatColumnView.with_subroles = _with_subroles_float  # type: ignore

# -----------------------------------------------------------------------------


def _with_subroles_string(self, subroles, append=True):
    """
    Returns a new column with new subroles.

    Args:
        subroles (str or List[str]): The subroles to be assigned.

        append (bool, optional): Whether you want to append the
            new subroles to the existing subroles.

    Raises:
        TypeError: If **subroles** is neither a str nor a list of str,
            or if **append** is not a bool.
    """
    new_subroles = _combine_subroles(self, subroles, append)

    col = StringColumnView(
        operator="with_subroles",
        operand1=self,
        operand2=None,
    )
    col.cmd["subroles_"] = new_subroles
    return col


StringColumn.with_subroles = _with_subroles_string  # type: ignore
StringColumnView.with_subroles = _with_subroles_string  # type: ignore

# -----------------------------------------------------------------------------


def _with_unit_float(self, unit):
    """
    Returns a new column with a new unit.

    Args:
        unit (str): The new unit.
    """
    col = FloatColumnView(
        operator="with_unit",
        operand1=self,
        operand2=None,
    )
    col.cmd["unit_"] = unit
    return col


FloatColumn.with_unit = _with_unit_float  # type: ignore
FloatColumnView.with_unit = _with_unit_float  # type: ignore

# -----------------------------------------------------------------------------


def _with_unit_string(self, unit):
    """
    Returns a new column with a new unit.

    Args:
        unit (str): The new unit.
    """
    col = StringColumnView(
        operator="with_unit",
        operand1=self,
        operand2=None,
    )
    col.cmd["unit_"] = unit
    return col


StringColumn.with_unit = _with_unit_string  # type: ignore
StringColumnView.with_unit = _with_unit_string  # type: ignore

# -----------------------------------------------------------------------------


def _year(self):
    """
    Extract year from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    return FloatColumnView(operator="year", operand1=self, operand2=None)


FloatColumn.year = _year  # type: ignore
FloatColumnView.year = _year  # type: ignore

# -----------------------------------------------------------------------------


def _yearday(self):
    """
    Extract day of the year from a time stamp.

    If the column is numerical, that number will be interpreted
    as the number of days since epoch time (January 1, 1970).
    """
    return FloatColumnView(
        operator="yearday",
        operand1=self,
        operand2=None,
    )


FloatColumn.yearday = _yearday  # type: ignore
FloatColumnView.yearday = _yearday  # type: ignore

# -----------------------------------------------------------------------------