Source code for

# Copyright 2022 The SQLNet Company GmbH
# This file is licensed under the Elastic License 2.0 (ELv2).
# Refer to the LICENSE.txt file in the root of the repository
# for details.

"""Concatenates data."""

from typing import Dict, Tuple

from import StringColumnView
from import from_value
from import rowid
from import concat as _concat
from import DataFrame
from import _is_non_empty_typed_list
from import View

def concat(name: str, **kwargs: DataFrame) -> Tuple[DataFrame, StringColumnView]:
    """
    Concatenates several data frames into a single data frame and produces
    a split column that keeps track of their origin.

    Args:
        name (str):
            The name of the data frame you would like to create.

        kwargs:
            The data frames you would like to concat, each passed under the
            name with which it should appear in the split column. Views are
            accepted as well as data frames. The frames are appended in the
            order in which the keyword arguments are given.

    Returns:
        A tuple ``(population, split)``: the concatenated data frame called
        `name`, and a string column, cut to the number of rows of
        ``population``, holding the keyword name each row was passed under.

    Raises:
        ValueError:
            If no keyword arguments are passed or any of them is neither a
            data frame nor a view.

    Example:
        A common use case is merging separately loaded training, validation
        and test sets while remembering which row belongs to which set:

        .. code-block:: python

            data_train = getml.DataFrame.from_pandas(
                datatraining_pandas, name='data_train')

            data_validate = getml.DataFrame.from_pandas(
                datatest_pandas, name='data_validate')

            data_test = getml.DataFrame.from_pandas(
                datatest2_pandas, name='data_test')

            population, split = concat(
                "population",
                train=data_train,
                validate=data_validate,
                test=data_test)

            # The pair is then handed on together, e.g. as
            # ``population=population, split=split``.
    """
    if not _is_non_empty_typed_list(list(kwargs.values()), [DataFrame, View]):
        raise ValueError(
            "'kwargs' must be non-empty and contain getml.DataFrames "
            + "or getml.data.Views."
        )

    names = list(kwargs)

    first = kwargs[names[0]]

    # Never touch the caller's object: data frames are copied under the new
    # name, anything else (a view) is turned into a data frame of that name.
    population = first.copy(name) if isinstance(first, DataFrame) else first.to_df(name)

    # Start with a column that carries the first name; the names of the
    # remaining frames are written on top of it in the loop below.
    split = from_value(names[0])

    # Narrows the type for static checkers; not an input validation.
    assert isinstance(split, StringColumnView), "Should be a StringColumnView"

    for new_df_name in names[1:]:
        # Must run BEFORE population grows: the row count of the frames
        # concatenated so far marks where the rows of the next frame begin.
        # NOTE(review): whether the first appended row is labelled correctly
        # depends on rowid() being 0- or 1-based - confirm the boundary.
        split = split.update(rowid() > population.nrows(), new_df_name)  # type: ignore
        population = _concat(name, [population, kwargs[new_df_name]])

    # Cut the split column to the length of the concatenated data frame.
    return population, split[: population.nrows()]  # type: ignore