Source code for getml.sqlite3.sniff_pandas

# Copyright 2022 The SQLNet Company GmbH
#
# This file is licensed under the Elastic License 2.0 (ELv2).
# Refer to the LICENSE.txt file in the root of the repository
# for details.
#


"""
Contains utility functions for sniffing sqlite data types from pandas DataFrames.
"""

from typing import Dict, List

import pandas as pd  # type: ignore

from getml.data.helpers import _is_numerical_type

from .helpers import _generate_schema, _is_int_type

# ----------------------------------------------------------------------------


def sniff_pandas(table_name: str, data_frame: pd.DataFrame) -> str:
    """
    Sniffs a pandas data frame.

    Every column is assigned to exactly one sqlite type, based on its
    pandas dtype: columns recognised by `_is_int_type` become `INTEGER`,
    remaining columns recognised by `_is_numerical_type` become `REAL`,
    and everything else becomes `TEXT`.

    Args:
        table_name (str):
            Name of the table in which the data is to be inserted.

        data_frame (pandas.DataFrame):
            The pandas.DataFrame to read into the table.

    Returns:
        str:
            Appropriate `CREATE TABLE` statement.

    Raises:
        TypeError:
            If `table_name` is not a str or `data_frame` is not a
            pandas.DataFrame.
    """

    # ------------------------------------------------------------

    if not isinstance(table_name, str):
        raise TypeError("'table_name' must be a str")

    if not isinstance(data_frame, pd.DataFrame):
        raise TypeError("'data_frame' must be a pandas.DataFrame")

    # ------------------------------------------------------------

    sql_types: Dict[str, List[str]] = {"INTEGER": [], "REAL": [], "TEXT": []}

    for cname, ctype in zip(data_frame.columns, data_frame.dtypes):
        # The integer check must come first: an integer column is bucketed
        # as INTEGER and never reaches the general numerical check.
        if _is_int_type(ctype):
            sql_types["INTEGER"].append(cname)
        elif _is_numerical_type(ctype):
            sql_types["REAL"].append(cname)
        else:
            sql_types["TEXT"].append(cname)

    return _generate_schema(table_name, sql_types)