Source code for getml.database.sniff_csv

# Copyright 2022 The SQLNet Company GmbH
#
# This file is licensed under the Elastic License 2.0 (ELv2).
# Refer to the LICENSE.txt file in the root of the repository
# for details.
#


"""
Sniffs a list of CSV files.
"""

from typing import Any, Dict, List, Optional, Union

import getml.communication as comm

from .connection import Connection
from .helpers import _retrieve_urls


[docs]def sniff_csv( name: str, fnames: Union[str, List[str]], num_lines_sniffed: int = 1000, quotechar: str = '"', sep: str = ",", skip: int = 0, colnames: Optional[List[str]] = None, conn: Optional[Connection] = None, ) -> str: """ Sniffs a list of CSV files. Args: name (str): Name of the table in which the data is to be inserted. fnames (List[str]): The list of CSV file names to be read. num_lines_sniffed (int, optional): Number of lines analyzed by the sniffer. quotechar (str, optional): The character used to wrap strings. Default:`"` sep (str, optional): The separator used for separating fields. Default:`,` skip (int, optional): Number of lines to skip at the beginning of each file (Default: 0). colnames(List[str] or None, optional): The first line of a CSV file usually contains the column names. When this is not the case, you need to explicitly pass them. conn (:class:`~getml.database.Connection`, optional): The database connection to be used. If you don't explicitly pass a connection, the engine will use the default connection. Returns: str: Appropriate `CREATE TABLE` statement. """ conn = conn or Connection() if not isinstance(fnames, list): fnames = [fnames] fnames_ = _retrieve_urls(fnames) cmd: Dict[str, Any] = {} cmd["name_"] = name cmd["type_"] = "Database.sniff_csv" cmd["fnames_"] = fnames_ cmd["num_lines_sniffed_"] = num_lines_sniffed cmd["quotechar_"] = quotechar cmd["sep_"] = sep cmd["skip_"] = skip cmd["conn_id_"] = conn.conn_id if colnames is not None: cmd["colnames_"] = colnames with comm.send_and_get_socket(cmd) as sock: msg = comm.recv_string(sock) if msg != "Success!": sock.close() comm.engine_exception_handler(msg) return comm.recv_string(sock)