"""
 | 
						|
Module contains tools for processing files into DataFrames or other objects
 | 
						|
 | 
						|
GH#48849 provides a convenient way of deprecating keyword arguments
 | 
						|
"""
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from collections import (
 | 
						|
    abc,
 | 
						|
    defaultdict,
 | 
						|
)
 | 
						|
import csv
 | 
						|
import sys
 | 
						|
from textwrap import fill
 | 
						|
from typing import (
 | 
						|
    IO,
 | 
						|
    TYPE_CHECKING,
 | 
						|
    Any,
 | 
						|
    Callable,
 | 
						|
    Literal,
 | 
						|
    NamedTuple,
 | 
						|
    TypedDict,
 | 
						|
    overload,
 | 
						|
)
 | 
						|
import warnings
 | 
						|
 | 
						|
import numpy as np
 | 
						|
 | 
						|
from pandas._config import using_copy_on_write
 | 
						|
 | 
						|
from pandas._libs import lib
 | 
						|
from pandas._libs.parsers import STR_NA_VALUES
 | 
						|
from pandas.errors import (
 | 
						|
    AbstractMethodError,
 | 
						|
    ParserWarning,
 | 
						|
)
 | 
						|
from pandas.util._decorators import Appender
 | 
						|
from pandas.util._exceptions import find_stack_level
 | 
						|
from pandas.util._validators import check_dtype_backend
 | 
						|
 | 
						|
from pandas.core.dtypes.common import (
 | 
						|
    is_file_like,
 | 
						|
    is_float,
 | 
						|
    is_hashable,
 | 
						|
    is_integer,
 | 
						|
    is_list_like,
 | 
						|
    pandas_dtype,
 | 
						|
)
 | 
						|
 | 
						|
from pandas import Series
 | 
						|
from pandas.core.frame import DataFrame
 | 
						|
from pandas.core.indexes.api import RangeIndex
 | 
						|
from pandas.core.shared_docs import _shared_docs
 | 
						|
 | 
						|
from pandas.io.common import (
 | 
						|
    IOHandles,
 | 
						|
    get_handle,
 | 
						|
    stringify_path,
 | 
						|
    validate_header_arg,
 | 
						|
)
 | 
						|
from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper
 | 
						|
from pandas.io.parsers.base_parser import (
 | 
						|
    ParserBase,
 | 
						|
    is_index_col,
 | 
						|
    parser_defaults,
 | 
						|
)
 | 
						|
from pandas.io.parsers.c_parser_wrapper import CParserWrapper
 | 
						|
from pandas.io.parsers.python_parser import (
 | 
						|
    FixedWidthFieldParser,
 | 
						|
    PythonParser,
 | 
						|
)
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from collections.abc import (
 | 
						|
        Hashable,
 | 
						|
        Iterable,
 | 
						|
        Mapping,
 | 
						|
        Sequence,
 | 
						|
    )
 | 
						|
    from types import TracebackType
 | 
						|
 | 
						|
    from pandas._typing import (
 | 
						|
        CompressionOptions,
 | 
						|
        CSVEngine,
 | 
						|
        DtypeArg,
 | 
						|
        DtypeBackend,
 | 
						|
        FilePath,
 | 
						|
        IndexLabel,
 | 
						|
        ReadCsvBuffer,
 | 
						|
        Self,
 | 
						|
        StorageOptions,
 | 
						|
        UsecolsArgType,
 | 
						|
    )
 | 
						|
_doc_read_csv_and_table = (
 | 
						|
    r"""
 | 
						|
{summary}
 | 
						|
 | 
						|
Also supports optionally iterating or breaking of the file
 | 
						|
into chunks.
 | 
						|
 | 
						|
Additional help can be found in the online docs for
 | 
						|
`IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.
 | 
						|
 | 
						|
Parameters
 | 
						|
----------
 | 
						|
filepath_or_buffer : str, path object or file-like object
 | 
						|
    Any valid string path is acceptable. The string could be a URL. Valid
 | 
						|
    URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
 | 
						|
    expected. A local file could be: file://localhost/path/to/table.csv.
 | 
						|
 | 
						|
    If you want to pass in a path object, pandas accepts any ``os.PathLike``.
 | 
						|
 | 
						|
    By file-like object, we refer to objects with a ``read()`` method, such as
 | 
						|
    a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
 | 
						|
sep : str, default {_default_sep}
 | 
						|
    Character or regex pattern to treat as the delimiter. If ``sep=None``, the
 | 
						|
    C engine cannot automatically detect
 | 
						|
    the separator, but the Python parsing engine can, meaning the latter will
 | 
						|
    be used and automatically detect the separator from only the first valid
 | 
						|
    row of the file by Python's builtin sniffer tool, ``csv.Sniffer``.
 | 
						|
    In addition, separators longer than 1 character and different from
 | 
						|
    ``'\s+'`` will be interpreted as regular expressions and will also force
 | 
						|
    the use of the Python parsing engine. Note that regex delimiters are prone
 | 
						|
    to ignoring quoted data. Regex example: ``'\r\t'``.
 | 
						|
delimiter : str, optional
 | 
						|
    Alias for ``sep``.
 | 
						|
header : int, Sequence of int, 'infer' or None, default 'infer'
 | 
						|
    Row number(s) containing column labels and marking the start of the
 | 
						|
    data (zero-indexed). Default behavior is to infer the column names: if no ``names``
 | 
						|
    are passed the behavior is identical to ``header=0`` and column
 | 
						|
    names are inferred from the first line of the file, if column
 | 
						|
    names are passed explicitly to ``names`` then the behavior is identical to
 | 
						|
    ``header=None``. Explicitly pass ``header=0`` to be able to
 | 
						|
    replace existing names. The header can be a list of integers that
 | 
						|
    specify row locations for a :class:`~pandas.MultiIndex` on the columns
 | 
						|
    e.g. ``[0, 1, 3]``. Intervening rows that are not specified will be
 | 
						|
    skipped (e.g. 2 in this example is skipped). Note that this
 | 
						|
    parameter ignores commented lines and empty lines if
 | 
						|
    ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
 | 
						|
    data rather than the first line of the file.
 | 
						|
names : Sequence of Hashable, optional
 | 
						|
    Sequence of column labels to apply. If the file contains a header row,
 | 
						|
    then you should explicitly pass ``header=0`` to override the column names.
 | 
						|
    Duplicates in this list are not allowed.
 | 
						|
index_col : Hashable, Sequence of Hashable or False, optional
 | 
						|
  Column(s) to use as row label(s), denoted either by column labels or column
 | 
						|
  indices.  If a sequence of labels or indices is given, :class:`~pandas.MultiIndex`
 | 
						|
  will be formed for the row labels.
 | 
						|
 | 
						|
  Note: ``index_col=False`` can be used to force pandas to *not* use the first
 | 
						|
  column as the index, e.g., when you have a malformed file with delimiters at
 | 
						|
  the end of each line.
 | 
						|
usecols : Sequence of Hashable or Callable, optional
 | 
						|
    Subset of columns to select, denoted either by column labels or column indices.
 | 
						|
    If list-like, all elements must either
 | 
						|
    be positional (i.e. integer indices into the document columns) or strings
 | 
						|
    that correspond to column names provided either by the user in ``names`` or
 | 
						|
    inferred from the document header row(s). If ``names`` are given, the document
 | 
						|
    header row(s) are not taken into account. For example, a valid list-like
 | 
						|
    ``usecols`` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
 | 
						|
    Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
 | 
						|
    To instantiate a :class:`~pandas.DataFrame` from ``data`` with element order
 | 
						|
    preserved use ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]``
 | 
						|
    for columns in ``['foo', 'bar']`` order or
 | 
						|
    ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
 | 
						|
    for ``['bar', 'foo']`` order.
 | 
						|
 | 
						|
    If callable, the callable function will be evaluated against the column
 | 
						|
    names, returning names where the callable function evaluates to ``True``. An
 | 
						|
    example of a valid callable argument would be ``lambda x: x.upper() in
 | 
						|
    ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
 | 
						|
    parsing time and lower memory usage.
 | 
						|
dtype : dtype or dict of {{Hashable : dtype}}, optional
 | 
						|
    Data type(s) to apply to either the whole dataset or individual columns.
 | 
						|
    E.g., ``{{'a': np.float64, 'b': np.int32, 'c': 'Int64'}}``
 | 
						|
    Use ``str`` or ``object`` together with suitable ``na_values`` settings
 | 
						|
    to preserve and not interpret ``dtype``.
 | 
						|
    If ``converters`` are specified, they will be applied INSTEAD
 | 
						|
    of ``dtype`` conversion.
 | 
						|
 | 
						|
    .. versionadded:: 1.5.0
 | 
						|
 | 
						|
        Support for ``defaultdict`` was added. Specify a ``defaultdict`` as input where
 | 
						|
        the default determines the ``dtype`` of the columns which are not explicitly
 | 
						|
        listed.
 | 
						|
engine : {{'c', 'python', 'pyarrow'}}, optional
 | 
						|
    Parser engine to use. The C and pyarrow engines are faster, while the python engine
 | 
						|
    is currently more feature-complete. Multithreading is currently only supported by
 | 
						|
    the pyarrow engine.
 | 
						|
 | 
						|
    .. versionadded:: 1.4.0
 | 
						|
 | 
						|
        The 'pyarrow' engine was added as an *experimental* engine, and some features
 | 
						|
        are unsupported, or may not work correctly, with this engine.
 | 
						|
converters : dict of {{Hashable : Callable}}, optional
 | 
						|
    Functions for converting values in specified columns. Keys can either
 | 
						|
    be column labels or column indices.
 | 
						|
true_values : list, optional
 | 
						|
    Values to consider as ``True`` in addition to case-insensitive variants of 'True'.
 | 
						|
false_values : list, optional
 | 
						|
    Values to consider as ``False`` in addition to case-insensitive variants of 'False'.
 | 
						|
skipinitialspace : bool, default False
 | 
						|
    Skip spaces after delimiter.
 | 
						|
skiprows : int, list of int or Callable, optional
 | 
						|
    Line numbers to skip (0-indexed) or number of lines to skip (``int``)
 | 
						|
    at the start of the file.
 | 
						|
 | 
						|
    If callable, the callable function will be evaluated against the row
 | 
						|
    indices, returning ``True`` if the row should be skipped and ``False`` otherwise.
 | 
						|
    An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
 | 
						|
skipfooter : int, default 0
 | 
						|
    Number of lines at bottom of file to skip (Unsupported with ``engine='c'``).
 | 
						|
nrows : int, optional
 | 
						|
    Number of rows of file to read. Useful for reading pieces of large files.
 | 
						|
na_values : Hashable, Iterable of Hashable or dict of {{Hashable : Iterable}}, optional
 | 
						|
    Additional strings to recognize as ``NA``/``NaN``. If ``dict`` passed, specific
 | 
						|
    per-column ``NA`` values.  By default the following values are interpreted as
 | 
						|
    ``NaN``: " """
 | 
						|
    + fill('", "'.join(sorted(STR_NA_VALUES)), 70, subsequent_indent="    ")
 | 
						|
    + """ ".
 | 
						|
 | 
						|
keep_default_na : bool, default True
 | 
						|
    Whether or not to include the default ``NaN`` values when parsing the data.
 | 
						|
    Depending on whether ``na_values`` is passed in, the behavior is as follows:
 | 
						|
 | 
						|
    * If ``keep_default_na`` is ``True``, and ``na_values`` are specified, ``na_values``
 | 
						|
      is appended to the default ``NaN`` values used for parsing.
 | 
						|
    * If ``keep_default_na`` is ``True``, and ``na_values`` are not specified, only
 | 
						|
      the default ``NaN`` values are used for parsing.
 | 
						|
    * If ``keep_default_na`` is ``False``, and ``na_values`` are specified, only
      the ``NaN`` values specified in ``na_values`` are used for parsing.
    * If ``keep_default_na`` is ``False``, and ``na_values`` are not specified, no
      strings will be parsed as ``NaN``.

    Note that if ``na_filter`` is passed in as ``False``, the ``keep_default_na`` and
    ``na_values`` parameters will be ignored.
na_filter : bool, default True
    Detect missing value markers (empty strings and the value of ``na_values``). In
    data without any ``NA`` values, passing ``na_filter=False`` can improve the
    performance of reading a large file.
verbose : bool, default False
    Indicate number of ``NA`` values placed in non-numeric columns.

    .. deprecated:: 2.2.0
skip_blank_lines : bool, default True
    If ``True``, skip over blank lines rather than interpreting as ``NaN`` values.
parse_dates : bool, list of Hashable, list of lists or dict of {{Hashable : list}}, \
default False
    The behavior is as follows:

    * ``bool``. If ``True`` -> try parsing the index. Note: Automatically set to
      ``True`` if ``date_format`` or ``date_parser`` arguments have been passed.
    * ``list`` of ``int`` or names. e.g. If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3
      each as a separate date column.
    * ``list`` of ``list``. e.g.  If ``[[1, 3]]`` -> combine columns 1 and 3 and parse
      as a single date column. Values are joined with a space before parsing.
    * ``dict``, e.g. ``{{'foo' : [1, 3]}}`` -> parse columns 1, 3 as date and call
      result 'foo'. Values are joined with a space before parsing.

    If a column or index cannot be represented as an array of ``datetime``,
    say because of an unparsable value or a mixture of timezones, the column
    or index will be returned unaltered as an ``object`` data type. For
    non-standard ``datetime`` parsing, use :func:`~pandas.to_datetime` after
    :func:`~pandas.read_csv`.

    Note: A fast-path exists for iso8601-formatted dates.
infer_datetime_format : bool, default False
    If ``True`` and ``parse_dates`` is enabled, pandas will attempt to infer the
    format of the ``datetime`` strings in the columns, and if it can be inferred,
    switch to a faster method of parsing them. In some cases this can increase
    the parsing speed by 5-10x.

    .. deprecated:: 2.0.0
        A strict version of this argument is now the default, passing it has no effect.

keep_date_col : bool, default False
    If ``True`` and ``parse_dates`` specifies combining multiple columns then
    keep the original columns.
date_parser : Callable, optional
    Function to use for converting a sequence of string columns to an array of
    ``datetime`` instances. The default uses ``dateutil.parser.parser`` to do the
    conversion. pandas will try to call ``date_parser`` in three different ways,
    advancing to the next if an exception occurs: 1) Pass one or more arrays
    (as defined by ``parse_dates``) as arguments; 2) concatenate (row-wise) the
    string values from the columns defined by ``parse_dates`` into a single array
    and pass that; and 3) call ``date_parser`` once for each row using one or
    more strings (corresponding to the columns defined by ``parse_dates``) as
    arguments.

    .. deprecated:: 2.0.0
       Use ``date_format`` instead, or read in as ``object`` and then apply
       :func:`~pandas.to_datetime` as-needed.
date_format : str or dict of column -> format, optional
    Format to use for parsing dates when used in conjunction with ``parse_dates``.
    The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. See
    `strftime documentation
    <https://docs.python.org/3/library/datetime.html
    #strftime-and-strptime-behavior>`_ for more information on choices, though
    note that :const:`"%f"` will parse all the way up to nanoseconds.
    You can also pass:

    - "ISO8601", to parse any `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_
        time string (not necessarily in exactly the same format);
    - "mixed", to infer the format for each element individually. This is risky,
        and you should probably use it along with `dayfirst`.

    .. versionadded:: 2.0.0
dayfirst : bool, default False
    DD/MM format dates, international and European format.
cache_dates : bool, default True
    If ``True``, use a cache of unique, converted dates to apply the ``datetime``
    conversion. May produce significant speed-up when parsing duplicate
    date strings, especially ones with timezone offsets.

iterator : bool, default False
    Return ``TextFileReader`` object for iteration or getting chunks with
    ``get_chunk()``.
chunksize : int, optional
    Number of lines to read from the file per chunk. Passing a value will cause the
    function to return a ``TextFileReader`` object for iteration.
    See the `IO Tools docs
    <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
    for more information on ``iterator`` and ``chunksize``.

{decompression_options}

    .. versionchanged:: 1.4.0 Zstandard support.

thousands : str (length 1), optional
    Character acting as the thousands separator in numerical values.
decimal : str (length 1), default '.'
    Character to recognize as decimal point (e.g., use ',' for European data).
lineterminator : str (length 1), optional
    Character used to denote a line break. Only valid with C parser.
quotechar : str (length 1), optional
    Character used to denote the start and end of a quoted item. Quoted
    items can include the ``delimiter`` and it will be ignored.
quoting : {{0 or csv.QUOTE_MINIMAL, 1 or csv.QUOTE_ALL, 2 or csv.QUOTE_NONNUMERIC, \
3 or csv.QUOTE_NONE}}, default csv.QUOTE_MINIMAL
    Control field quoting behavior per ``csv.QUOTE_*`` constants. Default is
    ``csv.QUOTE_MINIMAL`` (i.e., 0) which implies that only fields containing special
    characters are quoted (e.g., characters defined in ``quotechar``, ``delimiter``,
    or ``lineterminator``).
doublequote : bool, default True
   When ``quotechar`` is specified and ``quoting`` is not ``QUOTE_NONE``, indicate
   whether or not to interpret two consecutive ``quotechar`` elements INSIDE a
   field as a single ``quotechar`` element.
escapechar : str (length 1), optional
    Character used to escape other characters.
comment : str (length 1), optional
    Character indicating that the remainder of line should not be parsed.
    If found at the beginning
    of a line, the line will be ignored altogether. This parameter must be a
    single character. Like empty lines (as long as ``skip_blank_lines=True``),
    fully commented lines are ignored by the parameter ``header`` but not by
    ``skiprows``. For example, if ``comment='#'``, parsing
    ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in ``'a,b,c'`` being
    treated as the header.
encoding : str, optional, default 'utf-8'
    Encoding to use for UTF when reading/writing (ex. ``'utf-8'``). `List of Python
    standard encodings
    <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .

encoding_errors : str, optional, default 'strict'
    How encoding errors are treated. `List of possible values
    <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .

    .. versionadded:: 1.3.0

dialect : str or csv.Dialect, optional
    If provided, this parameter will override values (default or not) for the
    following parameters: ``delimiter``, ``doublequote``, ``escapechar``,
    ``skipinitialspace``, ``quotechar``, and ``quoting``. If it is necessary to
    override values, a ``ParserWarning`` will be issued. See ``csv.Dialect``
    documentation for more details.
on_bad_lines : {{'error', 'warn', 'skip'}} or Callable, default 'error'
    Specifies what to do upon encountering a bad line (a line with too many fields).
    Allowed values are :

    - ``'error'``, raise an Exception when a bad line is encountered.
    - ``'warn'``, raise a warning when a bad line is encountered and skip that line.
    - ``'skip'``, skip bad lines without raising or warning when they are encountered.

    .. versionadded:: 1.3.0

    .. versionadded:: 1.4.0

        - Callable, function with signature
          ``(bad_line: list[str]) -> list[str] | None`` that will process a single
          bad line. ``bad_line`` is a list of strings split by the ``sep``.
          If the function returns ``None``, the bad line will be ignored.
          If the function returns a new ``list`` of strings with more elements than
          expected, a ``ParserWarning`` will be emitted while dropping extra elements.
          Only supported when ``engine='python'``

    .. versionchanged:: 2.2.0

        - Callable, function with signature
          as described in `pyarrow documentation
          <https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html
          #pyarrow.csv.ParseOptions.invalid_row_handler>`_ when ``engine='pyarrow'``

delim_whitespace : bool, default False
    Specifies whether or not whitespace (e.g. ``' '`` or ``'\\t'``) will be
    used as the ``sep`` delimiter. Equivalent to setting ``sep='\\s+'``. If this option
    is set to ``True``, nothing should be passed in for the ``delimiter``
    parameter.

    .. deprecated:: 2.2.0
        Use ``sep="\\s+"`` instead.
low_memory : bool, default True
    Internally process the file in chunks, resulting in lower memory use
    while parsing, but possibly mixed type inference.  To ensure no mixed
    types either set ``False``, or specify the type with the ``dtype`` parameter.
    Note that the entire file is read into a single :class:`~pandas.DataFrame`
    regardless, use the ``chunksize`` or ``iterator`` parameter to return the data in
    chunks. (Only valid with C parser).
memory_map : bool, default False
    If a filepath is provided for ``filepath_or_buffer``, map the file object
    directly onto memory and access the data directly from there. Using this
    option can improve performance because there is no longer any I/O overhead.
float_precision : {{'high', 'legacy', 'round_trip'}}, optional
    Specifies which converter the C engine should use for floating-point
    values. The options are ``None`` or ``'high'`` for the ordinary converter,
    ``'legacy'`` for the original lower precision pandas converter, and
    ``'round_trip'`` for the round-trip converter.

{storage_options}

dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
    Back-end data type applied to the resultant :class:`DataFrame`
    (still experimental). Behaviour is as follows:

    * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
      (default).
    * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
      DataFrame.

    .. versionadded:: 2.0

Returns
-------
DataFrame or TextFileReader
    A comma-separated values (csv) file is returned as a two-dimensional
    data structure with labeled axes.

See Also
--------
DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
{see_also_func_name} : {see_also_func_summary}
read_fwf : Read a table of fixed-width formatted lines into DataFrame.

Examples
--------
>>> pd.{func_name}('data.csv')  # doctest: +SKIP
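
Illustrative sketches of the ``usecols``, ``dtype``, and ``chunksize``
parameters described above (``data.csv`` is a placeholder file and
``process`` stands in for user code):

>>> pd.{func_name}('data.csv', usecols=['a', 'b'], dtype=str)  # doctest: +SKIP
>>> with pd.{func_name}('data.csv', chunksize=1000) as reader:  # doctest: +SKIP
...     for chunk in reader:
...         process(chunk)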
"""
)


class _C_Parser_Defaults(TypedDict):
    delim_whitespace: Literal[False]
    na_filter: Literal[True]
    low_memory: Literal[True]
    memory_map: Literal[False]
    float_precision: None


_c_parser_defaults: _C_Parser_Defaults = {
    "delim_whitespace": False,
    "na_filter": True,
    "low_memory": True,
    "memory_map": False,
    "float_precision": None,
}


class _Fwf_Defaults(TypedDict):
    colspecs: Literal["infer"]
    infer_nrows: Literal[100]
    widths: None


_fwf_defaults: _Fwf_Defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None}
_c_unsupported = {"skipfooter"}
_python_unsupported = {"low_memory", "float_precision"}
_pyarrow_unsupported = {
    "skipfooter",
    "float_precision",
    "chunksize",
    "comment",
    "nrows",
    "thousands",
    "memory_map",
    "dialect",
    "delim_whitespace",
    "quoting",
    "lineterminator",
    "converters",
    "iterator",
    "dayfirst",
    "verbose",
    "skipinitialspace",
    "low_memory",
}


class _DeprecationConfig(NamedTuple):
    default_value: Any
    msg: str | None


@overload
def validate_integer(name: str, val: None, min_val: int = ...) -> None:
    ...


@overload
def validate_integer(name: str, val: float, min_val: int = ...) -> int:
    ...


@overload
def validate_integer(name: str, val: int | None, min_val: int = ...) -> int | None:
    ...


def validate_integer(
    name: str, val: int | float | None, min_val: int = 0
) -> int | None:
    """
    Checks whether the 'name' parameter for parsing is either
    an integer OR float that can SAFELY be cast to an integer
    without losing accuracy. Raises a ValueError if that is
    not the case.

    Parameters
    ----------
    name : str
        Parameter name (used for error reporting)
    val : int or float
        The value to check
    min_val : int
        Minimum allowed value (val < min_val will result in a ValueError)
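
    Examples
    --------
    Doctest sketch of the contract described above: a float with no
    fractional part is safely cast, while a fractional float raises.

    >>> validate_integer("nrows", 3.0)
    3
    >>> validate_integer("nrows", 3.5)
    Traceback (most recent call last):
        ...
    ValueError: 'nrows' must be an integer >=0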
    """
    if val is None:
        return val

    msg = f"'{name:s}' must be an integer >={min_val:d}"
    if is_float(val):
        if int(val) != val:
            raise ValueError(msg)
        val = int(val)
    elif not (is_integer(val) and val >= min_val):
        raise ValueError(msg)

    return int(val)


def _validate_names(names: Sequence[Hashable] | None) -> None:
    """
    Raise ValueError if the `names` parameter contains duplicates or has an
    invalid data type.

    Parameters
    ----------
    names : array-like or None
        An array containing a list of the names used for the output DataFrame.

    Raises
    ------
    ValueError
        If names are not unique or are not ordered (e.g. set).
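
    Examples
    --------
    Doctest sketches of the checks above (note this is a private helper):

    >>> _validate_names(["a", "b"])  # unique and ordered: passes silently
    >>> _validate_names(["a", "a"])
    Traceback (most recent call last):
        ...
    ValueError: Duplicate names are not allowed.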
 | 
						|
    """
 | 
						|
    if names is not None:
 | 
						|
        if len(names) != len(set(names)):
 | 
						|
            raise ValueError("Duplicate names are not allowed.")
 | 
						|
        if not (
 | 
						|
            is_list_like(names, allow_sets=False) or isinstance(names, abc.KeysView)
 | 
						|
        ):
 | 
						|
            raise ValueError("Names should be an ordered collection.")
 | 
						|
 | 
						|
 | 
						|
def _read(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
 | 
						|
) -> DataFrame | TextFileReader:
 | 
						|
    """Generic reader of line files."""
 | 
						|
    # if we pass a date_parser and parse_dates=False, we should not parse the
 | 
						|
    # dates GH#44366
 | 
						|
    if kwds.get("parse_dates", None) is None:
 | 
						|
        if (
 | 
						|
            kwds.get("date_parser", lib.no_default) is lib.no_default
 | 
						|
            and kwds.get("date_format", None) is None
 | 
						|
        ):
 | 
						|
            kwds["parse_dates"] = False
 | 
						|
        else:
 | 
						|
            kwds["parse_dates"] = True
 | 
						|
 | 
						|
    # Extract some of the arguments (pass chunksize on).
 | 
						|
    iterator = kwds.get("iterator", False)
 | 
						|
    chunksize = kwds.get("chunksize", None)
 | 
						|
    if kwds.get("engine") == "pyarrow":
 | 
						|
        if iterator:
 | 
						|
            raise ValueError(
 | 
						|
                "The 'iterator' option is not supported with the 'pyarrow' engine"
 | 
						|
            )
 | 
						|
 | 
						|
        if chunksize is not None:
 | 
						|
            raise ValueError(
 | 
						|
                "The 'chunksize' option is not supported with the 'pyarrow' engine"
 | 
						|
            )
 | 
						|
    else:
 | 
						|
        chunksize = validate_integer("chunksize", chunksize, 1)
 | 
						|
 | 
						|
    nrows = kwds.get("nrows", None)
 | 
						|
 | 
						|
    # Check for duplicates in names.
 | 
						|
    _validate_names(kwds.get("names", None))
 | 
						|
 | 
						|
    # Create the parser.
 | 
						|
    parser = TextFileReader(filepath_or_buffer, **kwds)
 | 
						|
 | 
						|
    if chunksize or iterator:
 | 
						|
        return parser
 | 
						|
 | 
						|
    with parser:
 | 
						|
        return parser.read(nrows)
 | 
						|
 | 
						|
 | 
						|
# iterator=True -> TextFileReader
 | 
						|
@overload
 | 
						|
def read_csv(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
 | 
						|
    *,
 | 
						|
    sep: str | None | lib.NoDefault = ...,
 | 
						|
    delimiter: str | None | lib.NoDefault = ...,
 | 
						|
    header: int | Sequence[int] | None | Literal["infer"] = ...,
 | 
						|
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
 | 
						|
    index_col: IndexLabel | Literal[False] | None = ...,
 | 
						|
    usecols: UsecolsArgType = ...,
 | 
						|
    dtype: DtypeArg | None = ...,
 | 
						|
    engine: CSVEngine | None = ...,
 | 
						|
    converters: Mapping[Hashable, Callable] | None = ...,
 | 
						|
    true_values: list | None = ...,
 | 
						|
    false_values: list | None = ...,
 | 
						|
    skipinitialspace: bool = ...,
 | 
						|
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
 | 
						|
    skipfooter: int = ...,
 | 
						|
    nrows: int | None = ...,
 | 
						|
    na_values: Hashable
 | 
						|
    | Iterable[Hashable]
 | 
						|
    | Mapping[Hashable, Iterable[Hashable]]
 | 
						|
    | None = ...,
 | 
						|
    na_filter: bool = ...,
 | 
						|
    verbose: bool | lib.NoDefault = ...,
 | 
						|
    skip_blank_lines: bool = ...,
 | 
						|
    parse_dates: bool | Sequence[Hashable] | None = ...,
 | 
						|
    infer_datetime_format: bool | lib.NoDefault = ...,
 | 
						|
    keep_date_col: bool | lib.NoDefault = ...,
 | 
						|
    date_parser: Callable | lib.NoDefault = ...,
 | 
						|
    date_format: str | dict[Hashable, str] | None = ...,
 | 
						|
    dayfirst: bool = ...,
 | 
						|
    cache_dates: bool = ...,
 | 
						|
    iterator: Literal[True],
 | 
						|
    chunksize: int | None = ...,
 | 
						|
    compression: CompressionOptions = ...,
 | 
						|
    thousands: str | None = ...,
 | 
						|
    decimal: str = ...,
 | 
						|
    lineterminator: str | None = ...,
 | 
						|
    quotechar: str = ...,
 | 
						|
    quoting: int = ...,
 | 
						|
    doublequote: bool = ...,
 | 
						|
    escapechar: str | None = ...,
 | 
						|
    comment: str | None = ...,
 | 
						|
    encoding: str | None = ...,
 | 
						|
    encoding_errors: str | None = ...,
 | 
						|
    dialect: str | csv.Dialect | None = ...,
 | 
						|
    on_bad_lines=...,
 | 
						|
    delim_whitespace: bool | lib.NoDefault = ...,
 | 
						|
    low_memory: bool = ...,
 | 
						|
    memory_map: bool = ...,
 | 
						|
    float_precision: Literal["high", "legacy"] | None = ...,
 | 
						|
    storage_options: StorageOptions = ...,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
 | 
						|
) -> TextFileReader:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
# chunksize=int -> TextFileReader
 | 
						|
@overload
 | 
						|
def read_csv(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
 | 
						|
    *,
 | 
						|
    sep: str | None | lib.NoDefault = ...,
 | 
						|
    delimiter: str | None | lib.NoDefault = ...,
 | 
						|
    header: int | Sequence[int] | None | Literal["infer"] = ...,
 | 
						|
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
 | 
						|
    index_col: IndexLabel | Literal[False] | None = ...,
 | 
						|
    usecols: UsecolsArgType = ...,
 | 
						|
    dtype: DtypeArg | None = ...,
 | 
						|
    engine: CSVEngine | None = ...,
 | 
						|
    converters: Mapping[Hashable, Callable] | None = ...,
 | 
						|
    true_values: list | None = ...,
 | 
						|
    false_values: list | None = ...,
 | 
						|
    skipinitialspace: bool = ...,
 | 
						|
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
 | 
						|
    skipfooter: int = ...,
 | 
						|
    nrows: int | None = ...,
 | 
						|
    na_values: Hashable
 | 
						|
    | Iterable[Hashable]
 | 
						|
    | Mapping[Hashable, Iterable[Hashable]]
 | 
						|
    | None = ...,
 | 
						|
    keep_default_na: bool = ...,
 | 
						|
    na_filter: bool = ...,
 | 
						|
    verbose: bool | lib.NoDefault = ...,
 | 
						|
    skip_blank_lines: bool = ...,
 | 
						|
    parse_dates: bool | Sequence[Hashable] | None = ...,
 | 
						|
    infer_datetime_format: bool | lib.NoDefault = ...,
 | 
						|
    keep_date_col: bool | lib.NoDefault = ...,
 | 
						|
    date_parser: Callable | lib.NoDefault = ...,
 | 
						|
    date_format: str | dict[Hashable, str] | None = ...,
 | 
						|
    dayfirst: bool = ...,
 | 
						|
    cache_dates: bool = ...,
 | 
						|
    iterator: bool = ...,
 | 
						|
    chunksize: int,
 | 
						|
    compression: CompressionOptions = ...,
 | 
						|
    thousands: str | None = ...,
 | 
						|
    decimal: str = ...,
 | 
						|
    lineterminator: str | None = ...,
 | 
						|
    quotechar: str = ...,
 | 
						|
    quoting: int = ...,
 | 
						|
    doublequote: bool = ...,
 | 
						|
    escapechar: str | None = ...,
 | 
						|
    comment: str | None = ...,
 | 
						|
    encoding: str | None = ...,
 | 
						|
    encoding_errors: str | None = ...,
 | 
						|
    dialect: str | csv.Dialect | None = ...,
 | 
						|
    on_bad_lines=...,
 | 
						|
    delim_whitespace: bool | lib.NoDefault = ...,
 | 
						|
    low_memory: bool = ...,
 | 
						|
    memory_map: bool = ...,
 | 
						|
    float_precision: Literal["high", "legacy"] | None = ...,
 | 
						|
    storage_options: StorageOptions = ...,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
 | 
						|
) -> TextFileReader:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
# default case -> DataFrame
 | 
						|
@overload
 | 
						|
def read_csv(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
 | 
						|
    *,
 | 
						|
    sep: str | None | lib.NoDefault = ...,
 | 
						|
    delimiter: str | None | lib.NoDefault = ...,
 | 
						|
    header: int | Sequence[int] | None | Literal["infer"] = ...,
 | 
						|
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
 | 
						|
    index_col: IndexLabel | Literal[False] | None = ...,
 | 
						|
    usecols: UsecolsArgType = ...,
 | 
						|
    dtype: DtypeArg | None = ...,
 | 
						|
    engine: CSVEngine | None = ...,
 | 
						|
    converters: Mapping[Hashable, Callable] | None = ...,
 | 
						|
    true_values: list | None = ...,
 | 
						|
    false_values: list | None = ...,
 | 
						|
    skipinitialspace: bool = ...,
 | 
						|
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
 | 
						|
    skipfooter: int = ...,
 | 
						|
    nrows: int | None = ...,
 | 
						|
    na_values: Hashable
 | 
						|
    | Iterable[Hashable]
 | 
						|
    | Mapping[Hashable, Iterable[Hashable]]
 | 
						|
    | None = ...,
 | 
						|
    keep_default_na: bool = ...,
 | 
						|
    na_filter: bool = ...,
 | 
						|
    verbose: bool | lib.NoDefault = ...,
 | 
						|
    skip_blank_lines: bool = ...,
 | 
						|
    parse_dates: bool | Sequence[Hashable] | None = ...,
 | 
						|
    infer_datetime_format: bool | lib.NoDefault = ...,
 | 
						|
    keep_date_col: bool | lib.NoDefault = ...,
 | 
						|
    date_parser: Callable | lib.NoDefault = ...,
 | 
						|
    date_format: str | dict[Hashable, str] | None = ...,
 | 
						|
    dayfirst: bool = ...,
 | 
						|
    cache_dates: bool = ...,
 | 
						|
    iterator: Literal[False] = ...,
 | 
						|
    chunksize: None = ...,
 | 
						|
    compression: CompressionOptions = ...,
 | 
						|
    thousands: str | None = ...,
 | 
						|
    decimal: str = ...,
 | 
						|
    lineterminator: str | None = ...,
 | 
						|
    quotechar: str = ...,
 | 
						|
    quoting: int = ...,
 | 
						|
    doublequote: bool = ...,
 | 
						|
    escapechar: str | None = ...,
 | 
						|
    comment: str | None = ...,
 | 
						|
    encoding: str | None = ...,
 | 
						|
    encoding_errors: str | None = ...,
 | 
						|
    dialect: str | csv.Dialect | None = ...,
 | 
						|
    on_bad_lines=...,
 | 
						|
    delim_whitespace: bool | lib.NoDefault = ...,
 | 
						|
    low_memory: bool = ...,
 | 
						|
    memory_map: bool = ...,
 | 
						|
    float_precision: Literal["high", "legacy"] | None = ...,
 | 
						|
    storage_options: StorageOptions = ...,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
 | 
						|
) -> DataFrame:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
# Unions -> DataFrame | TextFileReader
 | 
						|
@overload
 | 
						|
def read_csv(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
 | 
						|
    *,
 | 
						|
    sep: str | None | lib.NoDefault = ...,
 | 
						|
    delimiter: str | None | lib.NoDefault = ...,
 | 
						|
    header: int | Sequence[int] | None | Literal["infer"] = ...,
 | 
						|
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
 | 
						|
    index_col: IndexLabel | Literal[False] | None = ...,
 | 
						|
    usecols: UsecolsArgType = ...,
 | 
						|
    dtype: DtypeArg | None = ...,
 | 
						|
    engine: CSVEngine | None = ...,
 | 
						|
    converters: Mapping[Hashable, Callable] | None = ...,
 | 
						|
    true_values: list | None = ...,
 | 
						|
    false_values: list | None = ...,
 | 
						|
    skipinitialspace: bool = ...,
 | 
						|
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
 | 
						|
    skipfooter: int = ...,
 | 
						|
    nrows: int | None = ...,
 | 
						|
    na_values: Hashable
 | 
						|
    | Iterable[Hashable]
 | 
						|
    | Mapping[Hashable, Iterable[Hashable]]
 | 
						|
    | None = ...,
 | 
						|
    keep_default_na: bool = ...,
 | 
						|
    na_filter: bool = ...,
 | 
						|
    verbose: bool | lib.NoDefault = ...,
 | 
						|
    skip_blank_lines: bool = ...,
 | 
						|
    parse_dates: bool | Sequence[Hashable] | None = ...,
 | 
						|
    infer_datetime_format: bool | lib.NoDefault = ...,
 | 
						|
    keep_date_col: bool | lib.NoDefault = ...,
 | 
						|
    date_parser: Callable | lib.NoDefault = ...,
 | 
						|
    date_format: str | dict[Hashable, str] | None = ...,
 | 
						|
    dayfirst: bool = ...,
 | 
						|
    cache_dates: bool = ...,
 | 
						|
    iterator: bool = ...,
 | 
						|
    chunksize: int | None = ...,
 | 
						|
    compression: CompressionOptions = ...,
 | 
						|
    thousands: str | None = ...,
 | 
						|
    decimal: str = ...,
 | 
						|
    lineterminator: str | None = ...,
 | 
						|
    quotechar: str = ...,
 | 
						|
    quoting: int = ...,
 | 
						|
    doublequote: bool = ...,
 | 
						|
    escapechar: str | None = ...,
 | 
						|
    comment: str | None = ...,
 | 
						|
    encoding: str | None = ...,
 | 
						|
    encoding_errors: str | None = ...,
 | 
						|
    dialect: str | csv.Dialect | None = ...,
 | 
						|
    on_bad_lines=...,
 | 
						|
    delim_whitespace: bool | lib.NoDefault = ...,
 | 
						|
    low_memory: bool = ...,
 | 
						|
    memory_map: bool = ...,
 | 
						|
    float_precision: Literal["high", "legacy"] | None = ...,
 | 
						|
    storage_options: StorageOptions = ...,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
 | 
						|
) -> DataFrame | TextFileReader:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
@Appender(
 | 
						|
    _doc_read_csv_and_table.format(
 | 
						|
        func_name="read_csv",
 | 
						|
        summary="Read a comma-separated values (csv) file into DataFrame.",
 | 
						|
        see_also_func_name="read_table",
 | 
						|
        see_also_func_summary="Read general delimited file into DataFrame.",
 | 
						|
        _default_sep="','",
 | 
						|
        storage_options=_shared_docs["storage_options"],
 | 
						|
        decompression_options=_shared_docs["decompression_options"]
 | 
						|
        % "filepath_or_buffer",
 | 
						|
    )
 | 
						|
)
 | 
						|
def read_csv(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
 | 
						|
    *,
 | 
						|
    sep: str | None | lib.NoDefault = lib.no_default,
 | 
						|
    delimiter: str | None | lib.NoDefault = None,
 | 
						|
    # Column and Index Locations and Names
 | 
						|
    header: int | Sequence[int] | None | Literal["infer"] = "infer",
 | 
						|
    names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default,
 | 
						|
    index_col: IndexLabel | Literal[False] | None = None,
 | 
						|
    usecols: UsecolsArgType = None,
 | 
						|
    # General Parsing Configuration
 | 
						|
    dtype: DtypeArg | None = None,
 | 
						|
    engine: CSVEngine | None = None,
 | 
						|
    converters: Mapping[Hashable, Callable] | None = None,
 | 
						|
    true_values: list | None = None,
 | 
						|
    false_values: list | None = None,
 | 
						|
    skipinitialspace: bool = False,
 | 
						|
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = None,
 | 
						|
    skipfooter: int = 0,
 | 
						|
    nrows: int | None = None,
 | 
						|
    # NA and Missing Data Handling
 | 
						|
    na_values: Hashable
 | 
						|
    | Iterable[Hashable]
 | 
						|
    | Mapping[Hashable, Iterable[Hashable]]
 | 
						|
    | None = None,
 | 
						|
    keep_default_na: bool = True,
 | 
						|
    na_filter: bool = True,
 | 
						|
    verbose: bool | lib.NoDefault = lib.no_default,
 | 
						|
    skip_blank_lines: bool = True,
 | 
						|
    # Datetime Handling
 | 
						|
    parse_dates: bool | Sequence[Hashable] | None = None,
 | 
						|
    infer_datetime_format: bool | lib.NoDefault = lib.no_default,
 | 
						|
    keep_date_col: bool | lib.NoDefault = lib.no_default,
 | 
						|
    date_parser: Callable | lib.NoDefault = lib.no_default,
 | 
						|
    date_format: str | dict[Hashable, str] | None = None,
 | 
						|
    dayfirst: bool = False,
 | 
						|
    cache_dates: bool = True,
 | 
						|
    # Iteration
 | 
						|
    iterator: bool = False,
 | 
						|
    chunksize: int | None = None,
 | 
						|
    # Quoting, Compression, and File Format
 | 
						|
    compression: CompressionOptions = "infer",
 | 
						|
    thousands: str | None = None,
 | 
						|
    decimal: str = ".",
 | 
						|
    lineterminator: str | None = None,
 | 
						|
    quotechar: str = '"',
 | 
						|
    quoting: int = csv.QUOTE_MINIMAL,
 | 
						|
    doublequote: bool = True,
 | 
						|
    escapechar: str | None = None,
 | 
						|
    comment: str | None = None,
 | 
						|
    encoding: str | None = None,
 | 
						|
    encoding_errors: str | None = "strict",
 | 
						|
    dialect: str | csv.Dialect | None = None,
 | 
						|
    # Error Handling
 | 
						|
    on_bad_lines: str = "error",
 | 
						|
    # Internal
 | 
						|
    delim_whitespace: bool | lib.NoDefault = lib.no_default,
 | 
						|
    low_memory: bool = _c_parser_defaults["low_memory"],
 | 
						|
    memory_map: bool = False,
 | 
						|
    float_precision: Literal["high", "legacy"] | None = None,
 | 
						|
    storage_options: StorageOptions | None = None,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
 | 
						|
) -> DataFrame | TextFileReader:
 | 
						|
    if keep_date_col is not lib.no_default:
 | 
						|
        # GH#55569
 | 
						|
        warnings.warn(
 | 
						|
            "The 'keep_date_col' keyword in pd.read_csv is deprecated and "
 | 
						|
            "will be removed in a future version. Explicitly remove unwanted "
 | 
						|
            "columns after parsing instead.",
 | 
						|
            FutureWarning,
 | 
						|
            stacklevel=find_stack_level(),
 | 
						|
        )
 | 
						|
    else:
 | 
						|
        keep_date_col = False
 | 
						|
 | 
						|
    if lib.is_list_like(parse_dates):
 | 
						|
        # GH#55569
 | 
						|
        depr = False
 | 
						|
        # error: Item "bool" of "bool | Sequence[Hashable] | None" has no
 | 
						|
        # attribute "__iter__" (not iterable)
 | 
						|
        if not all(is_hashable(x) for x in parse_dates):  # type: ignore[union-attr]
 | 
						|
            depr = True
 | 
						|
        elif isinstance(parse_dates, dict) and any(
 | 
						|
            lib.is_list_like(x) for x in parse_dates.values()
 | 
						|
        ):
 | 
						|
            depr = True
 | 
						|
        if depr:
 | 
						|
            warnings.warn(
 | 
						|
                "Support for nested sequences for 'parse_dates' in pd.read_csv "
 | 
						|
                "is deprecated. Combine the desired columns with pd.to_datetime "
 | 
						|
                "after parsing instead.",
 | 
						|
                FutureWarning,
 | 
						|
                stacklevel=find_stack_level(),
 | 
						|
            )
 | 
						|
 | 
						|
    if infer_datetime_format is not lib.no_default:
 | 
						|
        warnings.warn(
 | 
						|
            "The argument 'infer_datetime_format' is deprecated and will "
 | 
						|
            "be removed in a future version. "
 | 
						|
            "A strict version of it is now the default, see "
 | 
						|
            "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
 | 
						|
            "You can safely remove this argument.",
 | 
						|
            FutureWarning,
 | 
						|
            stacklevel=find_stack_level(),
 | 
						|
        )
 | 
						|
 | 
						|
    if delim_whitespace is not lib.no_default:
 | 
						|
        # GH#55569
 | 
						|
        warnings.warn(
 | 
						|
            "The 'delim_whitespace' keyword in pd.read_csv is deprecated and "
 | 
						|
            "will be removed in a future version. Use ``sep='\\s+'`` instead",
 | 
						|
            FutureWarning,
 | 
						|
            stacklevel=find_stack_level(),
 | 
						|
        )
 | 
						|
    else:
 | 
						|
        delim_whitespace = False
 | 
						|
 | 
						|
    if verbose is not lib.no_default:
 | 
						|
        # GH#55569
 | 
						|
        warnings.warn(
 | 
						|
            "The 'verbose' keyword in pd.read_csv is deprecated and "
 | 
						|
            "will be removed in a future version.",
 | 
						|
            FutureWarning,
 | 
						|
            stacklevel=find_stack_level(),
 | 
						|
        )
 | 
						|
    else:
 | 
						|
        verbose = False
 | 
						|
 | 
						|
    # locals() should never be modified
 | 
						|
    kwds = locals().copy()
 | 
						|
    del kwds["filepath_or_buffer"]
 | 
						|
    del kwds["sep"]
 | 
						|
 | 
						|
    kwds_defaults = _refine_defaults_read(
 | 
						|
        dialect,
 | 
						|
        delimiter,
 | 
						|
        delim_whitespace,
 | 
						|
        engine,
 | 
						|
        sep,
 | 
						|
        on_bad_lines,
 | 
						|
        names,
 | 
						|
        defaults={"delimiter": ","},
 | 
						|
        dtype_backend=dtype_backend,
 | 
						|
    )
 | 
						|
    kwds.update(kwds_defaults)
 | 
						|
 | 
						|
    return _read(filepath_or_buffer, kwds)
 | 
						|
 | 
						|
 | 
						|
# iterator=True -> TextFileReader
 | 
						|
@overload
 | 
						|
def read_table(
 | 
						|
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
 | 
						|
    *,
 | 
						|
    sep: str | None | lib.NoDefault = ...,
 | 
						|
    delimiter: str | None | lib.NoDefault = ...,
 | 
						|
    header: int | Sequence[int] | None | Literal["infer"] = ...,
 | 
						|
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
 | 
						|
    index_col: IndexLabel | Literal[False] | None = ...,
 | 
						|
    usecols: UsecolsArgType = ...,
 | 
						|
    dtype: DtypeArg | None = ...,
 | 
						|
    engine: CSVEngine | None = ...,
 | 
						|
    converters: Mapping[Hashable, Callable] | None = ...,
 | 
						|
    true_values: list | None = ...,
 | 
						|
    false_values: list | None = ...,
 | 
						|
    skipinitialspace: bool = ...,
 | 
						|
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
 | 
						|
    skipfooter: int = ...,
 | 
						|
    nrows: int | None = ...,
 | 
						|
    na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ...,
 | 
						|
    keep_default_na: bool = ...,
 | 
						|
    na_filter: bool = ...,
 | 
						|
    verbose: bool | lib.NoDefault = ...,
 | 
						|
    skip_blank_lines: bool = ...,
 | 
						|
    parse_dates: bool | Sequence[Hashable] = ...,
 | 
						|
    infer_datetime_format: bool | lib.NoDefault = ...,
 | 
						|
    keep_date_col: bool | lib.NoDefault = ...,
 | 
						|
    date_parser: Callable | lib.NoDefault = ...,
 | 
						|
    date_format: str | dict[Hashable, str] | None = ...,
 | 
						|
    dayfirst: bool = ...,
 | 
						|
    cache_dates: bool = ...,
 | 
						|
    iterator: Literal[True],
 | 
						|
    chunksize: int | None = ...,
 | 
						|
    compression: CompressionOptions = ...,
 | 
						|
    thousands: str | None = ...,
 | 
						|
    decimal: str = ...,
 | 
						|
    lineterminator: str | None = ...,
 | 
						|
    quotechar: str = ...,
 | 
						|
    quoting: int = ...,
 | 
						|
    doublequote: bool = ...,
 | 
						|
    escapechar: str | None = ...,
 | 
						|
    comment: str | None = ...,
 | 
						|
    encoding: str | None = ...,
 | 
						|
    encoding_errors: str | None = ...,
 | 
						|
    dialect: str | csv.Dialect | None = ...,
 | 
						|
    on_bad_lines=...,
 | 
						|
    delim_whitespace: bool = ...,
 | 
						|
    low_memory: bool = ...,
 | 
						|
    memory_map: bool = ...,
 | 
						|
    float_precision: str | None = ...,
 | 
						|
    storage_options: StorageOptions = ...,
 | 
						|
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
 | 
						|
) -> TextFileReader:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
# chunksize=int -> TextFileReader
@overload
def read_table(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    sep: str | None | lib.NoDefault = ...,
    delimiter: str | None | lib.NoDefault = ...,
    header: int | Sequence[int] | None | Literal["infer"] = ...,
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
    index_col: IndexLabel | Literal[False] | None = ...,
    usecols: UsecolsArgType = ...,
    dtype: DtypeArg | None = ...,
    engine: CSVEngine | None = ...,
    converters: Mapping[Hashable, Callable] | None = ...,
    true_values: list | None = ...,
    false_values: list | None = ...,
    skipinitialspace: bool = ...,
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
    skipfooter: int = ...,
    nrows: int | None = ...,
    na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ...,
    keep_default_na: bool = ...,
    na_filter: bool = ...,
    verbose: bool | lib.NoDefault = ...,
    skip_blank_lines: bool = ...,
    parse_dates: bool | Sequence[Hashable] = ...,
    infer_datetime_format: bool | lib.NoDefault = ...,
    keep_date_col: bool | lib.NoDefault = ...,
    date_parser: Callable | lib.NoDefault = ...,
    date_format: str | dict[Hashable, str] | None = ...,
    dayfirst: bool = ...,
    cache_dates: bool = ...,
    iterator: bool = ...,
    chunksize: int,
    compression: CompressionOptions = ...,
    thousands: str | None = ...,
    decimal: str = ...,
    lineterminator: str | None = ...,
    quotechar: str = ...,
    quoting: int = ...,
    doublequote: bool = ...,
    escapechar: str | None = ...,
    comment: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    dialect: str | csv.Dialect | None = ...,
    on_bad_lines=...,
    delim_whitespace: bool = ...,
    low_memory: bool = ...,
    memory_map: bool = ...,
    float_precision: str | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> TextFileReader:
    ...


# default -> DataFrame
@overload
def read_table(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    sep: str | None | lib.NoDefault = ...,
    delimiter: str | None | lib.NoDefault = ...,
    header: int | Sequence[int] | None | Literal["infer"] = ...,
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
    index_col: IndexLabel | Literal[False] | None = ...,
    usecols: UsecolsArgType = ...,
    dtype: DtypeArg | None = ...,
    engine: CSVEngine | None = ...,
    converters: Mapping[Hashable, Callable] | None = ...,
    true_values: list | None = ...,
    false_values: list | None = ...,
    skipinitialspace: bool = ...,
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
    skipfooter: int = ...,
    nrows: int | None = ...,
    na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ...,
    keep_default_na: bool = ...,
    na_filter: bool = ...,
    verbose: bool | lib.NoDefault = ...,
    skip_blank_lines: bool = ...,
    parse_dates: bool | Sequence[Hashable] = ...,
    infer_datetime_format: bool | lib.NoDefault = ...,
    keep_date_col: bool | lib.NoDefault = ...,
    date_parser: Callable | lib.NoDefault = ...,
    date_format: str | dict[Hashable, str] | None = ...,
    dayfirst: bool = ...,
    cache_dates: bool = ...,
    iterator: Literal[False] = ...,
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    thousands: str | None = ...,
    decimal: str = ...,
    lineterminator: str | None = ...,
    quotechar: str = ...,
    quoting: int = ...,
    doublequote: bool = ...,
    escapechar: str | None = ...,
    comment: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    dialect: str | csv.Dialect | None = ...,
    on_bad_lines=...,
    delim_whitespace: bool = ...,
    low_memory: bool = ...,
    memory_map: bool = ...,
    float_precision: str | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> DataFrame:
    ...


# Unions -> DataFrame | TextFileReader
@overload
def read_table(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    sep: str | None | lib.NoDefault = ...,
    delimiter: str | None | lib.NoDefault = ...,
    header: int | Sequence[int] | None | Literal["infer"] = ...,
    names: Sequence[Hashable] | None | lib.NoDefault = ...,
    index_col: IndexLabel | Literal[False] | None = ...,
    usecols: UsecolsArgType = ...,
    dtype: DtypeArg | None = ...,
    engine: CSVEngine | None = ...,
    converters: Mapping[Hashable, Callable] | None = ...,
    true_values: list | None = ...,
    false_values: list | None = ...,
    skipinitialspace: bool = ...,
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = ...,
    skipfooter: int = ...,
    nrows: int | None = ...,
    na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = ...,
    keep_default_na: bool = ...,
    na_filter: bool = ...,
    verbose: bool | lib.NoDefault = ...,
    skip_blank_lines: bool = ...,
    parse_dates: bool | Sequence[Hashable] = ...,
    infer_datetime_format: bool | lib.NoDefault = ...,
    keep_date_col: bool | lib.NoDefault = ...,
    date_parser: Callable | lib.NoDefault = ...,
    date_format: str | dict[Hashable, str] | None = ...,
    dayfirst: bool = ...,
    cache_dates: bool = ...,
    iterator: bool = ...,
    chunksize: int | None = ...,
    compression: CompressionOptions = ...,
    thousands: str | None = ...,
    decimal: str = ...,
    lineterminator: str | None = ...,
    quotechar: str = ...,
    quoting: int = ...,
    doublequote: bool = ...,
    escapechar: str | None = ...,
    comment: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    dialect: str | csv.Dialect | None = ...,
    on_bad_lines=...,
    delim_whitespace: bool = ...,
    low_memory: bool = ...,
    memory_map: bool = ...,
    float_precision: str | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
) -> DataFrame | TextFileReader:
    ...


@Appender(
    _doc_read_csv_and_table.format(
        func_name="read_table",
        summary="Read general delimited file into DataFrame.",
        see_also_func_name="read_csv",
        see_also_func_summary=(
            "Read a comma-separated values (csv) file into DataFrame."
        ),
        _default_sep=r"'\\t' (tab-stop)",
        storage_options=_shared_docs["storage_options"],
        decompression_options=_shared_docs["decompression_options"]
        % "filepath_or_buffer",
    )
)
def read_table(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    sep: str | None | lib.NoDefault = lib.no_default,
    delimiter: str | None | lib.NoDefault = None,
    # Column and Index Locations and Names
    header: int | Sequence[int] | None | Literal["infer"] = "infer",
    names: Sequence[Hashable] | None | lib.NoDefault = lib.no_default,
    index_col: IndexLabel | Literal[False] | None = None,
    usecols: UsecolsArgType = None,
    # General Parsing Configuration
    dtype: DtypeArg | None = None,
    engine: CSVEngine | None = None,
    converters: Mapping[Hashable, Callable] | None = None,
    true_values: list | None = None,
    false_values: list | None = None,
    skipinitialspace: bool = False,
    skiprows: list[int] | int | Callable[[Hashable], bool] | None = None,
    skipfooter: int = 0,
    nrows: int | None = None,
    # NA and Missing Data Handling
    na_values: Sequence[str] | Mapping[str, Sequence[str]] | None = None,
    keep_default_na: bool = True,
    na_filter: bool = True,
    verbose: bool | lib.NoDefault = lib.no_default,
    skip_blank_lines: bool = True,
    # Datetime Handling
    parse_dates: bool | Sequence[Hashable] = False,
    infer_datetime_format: bool | lib.NoDefault = lib.no_default,
    keep_date_col: bool | lib.NoDefault = lib.no_default,
    date_parser: Callable | lib.NoDefault = lib.no_default,
    date_format: str | dict[Hashable, str] | None = None,
    dayfirst: bool = False,
    cache_dates: bool = True,
    # Iteration
    iterator: bool = False,
    chunksize: int | None = None,
    # Quoting, Compression, and File Format
    compression: CompressionOptions = "infer",
    thousands: str | None = None,
    decimal: str = ".",
    lineterminator: str | None = None,
    quotechar: str = '"',
    quoting: int = csv.QUOTE_MINIMAL,
    doublequote: bool = True,
    escapechar: str | None = None,
    comment: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    dialect: str | csv.Dialect | None = None,
    # Error Handling
    on_bad_lines: str = "error",
    # Internal
    delim_whitespace: bool | lib.NoDefault = lib.no_default,
    low_memory: bool = _c_parser_defaults["low_memory"],
    memory_map: bool = False,
    float_precision: str | None = None,
    storage_options: StorageOptions | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame | TextFileReader:
    if keep_date_col is not lib.no_default:
        # GH#55569
        warnings.warn(
            "The 'keep_date_col' keyword in pd.read_table is deprecated and "
            "will be removed in a future version. Explicitly remove unwanted "
            "columns after parsing instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    else:
        keep_date_col = False

    # error: Item "bool" of "bool | Sequence[Hashable]" has no attribute "__iter__"
    if lib.is_list_like(parse_dates) and not all(is_hashable(x) for x in parse_dates):  # type: ignore[union-attr]
        # GH#55569
        warnings.warn(
            "Support for nested sequences for 'parse_dates' in pd.read_table "
            "is deprecated. Combine the desired columns with pd.to_datetime "
            "after parsing instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if infer_datetime_format is not lib.no_default:
        warnings.warn(
            "The argument 'infer_datetime_format' is deprecated and will "
            "be removed in a future version. "
            "A strict version of it is now the default, see "
            "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
            "You can safely remove this argument.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if delim_whitespace is not lib.no_default:
        # GH#55569
        warnings.warn(
            "The 'delim_whitespace' keyword in pd.read_table is deprecated and "
            "will be removed in a future version. Use ``sep='\\s+'`` instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    else:
        delim_whitespace = False

    if verbose is not lib.no_default:
        # GH#55569
        warnings.warn(
            "The 'verbose' keyword in pd.read_table is deprecated and "
            "will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    else:
        verbose = False

    # locals() should never be modified
    kwds = locals().copy()
    del kwds["filepath_or_buffer"]
    del kwds["sep"]

    kwds_defaults = _refine_defaults_read(
        dialect,
        delimiter,
        delim_whitespace,
        engine,
        sep,
        on_bad_lines,
        names,
        defaults={"delimiter": "\t"},
        dtype_backend=dtype_backend,
    )
    kwds.update(kwds_defaults)

    return _read(filepath_or_buffer, kwds)
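
# A minimal usage sketch (illustrative only, kept as a comment so nothing
# runs at import time): read_table defaults to a tab delimiter, so a buffer
# like "a\tb\n1\t2\n" parses into two columns:
#
#     >>> import io
#     >>> pd.read_table(io.StringIO("a\tb\n1\t2\n"))  # doctest: +SKIP
#        a  b
#     0  1  2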


@overload
def read_fwf(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    colspecs: Sequence[tuple[int, int]] | str | None = ...,
    widths: Sequence[int] | None = ...,
    infer_nrows: int = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    iterator: Literal[True],
    chunksize: int | None = ...,
    **kwds,
) -> TextFileReader:
    ...


@overload
def read_fwf(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    colspecs: Sequence[tuple[int, int]] | str | None = ...,
    widths: Sequence[int] | None = ...,
    infer_nrows: int = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    iterator: bool = ...,
    chunksize: int,
    **kwds,
) -> TextFileReader:
    ...


@overload
def read_fwf(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    colspecs: Sequence[tuple[int, int]] | str | None = ...,
    widths: Sequence[int] | None = ...,
    infer_nrows: int = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    iterator: Literal[False] = ...,
    chunksize: None = ...,
    **kwds,
) -> DataFrame:
    ...


def read_fwf(
    filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
    *,
    colspecs: Sequence[tuple[int, int]] | str | None = "infer",
    widths: Sequence[int] | None = None,
    infer_nrows: int = 100,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    iterator: bool = False,
    chunksize: int | None = None,
    **kwds,
) -> DataFrame | TextFileReader:
    r"""
    Read a table of fixed-width formatted lines into DataFrame.

    Also supports optionally iterating or breaking of the file
    into chunks.

    Additional help can be found in the `online docs for IO Tools
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a text ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.csv``.
    colspecs : list of tuple (int, int) or 'infer', optional
        A list of tuples giving the extents of the fixed-width
        fields of each line as half-open intervals (i.e., [from, to[ ).
        String value 'infer' can be used to instruct the parser to try
        detecting the column specifications from the first 100 rows of
        the data which are not being skipped via skiprows (default='infer').
    widths : list of int, optional
        A list of field widths which can be used instead of 'colspecs' if
        the intervals are contiguous.
    infer_nrows : int, default 100
        The number of rows to consider when letting the parser determine the
        `colspecs`.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    **kwds : optional
        Optional keyword arguments can be passed to ``TextFileReader``.

    Returns
    -------
    DataFrame or TextFileReader
        A table of fixed-width formatted lines is returned as a two-dimensional
        data structure with labeled axes.

    See Also
    --------
    DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Examples
    --------
    >>> pd.read_fwf('data.csv')  # doctest: +SKIP
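
    A minimal sketch with explicit field widths (the file name, widths, and
    column names below are assumptions for illustration):

    >>> pd.read_fwf('data.txt', widths=[8, 6], names=['date', 'value'])  # doctest: +SKIP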
    """
    # Check input arguments.
    if colspecs is None and widths is None:
        raise ValueError("Must specify either colspecs or widths")
    if colspecs not in (None, "infer") and widths is not None:
        raise ValueError("You must specify only one of 'widths' and 'colspecs'")

    # Compute 'colspecs' from 'widths', if specified.
    if widths is not None:
        colspecs, col = [], 0
        for w in widths:
            colspecs.append((col, col + w))
            col += w
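        # e.g. widths=[3, 5] yields colspecs=[(0, 3), (3, 8)]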

    # for mypy
    assert colspecs is not None

    # GH#40830
    # Ensure length of `colspecs` matches length of `names`
    names = kwds.get("names")
    if names is not None:
        if len(names) != len(colspecs) and colspecs != "infer":
            # need to check len(index_col) as it might contain
            # unnamed indices, in which case its name is not required
            len_index = 0
            if kwds.get("index_col") is not None:
                index_col: Any = kwds.get("index_col")
                if index_col is not False:
                    if not is_list_like(index_col):
                        len_index = 1
                    else:
                        len_index = len(index_col)
            if kwds.get("usecols") is None and len(names) + len_index != len(colspecs):
                # If usecols is used colspec may be longer than names
                raise ValueError("Length of colspecs must match length of names")

    kwds["colspecs"] = colspecs
    kwds["infer_nrows"] = infer_nrows
    kwds["engine"] = "python-fwf"
    kwds["iterator"] = iterator
    kwds["chunksize"] = chunksize

    check_dtype_backend(dtype_backend)
    kwds["dtype_backend"] = dtype_backend
    return _read(filepath_or_buffer, kwds)


class TextFileReader(abc.Iterator):
    """

    Passed dialect overrides any of the related parser options
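
    A minimal usage sketch (``process`` is a hypothetical callback and the
    file name is illustrative):

    >>> with TextFileReader('data.csv', engine='python', chunksize=1000) as reader:  # doctest: +SKIP
    ...     for chunk in reader:
    ...         process(chunk)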

    """

    def __init__(
        self,
        f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list,
        engine: CSVEngine | None = None,
        **kwds,
    ) -> None:
        if engine is not None:
            engine_specified = True
        else:
            engine = "python"
            engine_specified = False
        self.engine = engine
        self._engine_specified = kwds.get("engine_specified", engine_specified)

        _validate_skipfooter(kwds)

        dialect = _extract_dialect(kwds)
        if dialect is not None:
            if engine == "pyarrow":
                raise ValueError(
                    "The 'dialect' option is not supported with the 'pyarrow' engine"
                )
            kwds = _merge_with_dialect_properties(dialect, kwds)

        if kwds.get("header", "infer") == "infer":
            kwds["header"] = 0 if kwds.get("names") is None else None

        self.orig_options = kwds

        # miscellanea
        self._currow = 0

        options = self._get_options_with_defaults(engine)
        options["storage_options"] = kwds.get("storage_options", None)

        self.chunksize = options.pop("chunksize", None)
        self.nrows = options.pop("nrows", None)

        self._check_file_or_buffer(f, engine)
        self.options, self.engine = self._clean_options(options, engine)

        if "has_index_names" in kwds:
            self.options["has_index_names"] = kwds["has_index_names"]

        self.handles: IOHandles | None = None
        self._engine = self._make_engine(f, self.engine)

    def close(self) -> None:
        if self.handles is not None:
            self.handles.close()
        self._engine.close()

    def _get_options_with_defaults(self, engine: CSVEngine) -> dict[str, Any]:
        kwds = self.orig_options

        options = {}
        default: object | None

        for argname, default in parser_defaults.items():
            value = kwds.get(argname, default)

            # see gh-12935
            if (
                engine == "pyarrow"
                and argname in _pyarrow_unsupported
                and value != default
                and value != getattr(value, "value", default)
            ):
                raise ValueError(
                    f"The {repr(argname)} option is not supported with the "
                    f"'pyarrow' engine"
                )
            options[argname] = value

        for argname, default in _c_parser_defaults.items():
            if argname in kwds:
                value = kwds[argname]

                if engine != "c" and value != default:
                    # TODO: Refactor this logic, it's pretty convoluted
                    if "python" in engine and argname not in _python_unsupported:
                        pass
                    elif "pyarrow" in engine and argname not in _pyarrow_unsupported:
                        pass
                    else:
                        raise ValueError(
                            f"The {repr(argname)} option is not supported with the "
                            f"{repr(engine)} engine"
                        )
            else:
                value = default
            options[argname] = value

        if engine == "python-fwf":
            for argname, default in _fwf_defaults.items():
                options[argname] = kwds.get(argname, default)

        return options

    def _check_file_or_buffer(self, f, engine: CSVEngine) -> None:
        # see gh-16530
        if is_file_like(f) and engine != "c" and not hasattr(f, "__iter__"):
            # The C engine doesn't need the file-like to have the "__iter__"
            # attribute. However, the Python engine needs "__iter__(...)"
            # when iterating through such an object, meaning it
            # needs to have that attribute
            raise ValueError(
                "The 'python' engine cannot iterate through this file buffer."
            )

    def _clean_options(
        self, options: dict[str, Any], engine: CSVEngine
    ) -> tuple[dict[str, Any], CSVEngine]:
        result = options.copy()

        fallback_reason = None

        # C engine not supported yet
        if engine == "c":
            if options["skipfooter"] > 0:
                fallback_reason = "the 'c' engine does not support skipfooter"
                engine = "python"

        sep = options["delimiter"]
        delim_whitespace = options["delim_whitespace"]

        if sep is None and not delim_whitespace:
            if engine in ("c", "pyarrow"):
                fallback_reason = (
                    f"the '{engine}' engine does not support "
                    "sep=None with delim_whitespace=False"
                )
                engine = "python"
        elif sep is not None and len(sep) > 1:
            if engine == "c" and sep == r"\s+":
                result["delim_whitespace"] = True
                del result["delimiter"]
            elif engine not in ("python", "python-fwf"):
                # wait until regex engine integrated
                fallback_reason = (
                    f"the '{engine}' engine does not support "
                    "regex separators (separators > 1 char and "
                    r"different from '\s+' are interpreted as regex)"
                )
                engine = "python"
        elif delim_whitespace:
            if "python" in engine:
                result["delimiter"] = r"\s+"
        elif sep is not None:
            encodeable = True
            encoding = sys.getfilesystemencoding() or "utf-8"
            try:
                if len(sep.encode(encoding)) > 1:
                    encodeable = False
            except UnicodeDecodeError:
                encodeable = False
            if not encodeable and engine not in ("python", "python-fwf"):
                fallback_reason = (
                    f"the separator encoded in {encoding} "
                    f"is > 1 char long, and the '{engine}' engine "
                    "does not support such separators"
                )
                engine = "python"

        quotechar = options["quotechar"]
        if quotechar is not None and isinstance(quotechar, (str, bytes)):
            if (
                len(quotechar) == 1
                and ord(quotechar) > 127
                and engine not in ("python", "python-fwf")
            ):
                fallback_reason = (
                    "ord(quotechar) > 127, meaning the "
                    "quotechar is larger than one byte, "
                    f"and the '{engine}' engine does not support such quotechars"
                )
                engine = "python"

        if fallback_reason and self._engine_specified:
            raise ValueError(fallback_reason)

        if engine == "c":
            for arg in _c_unsupported:
                del result[arg]

        if "python" in engine:
            for arg in _python_unsupported:
                if fallback_reason and result[arg] != _c_parser_defaults.get(arg):
                    raise ValueError(
                        "Falling back to the 'python' engine because "
                        f"{fallback_reason}, but this causes {repr(arg)} to be "
                        "ignored as it is not supported by the 'python' engine."
                    )
                del result[arg]

        if fallback_reason:
            warnings.warn(
                (
                    "Falling back to the 'python' engine because "
                    f"{fallback_reason}; you can avoid this warning by specifying "
                    "engine='python'."
                ),
                ParserWarning,
                stacklevel=find_stack_level(),
            )

        index_col = options["index_col"]
        names = options["names"]
        converters = options["converters"]
        na_values = options["na_values"]
        skiprows = options["skiprows"]

        validate_header_arg(options["header"])

        if index_col is True:
            raise ValueError("The value of index_col couldn't be 'True'")
        if is_index_col(index_col):
            if not isinstance(index_col, (list, tuple, np.ndarray)):
                index_col = [index_col]
        result["index_col"] = index_col

        names = list(names) if names is not None else names

        # type conversion-related
        if converters is not None:
            if not isinstance(converters, dict):
                raise TypeError(
                    "Type converters must be a dict or subclass, "
                    f"input was a {type(converters).__name__}"
                )
        else:
            converters = {}

        # Converting values to NA
        keep_default_na = options["keep_default_na"]
        floatify = engine != "pyarrow"
        na_values, na_fvalues = _clean_na_values(
            na_values, keep_default_na, floatify=floatify
        )

        # handle skiprows; this is internally handled by the
        # c-engine, so it is only needed for the python and pyarrow parsers
        if engine == "pyarrow":
            if not is_integer(skiprows) and skiprows is not None:
                # pyarrow expects skiprows to be passed as an integer
                raise ValueError(
                    "skiprows argument must be an integer when using "
                    "engine='pyarrow'"
                )
        else:
            if is_integer(skiprows):
                skiprows = list(range(skiprows))
            if skiprows is None:
                skiprows = set()
            elif not callable(skiprows):
                skiprows = set(skiprows)
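            # e.g. skiprows=3 becomes {0, 1, 2}; skiprows=[0, 2] becomes {0, 2}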

        # put stuff back
        result["names"] = names
        result["converters"] = converters
        result["na_values"] = na_values
        result["na_fvalues"] = na_fvalues
        result["skiprows"] = skiprows

        return result, engine

    def __next__(self) -> DataFrame:
        try:
            return self.get_chunk()
        except StopIteration:
            self.close()
            raise

    def _make_engine(
        self,
        f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list | IO,
        engine: CSVEngine = "c",
    ) -> ParserBase:
        mapping: dict[str, type[ParserBase]] = {
            "c": CParserWrapper,
            "python": PythonParser,
            "pyarrow": ArrowParserWrapper,
            "python-fwf": FixedWidthFieldParser,
        }
        if engine not in mapping:
            raise ValueError(
                f"Unknown engine: {engine} (valid options are {mapping.keys()})"
            )
        if not isinstance(f, list):
            # open file here
            is_text = True
            mode = "r"
            if engine == "pyarrow":
                is_text = False
                mode = "rb"
            elif (
                engine == "c"
                and self.options.get("encoding", "utf-8") == "utf-8"
                and isinstance(stringify_path(f), str)
            ):
                # The c engine can decode utf-8 bytes directly; adding a
                # TextIOWrapper makes it far slower, especially with memory_map=True
                is_text = False
                if "b" not in mode:
                    mode += "b"
            self.handles = get_handle(
                f,
                mode,
                encoding=self.options.get("encoding", None),
                compression=self.options.get("compression", None),
                memory_map=self.options.get("memory_map", False),
                is_text=is_text,
                errors=self.options.get("encoding_errors", "strict"),
                storage_options=self.options.get("storage_options", None),
            )
            assert self.handles is not None
            f = self.handles.handle

        elif engine != "python":
            msg = f"Invalid file path or buffer object type: {type(f)}"
            raise ValueError(msg)

        try:
            return mapping[engine](f, **self.options)
        except Exception:
            if self.handles is not None:
                self.handles.close()
            raise

    def _failover_to_python(self) -> None:
        raise AbstractMethodError(self)

    def read(self, nrows: int | None = None) -> DataFrame:
        if self.engine == "pyarrow":
            try:
                # error: "ParserBase" has no attribute "read"
                df = self._engine.read()  # type: ignore[attr-defined]
            except Exception:
                self.close()
                raise
        else:
            nrows = validate_integer("nrows", nrows)
            try:
                # error: "ParserBase" has no attribute "read"
                (
                    index,
                    columns,
                    col_dict,
                ) = self._engine.read(  # type: ignore[attr-defined]
                    nrows
                )
            except Exception:
                self.close()
                raise

            if index is None:
                if col_dict:
                    # Any column is actually fine:
                    new_rows = len(next(iter(col_dict.values())))
                    index = RangeIndex(self._currow, self._currow + new_rows)
                else:
                    new_rows = 0
            else:
                new_rows = len(index)

            if hasattr(self, "orig_options"):
                dtype_arg = self.orig_options.get("dtype", None)
            else:
                dtype_arg = None

            if isinstance(dtype_arg, dict):
                dtype = defaultdict(lambda: None)  # type: ignore[var-annotated]
                dtype.update(dtype_arg)
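                # e.g. dtype={"a": "Int64"} leaves columns not listed mapped to None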
            elif dtype_arg is not None and pandas_dtype(dtype_arg) in (
                np.str_,
                np.object_,
            ):
                dtype = defaultdict(lambda: dtype_arg)
            else:
                dtype = None

            if dtype is not None:
                new_col_dict = {}
                for k, v in col_dict.items():
                    d = (
                        dtype[k]
                        if pandas_dtype(dtype[k]) in (np.str_, np.object_)
                        else None
                    )
                    new_col_dict[k] = Series(v, index=index, dtype=d, copy=False)
            else:
                new_col_dict = col_dict

            df = DataFrame(
                new_col_dict,
                columns=columns,
                index=index,
                copy=not using_copy_on_write(),
            )

            self._currow += new_rows
        return df

    def get_chunk(self, size: int | None = None) -> DataFrame:
        if size is None:
            size = self.chunksize
        if self.nrows is not None:
            if self._currow >= self.nrows:
                raise StopIteration
            size = min(size, self.nrows - self._currow)
        return self.read(nrows=size)

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        self.close()


def TextParser(*args, **kwds) -> TextFileReader:
    """
    Converts lists of lists/tuples into DataFrames with proper type inference
    and optional (e.g. string to datetime) conversion. Also enables iterating
    lazily over chunks of large files.

    Parameters
    ----------
    data : file-like object or list
    delimiter : separator character to use
    dialect : str or csv.Dialect instance, optional
        Ignored if delimiter is longer than 1 character
    names : sequence, default
    header : int, default 0
        Row to use to parse column labels. Defaults to the first row. Prior
        rows will be discarded.
    index_col : int or list, optional
        Column or columns to use as the (possibly hierarchical) index
    has_index_names: bool, default False
        True if the cols defined in index_col have an index name and are
        not in the header.
    na_values : scalar, str, list-like, or dict, optional
        Additional strings to recognize as NA/NaN.
    keep_default_na : bool, default True
    thousands : str, optional
        Thousands separator
    comment : str, optional
        Comment out remainder of line
    parse_dates : bool, default False
    keep_date_col : bool, default False
    date_parser : function, optional

        .. deprecated:: 2.0.0
    date_format : str or dict of column -> format, default ``None``

        .. versionadded:: 2.0.0
    skiprows : list of integers
        Row numbers to skip
    skipfooter : int
        Number of lines at the bottom of the file to skip
    converters : dict, optional
        Dict of functions for converting values in certain columns. Keys can
        either be integers or column labels, values are functions that take one
        input argument, the cell (not column) content, and return the
        transformed content.
    encoding : str, optional
        Encoding to use for UTF when reading/writing (ex. 'utf-8')
    float_precision : str, optional
        Specifies which converter the C engine should use for floating-point
        values. The options are `None` or `high` for the ordinary converter,
        `legacy` for the original lower precision pandas converter, and
        `round_trip` for the round-trip converter.
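
    Examples
    --------
    A minimal sketch: parse an in-memory list of rows, taking the first row
    as the header (the data below is illustrative):

    >>> data = [["a", "b"], [1, 2], [3, 4]]
    >>> with TextParser(data, header=0) as parser:  # doctest: +SKIP
    ...     df = parser.read()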
    """
    kwds["engine"] = "python"
    return TextFileReader(*args, **kwds)


def _clean_na_values(na_values, keep_default_na: bool = True, floatify: bool = True):
    na_fvalues: set | dict
    if na_values is None:
        if keep_default_na:
            na_values = STR_NA_VALUES
        else:
            na_values = set()
        na_fvalues = set()
    elif isinstance(na_values, dict):
        old_na_values = na_values.copy()
        na_values = {}  # Prevent aliasing.

        # Convert the values in the na_values dictionary
        # into array-likes for further use. This is also
        # where we append the default NaN values, provided
        # that `keep_default_na=True`.
        for k, v in old_na_values.items():
            if not is_list_like(v):
                v = [v]

            if keep_default_na:
                v = set(v) | STR_NA_VALUES

            na_values[k] = v
        na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
    else:
        if not is_list_like(na_values):
            na_values = [na_values]
        na_values = _stringify_na_values(na_values, floatify)
        if keep_default_na:
            na_values = na_values | STR_NA_VALUES

        na_fvalues = _floatify_na_values(na_values)

    return na_values, na_fvalues
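

# Illustrative behaviour (comment only, not executed): with keep_default_na=True,
# a user-supplied value is merged with the default STR_NA_VALUES set, e.g.
# _clean_na_values(["missing"]) returns a string set containing "missing" plus
# the defaults, together with the matching float-valued set.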


def _floatify_na_values(na_values):
    # create float versions of the na_values
    result = set()
    for v in na_values:
        try:
            v = float(v)
            if not np.isnan(v):
                result.add(v)
        except (TypeError, ValueError, OverflowError):
            pass
    return result


def _stringify_na_values(na_values, floatify: bool):
    """Return stringified and numeric variants of these values."""
    result: list[str | float] = []
    for x in na_values:
        result.append(str(x))
        result.append(x)
        try:
            v = float(x)

            # the value is integer-like at this point, e.g. 999
            if v == int(v):
                v = int(v)
                result.append(f"{v}.0")
                result.append(str(v))

            if floatify:
                result.append(v)
        except (TypeError, ValueError, OverflowError):
            pass
        if floatify:
            try:
                result.append(int(x))
            except (TypeError, ValueError, OverflowError):
                pass
    return set(result)
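

# Illustrative behaviour (comment only): _stringify_na_values([999], floatify=True)
# returns {'999', '999.0', 999}, so both string and numeric spellings match.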


def _refine_defaults_read(
    dialect: str | csv.Dialect | None,
    delimiter: str | None | lib.NoDefault,
    delim_whitespace: bool,
    engine: CSVEngine | None,
    sep: str | None | lib.NoDefault,
    on_bad_lines: str | Callable,
    names: Sequence[Hashable] | None | lib.NoDefault,
    defaults: dict[str, Any],
    dtype_backend: DtypeBackend | lib.NoDefault,
):
    """Validate/refine default values of input parameters of read_csv, read_table.

    Parameters
    ----------
    dialect : str or csv.Dialect
        If provided, this parameter will override values (default or not) for the
        following parameters: `delimiter`, `doublequote`, `escapechar`,
        `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
        override values, a ParserWarning will be issued. See csv.Dialect
        documentation for more details.
    delimiter : str or object
        Alias for sep.
    delim_whitespace : bool
        Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
        used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
        is set to True, nothing should be passed in for the ``delimiter``
        parameter.

        .. deprecated:: 2.2.0
            Use ``sep="\\s+"`` instead.
    engine : {{'c', 'python'}}
        Parser engine to use. The C engine is faster while the python engine is
        currently more feature-complete.
    sep : str or object
        A delimiter provided by the user (str) or a sentinel value, i.e.
        pandas._libs.lib.no_default.
    on_bad_lines : str, callable
        An option for handling bad lines or a sentinel value (None).
    names : array-like, optional
        List of column names to use. If the file contains a header row,
        then you should explicitly pass ``header=0`` to override the column names.
        Duplicates in this list are not allowed.
    defaults: dict
        Default values of input parameters.

    Returns
    -------
    kwds : dict
        Input parameters with correct values.

    Raises
    ------
    ValueError :
        If a delimiter was specified with ``sep`` (or ``delimiter``) and
        ``delim_whitespace=True``.
    """
    # fix types for sep, delimiter to Union(str, Any)
    delim_default = defaults["delimiter"]
    kwds: dict[str, Any] = {}
    # gh-23761
    #
    # When a dialect is passed, it overrides any of the overlapping
    # parameters passed in directly. We don't want to warn if the
    # default parameters were passed in (since it probably means
    # that the user didn't pass them in explicitly in the first place).
    #
    # "delimiter" is the annoying corner case because we alias it to
    # "sep" before doing comparison to the dialect values later on.
    # Thus, we need a flag to indicate that we need to "override"
    # the comparison to dialect values by checking if default values
    # for BOTH "delimiter" and "sep" were provided.
    if dialect is not None:
        kwds["sep_override"] = delimiter is None and (
            sep is lib.no_default or sep == delim_default
        )

    if delimiter and (sep is not lib.no_default):
        raise ValueError("Specified a sep and a delimiter; you can only specify one.")

    kwds["names"] = None if names is lib.no_default else names

    # Alias sep -> delimiter.
    if delimiter is None:
        delimiter = sep

    if delim_whitespace and (delimiter is not lib.no_default):
        raise ValueError(
            "Specified a delimiter with both sep and "
            "delim_whitespace=True; you can only specify one."
        )

    if delimiter == "\n":
        raise ValueError(
            r"Specified \n as separator or delimiter. This forces the python engine "
            "which does not accept a line terminator. Hence it is not allowed to use "
            "the line terminator as separator.",
        )

    if delimiter is lib.no_default:
        # assign default separator value
        kwds["delimiter"] = delim_default
    else:
        kwds["delimiter"] = delimiter

    if engine is not None:
        kwds["engine_specified"] = True
    else:
        kwds["engine"] = "c"
        kwds["engine_specified"] = False

    if on_bad_lines == "error":
        kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.ERROR
    elif on_bad_lines == "warn":
        kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN
    elif on_bad_lines == "skip":
        kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP
    elif callable(on_bad_lines):
        if engine not in ["python", "pyarrow"]:
            raise ValueError(
                "on_bad_line can only be a callable function "
 | 
						|
                "if engine='python' or 'pyarrow'"
            )
        kwds["on_bad_lines"] = on_bad_lines
    else:
        raise ValueError(f"Argument {on_bad_lines} is invalid for on_bad_lines")

    check_dtype_backend(dtype_backend)

    kwds["dtype_backend"] = dtype_backend

    return kwds
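

# Illustrative behaviour (comment only): with delimiter=None and sep=";",
# the alias step yields kwds["delimiter"] == ";"; with both left at their
# sentinels, kwds["delimiter"] falls back to defaults["delimiter"].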


def _extract_dialect(kwds: dict[str, Any]) -> csv.Dialect | None:
    """
    Extract concrete csv dialect instance.

    Returns
    -------
    csv.Dialect or None
    """
    if kwds.get("dialect") is None:
        return None

    dialect = kwds["dialect"]
    if dialect in csv.list_dialects():
        dialect = csv.get_dialect(dialect)

    _validate_dialect(dialect)

    return dialect
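

# Illustrative behaviour (comment only): _extract_dialect({"dialect": "excel"})
# resolves the registered name via csv.get_dialect and returns that instance.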


MANDATORY_DIALECT_ATTRS = (
    "delimiter",
    "doublequote",
    "escapechar",
    "skipinitialspace",
    "quotechar",
    "quoting",
)


def _validate_dialect(dialect: csv.Dialect) -> None:
    """
    Validate csv dialect instance.

    Raises
    ------
    ValueError
        If incorrect dialect is provided.
    """
    for param in MANDATORY_DIALECT_ATTRS:
        if not hasattr(dialect, param):
            raise ValueError(f"Invalid dialect {dialect} provided")


def _merge_with_dialect_properties(
    dialect: csv.Dialect,
    defaults: dict[str, Any],
) -> dict[str, Any]:
    """
    Merge default kwargs in TextFileReader with dialect parameters.

    Parameters
    ----------
    dialect : csv.Dialect
        Concrete csv dialect. See csv.Dialect documentation for more details.
    defaults : dict
        Keyword arguments passed to TextFileReader.

    Returns
    -------
    kwds : dict
        Updated keyword arguments, merged with dialect parameters.
    """
    kwds = defaults.copy()

    for param in MANDATORY_DIALECT_ATTRS:
        dialect_val = getattr(dialect, param)

        parser_default = parser_defaults[param]
        provided = kwds.get(param, parser_default)

        # Messages for conflicting values between the dialect
        # instance and the actual parameters provided.
        conflict_msgs = []

        # Don't warn if the default parameter was passed in,
        # even if it conflicts with the dialect (gh-23761).
        if provided not in (parser_default, dialect_val):
            msg = (
                f"Conflicting values for '{param}': '{provided}' was "
                f"provided, but the dialect specifies '{dialect_val}'. "
                "Using the dialect-specified value."
            )

            # Annoying corner case for not warning about
            # conflicts between dialect and delimiter parameter.
            # Refer to _refine_defaults_read (where "sep_override" is set) for more info.
            if not (param == "delimiter" and kwds.pop("sep_override", False)):
                conflict_msgs.append(msg)

        if conflict_msgs:
            warnings.warn(
                "\n\n".join(conflict_msgs), ParserWarning, stacklevel=find_stack_level()
            )
        kwds[param] = dialect_val
    return kwds
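

# Illustrative behaviour (comment only): with csv.excel (delimiter=','), a
# user-supplied delimiter='|' conflicts, a ParserWarning is emitted, and the
# dialect value ',' wins.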


def _validate_skipfooter(kwds: dict[str, Any]) -> None:
    """
    Check whether skipfooter is compatible with other kwargs in TextFileReader.

    Parameters
    ----------
    kwds : dict
        Keyword arguments passed to TextFileReader.

    Raises
    ------
    ValueError
        If skipfooter is not compatible with other parameters.
    """
    if kwds.get("skipfooter"):
        if kwds.get("iterator") or kwds.get("chunksize"):
            raise ValueError("'skipfooter' not supported for iteration")
        if kwds.get("nrows"):
            raise ValueError("'skipfooter' not supported with 'nrows'")