You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			457 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			457 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
"""
 | 
						|
Module that contains many useful utilities
 | 
						|
for validating data or function arguments
 | 
						|
"""
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from collections.abc import (
 | 
						|
    Iterable,
 | 
						|
    Sequence,
 | 
						|
)
 | 
						|
from typing import (
 | 
						|
    TypeVar,
 | 
						|
    overload,
 | 
						|
)
 | 
						|
 | 
						|
import numpy as np
 | 
						|
 | 
						|
from pandas._libs import lib
 | 
						|
 | 
						|
from pandas.core.dtypes.common import (
 | 
						|
    is_bool,
 | 
						|
    is_integer,
 | 
						|
)
 | 
						|
 | 
						|
BoolishT = TypeVar("BoolishT", bool, int)
 | 
						|
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
 | 
						|
 | 
						|
 | 
						|
def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
 | 
						|
    """
 | 
						|
    Checks whether 'args' has length of at most 'compat_args'. Raises
 | 
						|
    a TypeError if that is not the case, similar to in Python when a
 | 
						|
    function is called with too many arguments.
 | 
						|
    """
 | 
						|
    if max_fname_arg_count < 0:
 | 
						|
        raise ValueError("'max_fname_arg_count' must be non-negative")
 | 
						|
 | 
						|
    if len(args) > len(compat_args):
 | 
						|
        max_arg_count = len(compat_args) + max_fname_arg_count
 | 
						|
        actual_arg_count = len(args) + max_fname_arg_count
 | 
						|
        argument = "argument" if max_arg_count == 1 else "arguments"
 | 
						|
 | 
						|
        raise TypeError(
 | 
						|
            f"{fname}() takes at most {max_arg_count} {argument} "
 | 
						|
            f"({actual_arg_count} given)"
 | 
						|
        )
 | 
						|
 | 
						|
 | 
						|
def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
 | 
						|
    """
 | 
						|
    Check that the keys in `arg_val_dict` are mapped to their
 | 
						|
    default values as specified in `compat_args`.
 | 
						|
 | 
						|
    Note that this function is to be called only when it has been
 | 
						|
    checked that arg_val_dict.keys() is a subset of compat_args
 | 
						|
    """
 | 
						|
    for key in arg_val_dict:
 | 
						|
        # try checking equality directly with '=' operator,
 | 
						|
        # as comparison may have been overridden for the left
 | 
						|
        # hand object
 | 
						|
        try:
 | 
						|
            v1 = arg_val_dict[key]
 | 
						|
            v2 = compat_args[key]
 | 
						|
 | 
						|
            # check for None-ness otherwise we could end up
 | 
						|
            # comparing a numpy array vs None
 | 
						|
            if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
 | 
						|
                match = False
 | 
						|
            else:
 | 
						|
                match = v1 == v2
 | 
						|
 | 
						|
            if not is_bool(match):
 | 
						|
                raise ValueError("'match' is not a boolean")
 | 
						|
 | 
						|
        # could not compare them directly, so try comparison
 | 
						|
        # using the 'is' operator
 | 
						|
        except ValueError:
 | 
						|
            match = arg_val_dict[key] is compat_args[key]
 | 
						|
 | 
						|
        if not match:
 | 
						|
            raise ValueError(
 | 
						|
                f"the '{key}' parameter is not supported in "
 | 
						|
                f"the pandas implementation of {fname}()"
 | 
						|
            )
 | 
						|
 | 
						|
 | 
						|
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
 | 
						|
    """
 | 
						|
    Checks whether the length of the `*args` argument passed into a function
 | 
						|
    has at most `len(compat_args)` arguments and whether or not all of these
 | 
						|
    elements in `args` are set to their default values.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    fname : str
 | 
						|
        The name of the function being passed the `*args` parameter
 | 
						|
    args : tuple
 | 
						|
        The `*args` parameter passed into a function
 | 
						|
    max_fname_arg_count : int
 | 
						|
        The maximum number of arguments that the function `fname`
 | 
						|
        can accept, excluding those in `args`. Used for displaying
 | 
						|
        appropriate error messages. Must be non-negative.
 | 
						|
    compat_args : dict
 | 
						|
        A dictionary of keys and their associated default values.
 | 
						|
        In order to accommodate buggy behaviour in some versions of `numpy`,
 | 
						|
        where a signature displayed keyword arguments but then passed those
 | 
						|
        arguments **positionally** internally when calling downstream
 | 
						|
        implementations, a dict ensures that the original
 | 
						|
        order of the keyword arguments is enforced.
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    TypeError
 | 
						|
        If `args` contains more values than there are `compat_args`
 | 
						|
    ValueError
 | 
						|
        If `args` contains values that do not correspond to those
 | 
						|
        of the default values specified in `compat_args`
 | 
						|
    """
 | 
						|
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)
 | 
						|
 | 
						|
    # We do this so that we can provide a more informative
 | 
						|
    # error message about the parameters that we are not
 | 
						|
    # supporting in the pandas implementation of 'fname'
 | 
						|
    kwargs = dict(zip(compat_args, args))
 | 
						|
    _check_for_default_values(fname, kwargs, compat_args)
 | 
						|
 | 
						|
 | 
						|
def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
 | 
						|
    """
 | 
						|
    Checks whether 'kwargs' contains any keys that are not
 | 
						|
    in 'compat_args' and raises a TypeError if there is one.
 | 
						|
    """
 | 
						|
    # set(dict) --> set of the dictionary's keys
 | 
						|
    diff = set(kwargs) - set(compat_args)
 | 
						|
 | 
						|
    if diff:
 | 
						|
        bad_arg = next(iter(diff))
 | 
						|
        raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
 | 
						|
 | 
						|
 | 
						|
def validate_kwargs(fname, kwargs, compat_args) -> None:
 | 
						|
    """
 | 
						|
    Checks whether parameters passed to the **kwargs argument in a
 | 
						|
    function `fname` are valid parameters as specified in `*compat_args`
 | 
						|
    and whether or not they are set to their default values.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    fname : str
 | 
						|
        The name of the function being passed the `**kwargs` parameter
 | 
						|
    kwargs : dict
 | 
						|
        The `**kwargs` parameter passed into `fname`
 | 
						|
    compat_args: dict
 | 
						|
        A dictionary of keys that `kwargs` is allowed to have and their
 | 
						|
        associated default values
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    TypeError if `kwargs` contains keys not in `compat_args`
 | 
						|
    ValueError if `kwargs` contains keys in `compat_args` that do not
 | 
						|
    map to the default values specified in `compat_args`
 | 
						|
    """
 | 
						|
    kwds = kwargs.copy()
 | 
						|
    _check_for_invalid_keys(fname, kwargs, compat_args)
 | 
						|
    _check_for_default_values(fname, kwds, compat_args)
 | 
						|
 | 
						|
 | 
						|
def validate_args_and_kwargs(
 | 
						|
    fname, args, kwargs, max_fname_arg_count, compat_args
 | 
						|
) -> None:
 | 
						|
    """
 | 
						|
    Checks whether parameters passed to the *args and **kwargs argument in a
 | 
						|
    function `fname` are valid parameters as specified in `*compat_args`
 | 
						|
    and whether or not they are set to their default values.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    fname: str
 | 
						|
        The name of the function being passed the `**kwargs` parameter
 | 
						|
    args: tuple
 | 
						|
        The `*args` parameter passed into a function
 | 
						|
    kwargs: dict
 | 
						|
        The `**kwargs` parameter passed into `fname`
 | 
						|
    max_fname_arg_count: int
 | 
						|
        The minimum number of arguments that the function `fname`
 | 
						|
        requires, excluding those in `args`. Used for displaying
 | 
						|
        appropriate error messages. Must be non-negative.
 | 
						|
    compat_args: dict
 | 
						|
        A dictionary of keys that `kwargs` is allowed to
 | 
						|
        have and their associated default values.
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    TypeError if `args` contains more values than there are
 | 
						|
    `compat_args` OR `kwargs` contains keys not in `compat_args`
 | 
						|
    ValueError if `args` contains values not at the default value (`None`)
 | 
						|
    `kwargs` contains keys in `compat_args` that do not map to the default
 | 
						|
    value as specified in `compat_args`
 | 
						|
 | 
						|
    See Also
 | 
						|
    --------
 | 
						|
    validate_args : Purely args validation.
 | 
						|
    validate_kwargs : Purely kwargs validation.
 | 
						|
 | 
						|
    """
 | 
						|
    # Check that the total number of arguments passed in (i.e.
 | 
						|
    # args and kwargs) does not exceed the length of compat_args
 | 
						|
    _check_arg_length(
 | 
						|
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
 | 
						|
    )
 | 
						|
 | 
						|
    # Check there is no overlap with the positional and keyword
 | 
						|
    # arguments, similar to what is done in actual Python functions
 | 
						|
    args_dict = dict(zip(compat_args, args))
 | 
						|
 | 
						|
    for key in args_dict:
 | 
						|
        if key in kwargs:
 | 
						|
            raise TypeError(
 | 
						|
                f"{fname}() got multiple values for keyword argument '{key}'"
 | 
						|
            )
 | 
						|
 | 
						|
    kwargs.update(args_dict)
 | 
						|
    validate_kwargs(fname, kwargs, compat_args)
 | 
						|
 | 
						|
 | 
						|
def validate_bool_kwarg(
 | 
						|
    value: BoolishNoneT,
 | 
						|
    arg_name: str,
 | 
						|
    none_allowed: bool = True,
 | 
						|
    int_allowed: bool = False,
 | 
						|
) -> BoolishNoneT:
 | 
						|
    """
 | 
						|
    Ensure that argument passed in arg_name can be interpreted as boolean.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    value : bool
 | 
						|
        Value to be validated.
 | 
						|
    arg_name : str
 | 
						|
        Name of the argument. To be reflected in the error message.
 | 
						|
    none_allowed : bool, default True
 | 
						|
        Whether to consider None to be a valid boolean.
 | 
						|
    int_allowed : bool, default False
 | 
						|
        Whether to consider integer value to be a valid boolean.
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    value
 | 
						|
        The same value as input.
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    ValueError
 | 
						|
        If the value is not a valid boolean.
 | 
						|
    """
 | 
						|
    good_value = is_bool(value)
 | 
						|
    if none_allowed:
 | 
						|
        good_value = good_value or (value is None)
 | 
						|
 | 
						|
    if int_allowed:
 | 
						|
        good_value = good_value or isinstance(value, int)
 | 
						|
 | 
						|
    if not good_value:
 | 
						|
        raise ValueError(
 | 
						|
            f'For argument "{arg_name}" expected type bool, received '
 | 
						|
            f"type {type(value).__name__}."
 | 
						|
        )
 | 
						|
    return value  # pyright: ignore[reportGeneralTypeIssues]
 | 
						|
 | 
						|
 | 
						|
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
 | 
						|
    """
 | 
						|
    Validate the keyword arguments to 'fillna'.
 | 
						|
 | 
						|
    This checks that exactly one of 'value' and 'method' is specified.
 | 
						|
    If 'method' is specified, this validates that it's a valid method.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    value, method : object
 | 
						|
        The 'value' and 'method' keyword arguments for 'fillna'.
 | 
						|
    validate_scalar_dict_value : bool, default True
 | 
						|
        Whether to validate that 'value' is a scalar or dict. Specifically,
 | 
						|
        validate that it is not a list or tuple.
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    value, method : object
 | 
						|
    """
 | 
						|
    from pandas.core.missing import clean_fill_method
 | 
						|
 | 
						|
    if value is None and method is None:
 | 
						|
        raise ValueError("Must specify a fill 'value' or 'method'.")
 | 
						|
    if value is None and method is not None:
 | 
						|
        method = clean_fill_method(method)
 | 
						|
 | 
						|
    elif value is not None and method is None:
 | 
						|
        if validate_scalar_dict_value and isinstance(value, (list, tuple)):
 | 
						|
            raise TypeError(
 | 
						|
                '"value" parameter must be a scalar or dict, but '
 | 
						|
                f'you passed a "{type(value).__name__}"'
 | 
						|
            )
 | 
						|
 | 
						|
    elif value is not None and method is not None:
 | 
						|
        raise ValueError("Cannot specify both 'value' and 'method'.")
 | 
						|
 | 
						|
    return value, method
 | 
						|
 | 
						|
 | 
						|
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
 | 
						|
    """
 | 
						|
    Validate percentiles (used by describe and quantile).
 | 
						|
 | 
						|
    This function checks if the given float or iterable of floats is a valid percentile
 | 
						|
    otherwise raises a ValueError.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    q: float or iterable of floats
 | 
						|
        A single percentile or an iterable of percentiles.
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    ndarray
 | 
						|
        An ndarray of the percentiles if valid.
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    ValueError if percentiles are not in given interval([0, 1]).
 | 
						|
    """
 | 
						|
    q_arr = np.asarray(q)
 | 
						|
    # Don't change this to an f-string. The string formatting
 | 
						|
    # is too expensive for cases where we don't need it.
 | 
						|
    msg = "percentiles should all be in the interval [0, 1]"
 | 
						|
    if q_arr.ndim == 0:
 | 
						|
        if not 0 <= q_arr <= 1:
 | 
						|
            raise ValueError(msg)
 | 
						|
    else:
 | 
						|
        if not all(0 <= qs <= 1 for qs in q_arr):
 | 
						|
            raise ValueError(msg)
 | 
						|
    return q_arr
 | 
						|
 | 
						|
 | 
						|
@overload
 | 
						|
def validate_ascending(ascending: BoolishT) -> BoolishT:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
@overload
 | 
						|
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
 | 
						|
    ...
 | 
						|
 | 
						|
 | 
						|
def validate_ascending(
 | 
						|
    ascending: bool | int | Sequence[BoolishT],
 | 
						|
) -> bool | int | list[BoolishT]:
 | 
						|
    """Validate ``ascending`` kwargs for ``sort_index`` method."""
 | 
						|
    kwargs = {"none_allowed": False, "int_allowed": True}
 | 
						|
    if not isinstance(ascending, Sequence):
 | 
						|
        return validate_bool_kwarg(ascending, "ascending", **kwargs)
 | 
						|
 | 
						|
    return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
 | 
						|
 | 
						|
 | 
						|
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
 | 
						|
    """
 | 
						|
    Check that the `closed` argument is among [None, "left", "right"]
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    closed : {None, "left", "right"}
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    left_closed : bool
 | 
						|
    right_closed : bool
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    ValueError : if argument is not among valid values
 | 
						|
    """
 | 
						|
    left_closed = False
 | 
						|
    right_closed = False
 | 
						|
 | 
						|
    if closed is None:
 | 
						|
        left_closed = True
 | 
						|
        right_closed = True
 | 
						|
    elif closed == "left":
 | 
						|
        left_closed = True
 | 
						|
    elif closed == "right":
 | 
						|
        right_closed = True
 | 
						|
    else:
 | 
						|
        raise ValueError("Closed has to be either 'left', 'right' or None")
 | 
						|
 | 
						|
    return left_closed, right_closed
 | 
						|
 | 
						|
 | 
						|
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
 | 
						|
    """
 | 
						|
    Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    inclusive : {"both", "neither", "left", "right"}
 | 
						|
 | 
						|
    Returns
 | 
						|
    -------
 | 
						|
    left_right_inclusive : tuple[bool, bool]
 | 
						|
 | 
						|
    Raises
 | 
						|
    ------
 | 
						|
    ValueError : if argument is not among valid values
 | 
						|
    """
 | 
						|
    left_right_inclusive: tuple[bool, bool] | None = None
 | 
						|
 | 
						|
    if isinstance(inclusive, str):
 | 
						|
        left_right_inclusive = {
 | 
						|
            "both": (True, True),
 | 
						|
            "left": (True, False),
 | 
						|
            "right": (False, True),
 | 
						|
            "neither": (False, False),
 | 
						|
        }.get(inclusive)
 | 
						|
 | 
						|
    if left_right_inclusive is None:
 | 
						|
        raise ValueError(
 | 
						|
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
 | 
						|
        )
 | 
						|
 | 
						|
    return left_right_inclusive
 | 
						|
 | 
						|
 | 
						|
def validate_insert_loc(loc: int, length: int) -> int:
 | 
						|
    """
 | 
						|
    Check that we have an integer between -length and length, inclusive.
 | 
						|
 | 
						|
    Standardize negative loc to within [0, length].
 | 
						|
 | 
						|
    The exceptions we raise on failure match np.insert.
 | 
						|
    """
 | 
						|
    if not is_integer(loc):
 | 
						|
        raise TypeError(f"loc must be an integer between -{length} and {length}")
 | 
						|
 | 
						|
    if loc < 0:
 | 
						|
        loc += length
 | 
						|
    if not 0 <= loc <= length:
 | 
						|
        raise IndexError(f"loc must be an integer between -{length} and {length}")
 | 
						|
    return loc  # pyright: ignore[reportGeneralTypeIssues]
 | 
						|
 | 
						|
 | 
						|
def check_dtype_backend(dtype_backend) -> None:
 | 
						|
    if dtype_backend is not lib.no_default:
 | 
						|
        if dtype_backend not in ["numpy_nullable", "pyarrow"]:
 | 
						|
            raise ValueError(
 | 
						|
                f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
 | 
						|
                f"'pyarrow' are allowed.",
 | 
						|
            )
 |