ai-content-maker/.venv/Lib/site-packages/pandas/util/_validators.py

543 lines
17 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
"""
Module that contains many useful utilities
for validating data or function arguments
"""
from __future__ import annotations
from typing import (
Any,
Iterable,
Sequence,
TypeVar,
overload,
)
import warnings
import numpy as np
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_bool,
is_integer,
)
BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
"""
Checks whether 'args' has length of at most 'compat_args'. Raises
a TypeError if that is not the case, similar to in Python when a
function is called with too many arguments.
"""
if max_fname_arg_count < 0:
raise ValueError("'max_fname_arg_count' must be non-negative")
if len(args) > len(compat_args):
max_arg_count = len(compat_args) + max_fname_arg_count
actual_arg_count = len(args) + max_fname_arg_count
argument = "argument" if max_arg_count == 1 else "arguments"
raise TypeError(
f"{fname}() takes at most {max_arg_count} {argument} "
f"({actual_arg_count} given)"
)
def _check_for_default_values(fname, arg_val_dict, compat_args):
"""
Check that the keys in `arg_val_dict` are mapped to their
default values as specified in `compat_args`.
Note that this function is to be called only when it has been
checked that arg_val_dict.keys() is a subset of compat_args
"""
for key in arg_val_dict:
# try checking equality directly with '=' operator,
# as comparison may have been overridden for the left
# hand object
try:
v1 = arg_val_dict[key]
v2 = compat_args[key]
# check for None-ness otherwise we could end up
# comparing a numpy array vs None
if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
match = False
else:
match = v1 == v2
if not is_bool(match):
raise ValueError("'match' is not a boolean")
# could not compare them directly, so try comparison
# using the 'is' operator
except ValueError:
match = arg_val_dict[key] is compat_args[key]
if not match:
raise ValueError(
f"the '{key}' parameter is not supported in "
f"the pandas implementation of {fname}()"
)
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
"""
Checks whether the length of the `*args` argument passed into a function
has at most `len(compat_args)` arguments and whether or not all of these
elements in `args` are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `*args` parameter
args : tuple
The `*args` parameter passed into a function
max_fname_arg_count : int
The maximum number of arguments that the function `fname`
can accept, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args : dict
A dictionary of keys and their associated default values.
In order to accommodate buggy behaviour in some versions of `numpy`,
where a signature displayed keyword arguments but then passed those
arguments **positionally** internally when calling downstream
implementations, a dict ensures that the original
order of the keyword arguments is enforced.
Raises
------
TypeError
If `args` contains more values than there are `compat_args`
ValueError
If `args` contains values that do not correspond to those
of the default values specified in `compat_args`
"""
_check_arg_length(fname, args, max_fname_arg_count, compat_args)
# We do this so that we can provide a more informative
# error message about the parameters that we are not
# supporting in the pandas implementation of 'fname'
kwargs = dict(zip(compat_args, args))
_check_for_default_values(fname, kwargs, compat_args)
def _check_for_invalid_keys(fname, kwargs, compat_args):
"""
Checks whether 'kwargs' contains any keys that are not
in 'compat_args' and raises a TypeError if there is one.
"""
# set(dict) --> set of the dictionary's keys
diff = set(kwargs) - set(compat_args)
if diff:
bad_arg = list(diff)[0]
raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
def validate_kwargs(fname, kwargs, compat_args) -> None:
"""
Checks whether parameters passed to the **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `**kwargs` parameter
kwargs : dict
The `**kwargs` parameter passed into `fname`
compat_args: dict
A dictionary of keys that `kwargs` is allowed to have and their
associated default values
Raises
------
TypeError if `kwargs` contains keys not in `compat_args`
ValueError if `kwargs` contains keys in `compat_args` that do not
map to the default values specified in `compat_args`
"""
kwds = kwargs.copy()
_check_for_invalid_keys(fname, kwargs, compat_args)
_check_for_default_values(fname, kwds, compat_args)
def validate_args_and_kwargs(
fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
"""
Checks whether parameters passed to the *args and **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname: str
The name of the function being passed the `**kwargs` parameter
args: tuple
The `*args` parameter passed into a function
kwargs: dict
The `**kwargs` parameter passed into `fname`
max_fname_arg_count: int
The minimum number of arguments that the function `fname`
requires, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args: dict
A dictionary of keys that `kwargs` is allowed to
have and their associated default values.
Raises
------
TypeError if `args` contains more values than there are
`compat_args` OR `kwargs` contains keys not in `compat_args`
ValueError if `args` contains values not at the default value (`None`)
`kwargs` contains keys in `compat_args` that do not map to the default
value as specified in `compat_args`
See Also
--------
validate_args : Purely args validation.
validate_kwargs : Purely kwargs validation.
"""
# Check that the total number of arguments passed in (i.e.
# args and kwargs) does not exceed the length of compat_args
_check_arg_length(
fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
)
# Check there is no overlap with the positional and keyword
# arguments, similar to what is done in actual Python functions
args_dict = dict(zip(compat_args, args))
for key in args_dict:
if key in kwargs:
raise TypeError(
f"{fname}() got multiple values for keyword argument '{key}'"
)
kwargs.update(args_dict)
validate_kwargs(fname, kwargs, compat_args)
def validate_bool_kwarg(
value: BoolishNoneT, arg_name, none_allowed=True, int_allowed=False
) -> BoolishNoneT:
"""
Ensure that argument passed in arg_name can be interpreted as boolean.
Parameters
----------
value : bool
Value to be validated.
arg_name : str
Name of the argument. To be reflected in the error message.
none_allowed : bool, default True
Whether to consider None to be a valid boolean.
int_allowed : bool, default False
Whether to consider integer value to be a valid boolean.
Returns
-------
value
The same value as input.
Raises
------
ValueError
If the value is not a valid boolean.
"""
good_value = is_bool(value)
if none_allowed:
good_value = good_value or value is None
if int_allowed:
good_value = good_value or isinstance(value, int)
if not good_value:
raise ValueError(
f'For argument "{arg_name}" expected type bool, received '
f"type {type(value).__name__}."
)
return value
def validate_axis_style_args(
data, args, kwargs, arg_name, method_name
) -> dict[str, Any]:
"""
Argument handler for mixed index, columns / axis functions
In an attempt to handle both `.method(index, columns)`, and
`.method(arg, axis=.)`, we have to do some bad things to argument
parsing. This translates all arguments to `{index=., columns=.}` style.
Parameters
----------
data : DataFrame
args : tuple
All positional arguments from the user
kwargs : dict
All keyword arguments from the user
arg_name, method_name : str
Used for better error messages
Returns
-------
kwargs : dict
A dictionary of keyword arguments. Doesn't modify ``kwargs``
inplace, so update them with the return value here.
Examples
--------
>>> df = pd.DataFrame(range(2))
>>> validate_axis_style_args(df, (str.upper,), {'columns': id},
... 'mapper', 'rename')
{'columns': <built-in function id>, 'index': <method 'upper' of 'str' objects>}
This emits a warning
>>> validate_axis_style_args(df, (str.upper, id), {},
... 'mapper', 'rename')
{'index': <method 'upper' of 'str' objects>, 'columns': <built-in function id>}
"""
# TODO: Change to keyword-only args and remove all this
out = {}
# Goal: fill 'out' with index/columns-style arguments
# like out = {'index': foo, 'columns': bar}
# Start by validating for consistency
if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER):
msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
raise TypeError(msg)
# First fill with explicit values provided by the user...
if arg_name in kwargs:
if args:
msg = f"{method_name} got multiple values for argument '{arg_name}'"
raise TypeError(msg)
axis = data._get_axis_name(kwargs.get("axis", 0))
out[axis] = kwargs[arg_name]
# More user-provided arguments, now from kwargs
for k, v in kwargs.items():
try:
ax = data._get_axis_name(k)
except ValueError:
pass
else:
out[ax] = v
# All user-provided kwargs have been handled now.
# Now we supplement with positional arguments, emitting warnings
# when there's ambiguity and raising when there's conflicts
if len(args) == 0:
pass # It's up to the function to decide if this is valid
elif len(args) == 1:
axis = data._get_axis_name(kwargs.get("axis", 0))
out[axis] = args[0]
elif len(args) == 2:
if "axis" in kwargs:
# Unambiguously wrong
msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
raise TypeError(msg)
msg = (
f"Interpreting call\n\t'.{method_name}(a, b)' as "
f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
"arguments to remove any ambiguity. In the future, using "
"positional arguments for 'index' or 'columns' will raise "
"a 'TypeError'."
)
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
out[data._get_axis_name(0)] = args[0]
out[data._get_axis_name(1)] = args[1]
else:
msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
raise TypeError(msg)
return out
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
"""
Validate the keyword arguments to 'fillna'.
This checks that exactly one of 'value' and 'method' is specified.
If 'method' is specified, this validates that it's a valid method.
Parameters
----------
value, method : object
The 'value' and 'method' keyword arguments for 'fillna'.
validate_scalar_dict_value : bool, default True
Whether to validate that 'value' is a scalar or dict. Specifically,
validate that it is not a list or tuple.
Returns
-------
value, method : object
"""
from pandas.core.missing import clean_fill_method
if value is None and method is None:
raise ValueError("Must specify a fill 'value' or 'method'.")
elif value is None and method is not None:
method = clean_fill_method(method)
elif value is not None and method is None:
if validate_scalar_dict_value and isinstance(value, (list, tuple)):
raise TypeError(
'"value" parameter must be a scalar or dict, but '
f'you passed a "{type(value).__name__}"'
)
elif value is not None and method is not None:
raise ValueError("Cannot specify both 'value' and 'method'.")
return value, method
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
"""
Validate percentiles (used by describe and quantile).
This function checks if the given float or iterable of floats is a valid percentile
otherwise raises a ValueError.
Parameters
----------
q: float or iterable of floats
A single percentile or an iterable of percentiles.
Returns
-------
ndarray
An ndarray of the percentiles if valid.
Raises
------
ValueError if percentiles are not in given interval([0, 1]).
"""
q_arr = np.asarray(q)
# Don't change this to an f-string. The string formatting
# is too expensive for cases where we don't need it.
msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
if q_arr.ndim == 0:
if not 0 <= q_arr <= 1:
raise ValueError(msg.format(q_arr / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q_arr):
raise ValueError(msg.format(q_arr / 100.0))
return q_arr
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
...
@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
...
def validate_ascending(
ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
"""Validate ``ascending`` kwargs for ``sort_index`` method."""
kwargs = {"none_allowed": False, "int_allowed": True}
if not isinstance(ascending, Sequence):
return validate_bool_kwarg(ascending, "ascending", **kwargs)
return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
"""
Check that the `closed` argument is among [None, "left", "right"]
Parameters
----------
closed : {None, "left", "right"}
Returns
-------
left_closed : bool
right_closed : bool
Raises
------
ValueError : if argument is not among valid values
"""
left_closed = False
right_closed = False
if closed is None:
left_closed = True
right_closed = True
elif closed == "left":
left_closed = True
elif closed == "right":
right_closed = True
else:
raise ValueError("Closed has to be either 'left', 'right' or None")
return left_closed, right_closed
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
"""
Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
Parameters
----------
inclusive : {"both", "neither", "left", "right"}
Returns
-------
left_right_inclusive : tuple[bool, bool]
Raises
------
ValueError : if argument is not among valid values
"""
left_right_inclusive: tuple[bool, bool] | None = None
if isinstance(inclusive, str):
left_right_inclusive = {
"both": (True, True),
"left": (True, False),
"right": (False, True),
"neither": (False, False),
}.get(inclusive)
if left_right_inclusive is None:
raise ValueError(
"Inclusive has to be either 'both', 'neither', 'left' or 'right'"
)
return left_right_inclusive
def validate_insert_loc(loc: int, length: int) -> int:
"""
Check that we have an integer between -length and length, inclusive.
Standardize negative loc to within [0, length].
The exceptions we raise on failure match np.insert.
"""
if not is_integer(loc):
raise TypeError(f"loc must be an integer between -{length} and {length}")
if loc < 0:
loc += length
if not 0 <= loc <= length:
raise IndexError(f"loc must be an integer between -{length} and {length}")
return loc