_validators.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. """
  2. Module that contains many useful utilities
  3. for validating data or function arguments
  4. """
  5. from typing import Iterable, Union
  6. import warnings
  7. import numpy as np
  8. from pandas.core.dtypes.common import is_bool
  9. def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
  10. """
  11. Checks whether 'args' has length of at most 'compat_args'. Raises
  12. a TypeError if that is not the case, similar to in Python when a
  13. function is called with too many arguments.
  14. """
  15. if max_fname_arg_count < 0:
  16. raise ValueError("'max_fname_arg_count' must be non-negative")
  17. if len(args) > len(compat_args):
  18. max_arg_count = len(compat_args) + max_fname_arg_count
  19. actual_arg_count = len(args) + max_fname_arg_count
  20. argument = "argument" if max_arg_count == 1 else "arguments"
  21. raise TypeError(
  22. f"{fname}() takes at most {max_arg_count} {argument} "
  23. f"({actual_arg_count} given)"
  24. )
  25. def _check_for_default_values(fname, arg_val_dict, compat_args):
  26. """
  27. Check that the keys in `arg_val_dict` are mapped to their
  28. default values as specified in `compat_args`.
  29. Note that this function is to be called only when it has been
  30. checked that arg_val_dict.keys() is a subset of compat_args
  31. """
  32. for key in arg_val_dict:
  33. # try checking equality directly with '=' operator,
  34. # as comparison may have been overridden for the left
  35. # hand object
  36. try:
  37. v1 = arg_val_dict[key]
  38. v2 = compat_args[key]
  39. # check for None-ness otherwise we could end up
  40. # comparing a numpy array vs None
  41. if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
  42. match = False
  43. else:
  44. match = v1 == v2
  45. if not is_bool(match):
  46. raise ValueError("'match' is not a boolean")
  47. # could not compare them directly, so try comparison
  48. # using the 'is' operator
  49. except ValueError:
  50. match = arg_val_dict[key] is compat_args[key]
  51. if not match:
  52. raise ValueError(
  53. f"the '{key}' parameter is not supported in "
  54. f"the pandas implementation of {fname}()"
  55. )
  56. def validate_args(fname, args, max_fname_arg_count, compat_args):
  57. """
  58. Checks whether the length of the `*args` argument passed into a function
  59. has at most `len(compat_args)` arguments and whether or not all of these
  60. elements in `args` are set to their default values.
  61. Parameters
  62. ----------
  63. fname : str
  64. The name of the function being passed the `*args` parameter
  65. args : tuple
  66. The `*args` parameter passed into a function
  67. max_fname_arg_count : int
  68. The maximum number of arguments that the function `fname`
  69. can accept, excluding those in `args`. Used for displaying
  70. appropriate error messages. Must be non-negative.
  71. compat_args : dict
  72. A dictionary of keys and their associated default values.
  73. In order to accommodate buggy behaviour in some versions of `numpy`,
  74. where a signature displayed keyword arguments but then passed those
  75. arguments **positionally** internally when calling downstream
  76. implementations, a dict ensures that the original
  77. order of the keyword arguments is enforced.
  78. Raises
  79. ------
  80. TypeError
  81. If `args` contains more values than there are `compat_args`
  82. ValueError
  83. If `args` contains values that do not correspond to those
  84. of the default values specified in `compat_args`
  85. """
  86. _check_arg_length(fname, args, max_fname_arg_count, compat_args)
  87. # We do this so that we can provide a more informative
  88. # error message about the parameters that we are not
  89. # supporting in the pandas implementation of 'fname'
  90. kwargs = dict(zip(compat_args, args))
  91. _check_for_default_values(fname, kwargs, compat_args)
  92. def _check_for_invalid_keys(fname, kwargs, compat_args):
  93. """
  94. Checks whether 'kwargs' contains any keys that are not
  95. in 'compat_args' and raises a TypeError if there is one.
  96. """
  97. # set(dict) --> set of the dictionary's keys
  98. diff = set(kwargs) - set(compat_args)
  99. if diff:
  100. bad_arg = list(diff)[0]
  101. raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
  102. def validate_kwargs(fname, kwargs, compat_args):
  103. """
  104. Checks whether parameters passed to the **kwargs argument in a
  105. function `fname` are valid parameters as specified in `*compat_args`
  106. and whether or not they are set to their default values.
  107. Parameters
  108. ----------
  109. fname : str
  110. The name of the function being passed the `**kwargs` parameter
  111. kwargs : dict
  112. The `**kwargs` parameter passed into `fname`
  113. compat_args: dict
  114. A dictionary of keys that `kwargs` is allowed to have and their
  115. associated default values
  116. Raises
  117. ------
  118. TypeError if `kwargs` contains keys not in `compat_args`
  119. ValueError if `kwargs` contains keys in `compat_args` that do not
  120. map to the default values specified in `compat_args`
  121. """
  122. kwds = kwargs.copy()
  123. _check_for_invalid_keys(fname, kwargs, compat_args)
  124. _check_for_default_values(fname, kwds, compat_args)
  125. def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_args):
  126. """
  127. Checks whether parameters passed to the *args and **kwargs argument in a
  128. function `fname` are valid parameters as specified in `*compat_args`
  129. and whether or not they are set to their default values.
  130. Parameters
  131. ----------
  132. fname: str
  133. The name of the function being passed the `**kwargs` parameter
  134. args: tuple
  135. The `*args` parameter passed into a function
  136. kwargs: dict
  137. The `**kwargs` parameter passed into `fname`
  138. max_fname_arg_count: int
  139. The minimum number of arguments that the function `fname`
  140. requires, excluding those in `args`. Used for displaying
  141. appropriate error messages. Must be non-negative.
  142. compat_args: dict
  143. A dictionary of keys that `kwargs` is allowed to
  144. have and their associated default values.
  145. Raises
  146. ------
  147. TypeError if `args` contains more values than there are
  148. `compat_args` OR `kwargs` contains keys not in `compat_args`
  149. ValueError if `args` contains values not at the default value (`None`)
  150. `kwargs` contains keys in `compat_args` that do not map to the default
  151. value as specified in `compat_args`
  152. See Also
  153. --------
  154. validate_args : Purely args validation.
  155. validate_kwargs : Purely kwargs validation.
  156. """
  157. # Check that the total number of arguments passed in (i.e.
  158. # args and kwargs) does not exceed the length of compat_args
  159. _check_arg_length(
  160. fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
  161. )
  162. # Check there is no overlap with the positional and keyword
  163. # arguments, similar to what is done in actual Python functions
  164. args_dict = dict(zip(compat_args, args))
  165. for key in args_dict:
  166. if key in kwargs:
  167. raise TypeError(
  168. f"{fname}() got multiple values for keyword argument '{key}'"
  169. )
  170. kwargs.update(args_dict)
  171. validate_kwargs(fname, kwargs, compat_args)
  172. def validate_bool_kwarg(value, arg_name):
  173. """ Ensures that argument passed in arg_name is of type bool. """
  174. if not (is_bool(value) or value is None):
  175. raise ValueError(
  176. f'For argument "{arg_name}" expected type bool, received '
  177. f"type {type(value).__name__}."
  178. )
  179. return value
  180. def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
  181. """Argument handler for mixed index, columns / axis functions
  182. In an attempt to handle both `.method(index, columns)`, and
  183. `.method(arg, axis=.)`, we have to do some bad things to argument
  184. parsing. This translates all arguments to `{index=., columns=.}` style.
  185. Parameters
  186. ----------
  187. data : DataFrame
  188. args : tuple
  189. All positional arguments from the user
  190. kwargs : dict
  191. All keyword arguments from the user
  192. arg_name, method_name : str
  193. Used for better error messages
  194. Returns
  195. -------
  196. kwargs : dict
  197. A dictionary of keyword arguments. Doesn't modify ``kwargs``
  198. inplace, so update them with the return value here.
  199. Examples
  200. --------
  201. >>> df._validate_axis_style_args((str.upper,), {'columns': id},
  202. ... 'mapper', 'rename')
  203. {'columns': <function id>, 'index': <method 'upper' of 'str' objects>}
  204. This emits a warning
  205. >>> df._validate_axis_style_args((str.upper, id), {},
  206. ... 'mapper', 'rename')
  207. {'columns': <function id>, 'index': <method 'upper' of 'str' objects>}
  208. """
  209. # TODO: Change to keyword-only args and remove all this
  210. out = {}
  211. # Goal: fill 'out' with index/columns-style arguments
  212. # like out = {'index': foo, 'columns': bar}
  213. # Start by validating for consistency
  214. if "axis" in kwargs and any(x in kwargs for x in data._AXIS_NUMBERS):
  215. msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
  216. raise TypeError(msg)
  217. # First fill with explicit values provided by the user...
  218. if arg_name in kwargs:
  219. if args:
  220. msg = f"{method_name} got multiple values for argument '{arg_name}'"
  221. raise TypeError(msg)
  222. axis = data._get_axis_name(kwargs.get("axis", 0))
  223. out[axis] = kwargs[arg_name]
  224. # More user-provided arguments, now from kwargs
  225. for k, v in kwargs.items():
  226. try:
  227. ax = data._get_axis_name(k)
  228. except ValueError:
  229. pass
  230. else:
  231. out[ax] = v
  232. # All user-provided kwargs have been handled now.
  233. # Now we supplement with positional arguments, emitting warnings
  234. # when there's ambiguity and raising when there's conflicts
  235. if len(args) == 0:
  236. pass # It's up to the function to decide if this is valid
  237. elif len(args) == 1:
  238. axis = data._get_axis_name(kwargs.get("axis", 0))
  239. out[axis] = args[0]
  240. elif len(args) == 2:
  241. if "axis" in kwargs:
  242. # Unambiguously wrong
  243. msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
  244. raise TypeError(msg)
  245. msg = (
  246. "Interpreting call\n\t'.{method_name}(a, b)' as "
  247. "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
  248. "arguments to remove any ambiguity. In the future, using "
  249. "positional arguments for 'index' or 'columns' will raise "
  250. " a 'TypeError'."
  251. )
  252. warnings.warn(msg.format(method_name=method_name), FutureWarning, stacklevel=4)
  253. out[data._AXIS_NAMES[0]] = args[0]
  254. out[data._AXIS_NAMES[1]] = args[1]
  255. else:
  256. msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
  257. raise TypeError(msg)
  258. return out
  259. def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
  260. """Validate the keyword arguments to 'fillna'.
  261. This checks that exactly one of 'value' and 'method' is specified.
  262. If 'method' is specified, this validates that it's a valid method.
  263. Parameters
  264. ----------
  265. value, method : object
  266. The 'value' and 'method' keyword arguments for 'fillna'.
  267. validate_scalar_dict_value : bool, default True
  268. Whether to validate that 'value' is a scalar or dict. Specifically,
  269. validate that it is not a list or tuple.
  270. Returns
  271. -------
  272. value, method : object
  273. """
  274. from pandas.core.missing import clean_fill_method
  275. if value is None and method is None:
  276. raise ValueError("Must specify a fill 'value' or 'method'.")
  277. elif value is None and method is not None:
  278. method = clean_fill_method(method)
  279. elif value is not None and method is None:
  280. if validate_scalar_dict_value and isinstance(value, (list, tuple)):
  281. raise TypeError(
  282. '"value" parameter must be a scalar or dict, but '
  283. f'you passed a "{type(value).__name__}"'
  284. )
  285. elif value is not None and method is not None:
  286. raise ValueError("Cannot specify both 'value' and 'method'.")
  287. return value, method
  288. def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
  289. """
  290. Validate percentiles (used by describe and quantile).
  291. This function checks if the given float oriterable of floats is a valid percentile
  292. otherwise raises a ValueError.
  293. Parameters
  294. ----------
  295. q: float or iterable of floats
  296. A single percentile or an iterable of percentiles.
  297. Returns
  298. -------
  299. ndarray
  300. An ndarray of the percentiles if valid.
  301. Raises
  302. ------
  303. ValueError if percentiles are not in given interval([0, 1]).
  304. """
  305. msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
  306. q_arr = np.asarray(q)
  307. if q_arr.ndim == 0:
  308. if not 0 <= q_arr <= 1:
  309. raise ValueError(msg.format(q_arr / 100.0))
  310. else:
  311. if not all(0 <= qs <= 1 for qs in q_arr):
  312. raise ValueError(msg.format(q_arr / 100.0))
  313. return q_arr