ai-content-maker/.venv/Lib/site-packages/pandas/tests/computation/test_eval.py

1942 lines
67 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
from __future__ import annotations
from functools import reduce
from itertools import product
import operator
import random
import warnings
import numpy as np
import pytest
from pandas.errors import (
NumExprClobberingError,
PerformanceWarning,
UndefinedVariableError,
)
import pandas.util._test_decorators as td
from pandas.core.dtypes.common import (
is_bool,
is_float,
is_list_like,
is_scalar,
)
import pandas as pd
from pandas import (
DataFrame,
Series,
date_range,
)
import pandas._testing as tm
from pandas.core.computation import pytables
from pandas.core.computation.engines import ENGINES
import pandas.core.computation.expr as expr
from pandas.core.computation.expr import (
BaseExprVisitor,
PandasExprVisitor,
PythonExprVisitor,
)
from pandas.core.computation.expressions import (
NUMEXPR_INSTALLED,
USE_NUMEXPR,
)
from pandas.core.computation.ops import (
ARITH_OPS_SYMS,
SPECIAL_CASE_ARITH_OPS_SYMS,
_binary_math_ops,
_binary_ops_dict,
_unary_math_ops,
)
from pandas.core.computation.scope import DEFAULT_GLOBALS
def _engine_param(name):
    """Wrap one engine name in a ``pytest.param`` carrying its skip marks.

    Two marks are attached to every engine: a ``skipif`` that fires only for
    the ``"numexpr"`` engine when ``USE_NUMEXPR`` is false, and
    ``td.skip_if_no_ne``.
    """
    skip_when_numexpr_disabled = pytest.mark.skipif(
        name == "numexpr" and not USE_NUMEXPR,
        reason=f"numexpr enabled->{USE_NUMEXPR}, "
        f"installed->{NUMEXPR_INSTALLED}",
    )
    return pytest.param(
        name,
        marks=[skip_when_numexpr_disabled, td.skip_if_no_ne],
    )


@pytest.fixture(params=[_engine_param(name) for name in ENGINES])
def engine(request):
    """Yield each key of ``ENGINES`` in turn as the evaluation engine name."""
    engine_name = request.param
    return engine_name
@pytest.fixture(params=expr.PARSERS)
def parser(request):
    """Yield each entry of ``expr.PARSERS`` in turn as the parser name."""
    parser_name = request.param
    return parser_name
@pytest.fixture(params=list(_unary_math_ops) if NUMEXPR_INSTALLED else [])
def unary_fns_for_ne(request):
    """Yield each name in ``_unary_math_ops``; no params if numexpr is absent."""
    fn_name = request.param
    return fn_name
def _eval_single_bin(lhs, cmp1, rhs, engine):
c = _binary_ops_dict[cmp1]
if ENGINES[engine].has_neg_frac:
try:
return c(lhs, rhs)
except ValueError as e:
if str(e).startswith(
"negative number cannot be raised to a fractional power"
):
return np.nan
raise
return c(lhs, rhs)
# TODO: using range(5) here is a kludge
@pytest.fixture(
    params=[0, 1, 2, 3, 4],
    ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"],
)
def lhs(request):
    """Return one of five operands, selected by position via ``request.param``.

    In order: a random frame, a random series, a series containing NaN, a
    frame with values above 0.5 replaced by NaN, and a random scalar.  All five
    are built on every call so the random draws happen in a fixed order.
    """
    frame_with_nans = DataFrame(np.random.rand(10, 5))
    frame_with_nans[frame_with_nans > 0.5] = np.nan

    candidates = [
        DataFrame(np.random.randn(10, 5)),
        Series(np.random.randn(5)),
        Series([1, 2, np.nan, np.nan, 5]),
        frame_with_nans,
        np.random.randn(),
    ]
    return candidates[request.param]


# The right-hand and middle operands reuse the same fixture function.
rhs = lhs
midhs = lhs
class TestEval:
    """Operator-level checks of ``pd.eval`` for every engine/parser pair.

    NOTE: ``pd.eval`` (and ``numexpr.evaluate`` / builtin ``eval``) resolve the
    names used inside expression strings from the calling frame, so locals
    such as ``lhs``, ``rhs``, ``mid``, ``elb``, ``nlhs``, ``ghs``, ``x`` and
    ``df`` must keep these exact names.
    """

    @pytest.mark.parametrize(
        "cmp1",
        ["!=", "==", "<=", ">=", "<", ">"],
        ids=["ne", "eq", "le", "ge", "lt", "gt"],
    )
    @pytest.mark.parametrize("cmp2", [">", "<"], ids=["gt", "lt"])
    @pytest.mark.parametrize("binop", expr.BOOL_OPS_SYMS)
    def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser):
        """Two comparisons joined by a boolean operator match the manual result."""
        ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)"

        if parser == "python" and binop in ["and", "or"]:
            msg = "'BoolOp' nodes are not implemented"
            # Only the call expected to raise lives inside the context manager.
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
            return

        lhs_new = _eval_single_bin(lhs, cmp1, rhs, engine)
        rhs_new = _eval_single_bin(lhs, cmp2, rhs, engine)
        expected = _eval_single_bin(lhs_new, binop, rhs_new, engine)

        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("cmp_op", expr.CMP_OPS_SYMS)
    def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser):
        """A single comparison between two boolean operands."""
        lhs = lhs < 0
        rhs = rhs < 0
        ex = f"lhs {cmp_op} rhs"

        if parser == "python" and cmp_op in ["in", "not in"]:
            msg = "'(In|NotIn)' nodes are not implemented"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
            return

        msg = "|".join(
            [
                r"only list-like( or dict-like)? objects are allowed to be "
                r"passed to (DataFrame\.)?isin\(\), you passed a "
                r"(\[|')bool(\]|')",
                "argument of type 'bool' is not iterable",
            ]
        )
        if cmp_op in ("in", "not in") and not is_list_like(rhs):
            with pytest.raises(TypeError, match=msg):
                pd.eval(
                    ex,
                    engine=engine,
                    parser=parser,
                    local_dict={"lhs": lhs, "rhs": rhs},
                )
        else:
            expected = _eval_single_bin(lhs, cmp_op, rhs, engine)
            result = pd.eval(ex, engine=engine, parser=parser)
            tm.assert_equal(result, expected)

    @pytest.mark.parametrize("op", expr.CMP_OPS_SYMS)
    def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser):
        """``~(lhs <op> rhs)`` matches inverting the manually computed result."""
        ex = f"~(lhs {op} rhs)"

        if parser == "python" and op in ["in", "not in"]:
            msg = "'(In|NotIn)' nodes are not implemented"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
            return

        if (
            is_float(lhs)
            and not is_float(rhs)
            and op in ["in", "not in"]
            and engine == "python"
            and parser == "pandas"
        ):
            mark = pytest.mark.xfail(
                reason="Looks like expected is negative, unclear whether "
                "expected is incorrect or result is incorrect"
            )
            request.node.add_marker(mark)

        msg = "|".join(
            [
                r"only list-like( or dict-like)? objects are allowed to be "
                r"passed to (DataFrame\.)?isin\(\), you passed a "
                r"(\[|')float(\]|')",
                "argument of type 'float' is not iterable",
            ]
        )
        if is_scalar(rhs) and op in ("in", "not in"):
            with pytest.raises(TypeError, match=msg):
                pd.eval(
                    ex,
                    engine=engine,
                    parser=parser,
                    local_dict={"lhs": lhs, "rhs": rhs},
                )
        else:
            # compound
            if is_scalar(lhs) and is_scalar(rhs):
                # Wrap both scalars in 1-element arrays; pd.eval below picks
                # up the rebound ``lhs``/``rhs`` from this frame.
                lhs, rhs = np.array([lhs]), np.array([rhs])
            expected = _eval_single_bin(lhs, op, rhs, engine)
            if is_scalar(expected):
                expected = not expected
            else:
                expected = ~expected
            result = pd.eval(ex, engine=engine, parser=parser)
            tm.assert_almost_equal(expected, result)

    @pytest.mark.parametrize("cmp1", ["<", ">"])
    @pytest.mark.parametrize("cmp2", ["<", ">"])
    def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs, engine, parser):
        """Chained comparisons equal the ``&`` of the two pairwise comparisons."""
        mid = midhs
        if parser == "python":
            ex1 = f"lhs {cmp1} mid {cmp2} rhs"
            msg = "'BoolOp' nodes are not implemented"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex1, engine=engine, parser=parser)
            return

        lhs_new = _eval_single_bin(lhs, cmp1, mid, engine)
        rhs_new = _eval_single_bin(mid, cmp2, rhs, engine)

        if lhs_new is not None and rhs_new is not None:
            ex1 = f"lhs {cmp1} mid {cmp2} rhs"
            ex2 = f"lhs {cmp1} mid and mid {cmp2} rhs"
            ex3 = f"(lhs {cmp1} mid) & (mid {cmp2} rhs)"
            expected = _eval_single_bin(lhs_new, "&", rhs_new, engine)

            for ex in (ex1, ex2, ex3):
                result = pd.eval(ex, engine=engine, parser=parser)
                tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize(
        "arith1", sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS))
    )
    def test_binary_arith_ops(self, arith1, lhs, rhs, engine, parser):
        """Single and doubled application of a non-special arithmetic operator."""
        ex = f"lhs {arith1} rhs"
        result = pd.eval(ex, engine=engine, parser=parser)
        expected = _eval_single_bin(lhs, arith1, rhs, engine)
        tm.assert_almost_equal(result, expected)

        ex = f"lhs {arith1} rhs {arith1} rhs"
        result = pd.eval(ex, engine=engine, parser=parser)
        nlhs = _eval_single_bin(lhs, arith1, rhs, engine)
        try:
            nlhs, ghs = nlhs.align(rhs)
        except (ValueError, TypeError, AttributeError):
            # ValueError: series frame or frame series align
            # TypeError, AttributeError: series or frame with scalar align
            return
        else:
            if engine == "numexpr":
                import numexpr as ne

                # direct numpy comparison; ``nlhs`` and ``ghs`` are resolved
                # by name from this frame
                expected = ne.evaluate(f"nlhs {arith1} ghs")
                # Update assert statement due to unreliable numerical
                # precision component (GH37328)
                # TODO: update testing code so that assert_almost_equal statement
                #  can be replaced again by the assert_numpy_array_equal statement
                tm.assert_almost_equal(result.values, expected)
            else:
                expected = eval(f"nlhs {arith1} ghs")
                tm.assert_almost_equal(result, expected)

    # modulus, pow, and floor division require special casing

    def test_modulus(self, lhs, rhs, engine, parser):
        """``%`` through ``pd.eval`` agrees with the plain Python operator."""
        ex = r"lhs % rhs"
        result = pd.eval(ex, engine=engine, parser=parser)
        expected = lhs % rhs
        tm.assert_almost_equal(result, expected)

        if engine == "numexpr":
            import numexpr as ne

            # ``expected`` and ``rhs`` are looked up by name from this frame.
            expected = ne.evaluate(r"expected % rhs")
            if isinstance(result, (DataFrame, Series)):
                tm.assert_almost_equal(result.values, expected)
            else:
                tm.assert_almost_equal(result, expected.item())
        else:
            expected = _eval_single_bin(expected, "%", rhs, engine)
            tm.assert_almost_equal(result, expected)

    def test_floor_division(self, lhs, rhs, engine, parser):
        """``//`` works on the python engine and raises ``TypeError`` otherwise."""
        ex = "lhs // rhs"

        if engine == "python":
            res = pd.eval(ex, engine=engine, parser=parser)
            expected = lhs // rhs
            tm.assert_equal(res, expected)
        else:
            msg = (
                r"unsupported operand type\(s\) for //: 'VariableNode' and "
                "'VariableNode'"
            )
            with pytest.raises(TypeError, match=msg):
                pd.eval(
                    ex,
                    local_dict={"lhs": lhs, "rhs": rhs},
                    engine=engine,
                    parser=parser,
                )

    @td.skip_if_windows
    def test_pow(self, lhs, rhs, engine, parser):
        """``**`` and nested ``**`` agree with the manually computed result."""
        # odd failure on win32 platform, so skip
        ex = "lhs ** rhs"
        expected = _eval_single_bin(lhs, "**", rhs, engine)
        result = pd.eval(ex, engine=engine, parser=parser)

        if (
            is_scalar(lhs)
            and is_scalar(rhs)
            and isinstance(expected, (complex, np.complexfloating))
            and np.isnan(result)
        ):
            # Scalar case where Python yields a complex value but pd.eval
            # yields NaN: the two are asserted to differ.
            msg = "(DataFrame.columns|numpy array) are different"
            with pytest.raises(AssertionError, match=msg):
                tm.assert_numpy_array_equal(result, expected)
        else:
            tm.assert_almost_equal(result, expected)

            ex = "(lhs ** rhs) ** rhs"
            result = pd.eval(ex, engine=engine, parser=parser)

            middle = _eval_single_bin(lhs, "**", rhs, engine)
            expected = _eval_single_bin(middle, "**", rhs, engine)
            tm.assert_almost_equal(result, expected)

    def check_single_invert_op(self, lhs, engine, parser):
        """Helper: ``~`` on ``lhs`` coerced to bool matches ``pd.eval("~elb")``."""
        # simple
        try:
            elb = lhs.astype(bool)
        except AttributeError:
            # no ``astype`` (e.g. a Python scalar): wrap in a 1-element array
            elb = np.array([bool(lhs)])
        expected = ~elb
        result = pd.eval("~elb", engine=engine, parser=parser)
        tm.assert_almost_equal(expected, result)

    def test_frame_invert(self, engine, parser):
        """``~`` on float, int, bool and object frames."""
        # ``ex`` rather than ``expr`` so the imported ``expr`` module is not
        # shadowed inside this method.
        ex = "~lhs"

        # ~ ##
        # frame
        # float always raises
        lhs = DataFrame(np.random.randn(5, 2))
        if engine == "numexpr":
            msg = "couldn't find matching opcode for 'invert_dd'"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            msg = "ufunc 'invert' not supported for the input types"
            with pytest.raises(TypeError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)

        # int raises on numexpr
        lhs = DataFrame(np.random.randint(5, size=(5, 2)))
        if engine == "numexpr":
            msg = "couldn't find matching opcode for 'invert"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            expect = ~lhs
            result = pd.eval(ex, engine=engine, parser=parser)
            tm.assert_frame_equal(expect, result)

        # bool always works
        lhs = DataFrame(np.random.rand(5, 2) > 0.5)
        expect = ~lhs
        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_frame_equal(expect, result)

        # object raises
        lhs = DataFrame({"b": ["a", 1, 2.0], "c": np.random.rand(3) > 0.5})
        if engine == "numexpr":
            with pytest.raises(ValueError, match="unknown type object"):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            msg = "bad operand type for unary ~: 'str'"
            with pytest.raises(TypeError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)

    def test_series_invert(self, engine, parser):
        """``~`` on float, int, bool and object series."""
        # ~ ####
        ex = "~lhs"

        # series
        # float raises
        lhs = Series(np.random.randn(5))
        if engine == "numexpr":
            msg = "couldn't find matching opcode for 'invert_dd'"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            msg = "ufunc 'invert' not supported for the input types"
            with pytest.raises(TypeError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)

        # int raises on numexpr
        lhs = Series(np.random.randint(5, size=5))
        if engine == "numexpr":
            msg = "couldn't find matching opcode for 'invert"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            expect = ~lhs
            result = pd.eval(ex, engine=engine, parser=parser)
            tm.assert_series_equal(expect, result)

        # bool
        lhs = Series(np.random.rand(5) > 0.5)
        expect = ~lhs
        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_series_equal(expect, result)

        # float
        # int
        # bool

        # object
        lhs = Series(["a", 1, 2.0])
        if engine == "numexpr":
            with pytest.raises(ValueError, match="unknown type object"):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            msg = "bad operand type for unary ~: 'str'"
            with pytest.raises(TypeError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)

    def test_frame_negate(self, engine, parser):
        """Unary minus on float, int and bool frames."""
        ex = "-lhs"

        # float
        lhs = DataFrame(np.random.randn(5, 2))
        expect = -lhs
        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_frame_equal(expect, result)

        # int
        lhs = DataFrame(np.random.randint(5, size=(5, 2)))
        expect = -lhs
        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_frame_equal(expect, result)

        # bool doesn't work with numexpr but works elsewhere
        lhs = DataFrame(np.random.rand(5, 2) > 0.5)
        if engine == "numexpr":
            msg = "couldn't find matching opcode for 'neg_bb'"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            expect = -lhs
            result = pd.eval(ex, engine=engine, parser=parser)
            tm.assert_frame_equal(expect, result)

    def test_series_negate(self, engine, parser):
        """Unary minus on float, int and bool series."""
        ex = "-lhs"

        # float
        lhs = Series(np.random.randn(5))
        expect = -lhs
        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_series_equal(expect, result)

        # int
        lhs = Series(np.random.randint(5, size=5))
        expect = -lhs
        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_series_equal(expect, result)

        # bool doesn't work with numexpr but works elsewhere
        lhs = Series(np.random.rand(5) > 0.5)
        if engine == "numexpr":
            msg = "couldn't find matching opcode for 'neg_bb'"
            with pytest.raises(NotImplementedError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
        else:
            expect = -lhs
            result = pd.eval(ex, engine=engine, parser=parser)
            tm.assert_series_equal(expect, result)

    @pytest.mark.parametrize(
        "lhs",
        [
            # Float
            DataFrame(np.random.randn(5, 2)),
            # Int
            DataFrame(np.random.randint(5, size=(5, 2))),
            # Bool (asserted to work on every engine, unlike unary minus)
            DataFrame(np.random.rand(5, 2) > 0.5),
        ],
    )
    def test_frame_pos(self, lhs, engine, parser):
        """Unary plus returns the frame unchanged."""
        ex = "+lhs"
        expect = lhs

        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_frame_equal(expect, result)

    @pytest.mark.parametrize(
        "lhs",
        [
            # Float
            Series(np.random.randn(5)),
            # Int
            Series(np.random.randint(5, size=5)),
            # Bool (asserted to work on every engine, unlike unary minus)
            Series(np.random.rand(5) > 0.5),
        ],
    )
    def test_series_pos(self, lhs, engine, parser):
        """Unary plus returns the series unchanged."""
        ex = "+lhs"
        expect = lhs

        result = pd.eval(ex, engine=engine, parser=parser)
        tm.assert_series_equal(expect, result)

    def test_scalar_unary(self, engine, parser):
        """Unary operators on scalar literals match Python semantics."""
        msg = "bad operand type for unary ~: 'float'"
        with pytest.raises(TypeError, match=msg):
            pd.eval("~1.0", engine=engine, parser=parser)

        assert pd.eval("-1.0", parser=parser, engine=engine) == -1.0
        assert pd.eval("+1.0", parser=parser, engine=engine) == +1.0
        assert pd.eval("~1", parser=parser, engine=engine) == ~1
        assert pd.eval("-1", parser=parser, engine=engine) == -1
        assert pd.eval("+1", parser=parser, engine=engine) == +1
        assert pd.eval("~True", parser=parser, engine=engine) == ~True
        assert pd.eval("~False", parser=parser, engine=engine) == ~False
        assert pd.eval("-True", parser=parser, engine=engine) == -True
        assert pd.eval("-False", parser=parser, engine=engine) == -False
        assert pd.eval("+True", parser=parser, engine=engine) == +True
        assert pd.eval("+False", parser=parser, engine=engine) == +False

    def test_unary_in_array(self):
        """Unary operators inside a list literal evaluate element-wise."""
        # GH 11235
        # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI
        # but cannot reproduce locally
        result = np.array(
            pd.eval(
                "[-True, True, ~True, +True,"
                "-False, False, ~False, +False,"
                "-37, 37, ~37, +37]"
            ),
            dtype=np.object_,
        )
        expected = np.array(
            [
                -True,
                True,
                ~True,
                +True,
                -False,
                False,
                ~False,
                +False,
                -37,
                37,
                ~37,
                +37,
            ],
            dtype=np.object_,
        )
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.parametrize("expr", ["x < -0.1", "-5 > x"])
    def test_float_comparison_bin_op(self, dtype, expr):
        """Comparing a float column against a negative literal."""
        # GH 16363
        df = DataFrame({"x": np.array([0], dtype=dtype)})
        res = df.eval(expr)
        assert res.values == np.array([False])

    @pytest.mark.parametrize(
        "ex",
        (
            "1 or 2",
            "1 and 2",
            "a and b",
            "a or b",
            "1 or 2 and (3 + 2) > 3",
            "2 * x > 2 or 1 and 2",
            "2 * df > 3 and 1 or a",
        ),
    )
    def test_disallow_scalar_bool_ops(self, ex, engine, parser):
        """Boolean operators involving non-bool scalars are rejected."""
        # These locals are referenced by name from the expression strings.
        x, a, b = np.random.randn(3), 1, 2  # noqa:F841
        df = DataFrame(np.random.randn(3, 2))  # noqa:F841

        msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
        with pytest.raises(NotImplementedError, match=msg):
            pd.eval(ex, engine=engine, parser=parser)

    def test_identical(self, engine, parser):
        """Evaluating a bare variable name returns its value."""
        # see gh-10546
        x = 1
        result = pd.eval("x", engine=engine, parser=parser)
        assert result == 1
        assert is_scalar(result)

        x = 1.5
        result = pd.eval("x", engine=engine, parser=parser)
        assert result == 1.5
        assert is_scalar(result)

        x = False
        result = pd.eval("x", engine=engine, parser=parser)
        assert not result
        assert is_bool(result)
        assert is_scalar(result)

        x = np.array([1])
        result = pd.eval("x", engine=engine, parser=parser)
        tm.assert_numpy_array_equal(result, np.array([1]))
        assert result.shape == (1,)

        x = np.array([1.5])
        result = pd.eval("x", engine=engine, parser=parser)
        tm.assert_numpy_array_equal(result, np.array([1.5]))
        assert result.shape == (1,)

        x = np.array([False])  # noqa:F841
        result = pd.eval("x", engine=engine, parser=parser)
        tm.assert_numpy_array_equal(result, np.array([False]))
        assert result.shape == (1,)

    def test_line_continuation(self, engine, parser):
        """An expression containing a backslash line continuation evaluates."""
        # GH 11149
        # The continuation line is kept at column 0 so the literal's contents
        # stay exactly as written.
        exp = """1 + 2 * \
5 - 1 + 2 """
        result = pd.eval(exp, engine=engine, parser=parser)
        assert result == 12

    def test_float_truncation(self, engine, parser):
        """Float literals keep full precision in ``eval`` and ``query``."""
        # GH 14241
        exp = "1000000000.006"
        result = pd.eval(exp, engine=engine, parser=parser)
        expected = np.float64(exp)
        assert result == expected

        df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})
        cutoff = 1000000000.0006
        result = df.query(f"A < {cutoff:.4f}")
        assert result.empty

        cutoff = 1000000000.0010
        result = df.query(f"A > {cutoff:.4f}")
        expected = df.loc[[1, 2], :]
        tm.assert_frame_equal(expected, result)

        exact = 1000000000.0011
        result = df.query(f"A == {exact:.4f}")
        expected = df.loc[[1], :]
        tm.assert_frame_equal(expected, result)

    def test_disallow_python_keywords(self):
        """Column or index names that are Python keywords raise in ``query``."""
        # GH 18221
        df = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"])
        msg = "Python keyword not valid identifier in numexpr query"
        with pytest.raises(SyntaxError, match=msg):
            df.query("class == 0")

        df = DataFrame()
        df.index.name = "lambda"
        with pytest.raises(SyntaxError, match=msg):
            df.query("lambda == 0")

    def test_true_false_logic(self):
        """``not`` on bool literals yields the values asserted below."""
        # GH 25823
        assert pd.eval("not True") == -2
        assert pd.eval("not False") == -1
        assert pd.eval("True and not True") == 0

    def test_and_logic_string_match(self):
        """Boolean results of ``str.match`` can be embedded into expressions."""
        # GH 25823
        event = Series({"a": "hello"})
        assert pd.eval(f"{event.str.match('hello').a}")
        assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}")
def f(*args, **kwargs):
    """Return one standard-normal sample, ignoring every argument.

    Used as the ``data_gen_f`` callback when building test frames; the
    positional and keyword arguments are accepted only so the callback can
    be called with any signature.
    """
    return np.random.randn()
# -------------------------------------
# gh-12388: Typecasting rules consistency with python
class TestTypeCasting:
    """Check that frame-with-scalar arithmetic keeps the frame's float dtype."""

    @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"])
    # maybe someday... numexpr has too many upcasting rules now
    # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float']))
    @pytest.mark.parametrize("dt", [np.float32, np.float64])
    @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")])
    def test_binop_typecasting(self, engine, parser, op, dt, left_right):
        """``df <op> 3`` and ``3 <op> df`` keep dtype ``dt`` and match Python."""
        # ``df`` must keep this name: both pd.eval and builtin eval resolve it
        # from this frame.
        df = tm.makeCustomDataframe(5, 3, data_gen_f=f, dtype=dt)
        expression = f"{left_right[0]} {op} {left_right[1]}"

        evaluated = pd.eval(expression, engine=engine, parser=parser)

        assert df.values.dtype == dt
        assert evaluated.values.dtype == dt
        tm.assert_frame_equal(evaluated, eval(expression))
# -------------------------------------
# Basic and complex alignment
def should_warn(*args):
    """Decide whether aligning the given indexes is expected to warn.

    True when none of ``args`` is monotonic increasing and the XOR-fold of
    "has a datetime64 dtype" over ``args`` is true (i.e. an odd number of
    them are datetime-like).
    """
    none_monotonic = not any(idx.is_monotonic_increasing for idx in args)
    datetime_flags = [issubclass(idx.dtype.type, np.datetime64) for idx in args]
    odd_datetime_count = reduce(operator.xor, datetime_flags)
    return none_monotonic and odd_datetime_count
class TestAlignment:
    """Alignment behaviour of ``pd.eval`` for frames and series.

    NOTE: the expression strings reference ``df``, ``df2``, ``df3``, ``s`` and
    ``ser`` by name, so those locals must keep exactly these names.
    """

    index_types = ["i", "s", "dt"]
    lhs_index_types = index_types + ["s"]  # 'p'

    def test_align_nested_unary_op(self, engine, parser):
        """A unary operator nested in a binary expression with a frame."""
        s = "df * ~2"
        df = tm.makeCustomDataframe(5, 3, data_gen_f=f)
        res = pd.eval(s, engine=engine, parser=parser)
        tm.assert_frame_equal(res, df * ~2)

    @pytest.mark.parametrize("lr_idx_type", lhs_index_types)
    @pytest.mark.parametrize("rr_idx_type", index_types)
    @pytest.mark.parametrize("c_idx_type", index_types)
    def test_basic_frame_alignment(
        self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type
    ):
        """Adding two frames with different row counts and index types."""
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", RuntimeWarning)

            df = tm.makeCustomDataframe(
                10, 10, data_gen_f=f, r_idx_type=lr_idx_type, c_idx_type=c_idx_type
            )
            df2 = tm.makeCustomDataframe(
                20, 10, data_gen_f=f, r_idx_type=rr_idx_type, c_idx_type=c_idx_type
            )
            # only warns if not monotonic and not sortable
            if should_warn(df.index, df2.index):
                with tm.assert_produces_warning(RuntimeWarning):
                    res = pd.eval("df + df2", engine=engine, parser=parser)
            else:
                res = pd.eval("df + df2", engine=engine, parser=parser)
            tm.assert_frame_equal(res, df + df2)

    @pytest.mark.parametrize("r_idx_type", lhs_index_types)
    @pytest.mark.parametrize("c_idx_type", lhs_index_types)
    def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type):
        """Comparing a frame with a scalar and with an identically indexed frame."""
        df = tm.makeCustomDataframe(
            10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
        )
        res = pd.eval("df < 2", engine=engine, parser=parser)
        tm.assert_frame_equal(res, df < 2)

        df3 = DataFrame(np.random.randn(*df.shape), index=df.index, columns=df.columns)
        res = pd.eval("df < df3", engine=engine, parser=parser)
        tm.assert_frame_equal(res, df < df3)

    @pytest.mark.parametrize("r1", lhs_index_types)
    @pytest.mark.parametrize("c1", index_types)
    @pytest.mark.parametrize("r2", index_types)
    @pytest.mark.parametrize("c2", index_types)
    def test_medium_complex_frame_alignment(self, engine, parser, r1, c1, r2, c2):
        """Adding three frames of different lengths and index types."""
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", RuntimeWarning)

            df = tm.makeCustomDataframe(
                3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1
            )
            df2 = tm.makeCustomDataframe(
                4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2
            )
            df3 = tm.makeCustomDataframe(
                5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2
            )
            if should_warn(df.index, df2.index, df3.index):
                with tm.assert_produces_warning(RuntimeWarning):
                    res = pd.eval("df + df2 + df3", engine=engine, parser=parser)
            else:
                res = pd.eval("df + df2 + df3", engine=engine, parser=parser)
            tm.assert_frame_equal(res, df + df2 + df3)

    @pytest.mark.parametrize("index_name", ["index", "columns"])
    @pytest.mark.parametrize("c_idx_type", index_types)
    @pytest.mark.parametrize("r_idx_type", lhs_index_types)
    def test_basic_frame_series_alignment(
        self, engine, parser, index_name, r_idx_type, c_idx_type
    ):
        """``df + s`` where ``s`` is indexed by a slice of one of ``df``'s axes."""
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", RuntimeWarning)

            df = tm.makeCustomDataframe(
                10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
            )
            index = getattr(df, index_name)
            s = Series(np.random.randn(5), index[:5])

            if should_warn(df.index, s.index):
                with tm.assert_produces_warning(RuntimeWarning):
                    res = pd.eval("df + s", engine=engine, parser=parser)
            else:
                res = pd.eval("df + s", engine=engine, parser=parser)

            # With a datetime axis on numexpr the expectation is ``df.add(s)``.
            if engine == "numexpr" and "dt" in (r_idx_type, c_idx_type):
                expected = df.add(s)
            else:
                expected = df + s
            tm.assert_frame_equal(res, expected)

    @pytest.mark.parametrize("index_name", ["index", "columns"])
    @pytest.mark.parametrize(
        "r_idx_type, c_idx_type",
        list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")],
    )
    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
    def test_basic_series_frame_alignment(
        self, request, engine, parser, index_name, r_idx_type, c_idx_type
    ):
        """``s + df`` where ``s`` is indexed by a slice of one of ``df``'s axes."""
        if (
            engine == "numexpr"
            and parser == "pandas"
            and index_name == "index"
            and r_idx_type == "i"
            and c_idx_type == "s"
        ):
            reason = (
                f"Flaky column ordering when engine={engine}, "
                f"parser={parser}, index_name={index_name}, "
                f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}"
            )
            request.node.add_marker(pytest.mark.xfail(reason=reason, strict=False))
        df = tm.makeCustomDataframe(
            10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
        )
        index = getattr(df, index_name)
        s = Series(np.random.randn(5), index[:5])
        if should_warn(s.index, df.index):
            with tm.assert_produces_warning(RuntimeWarning):
                res = pd.eval("s + df", engine=engine, parser=parser)
        else:
            res = pd.eval("s + df", engine=engine, parser=parser)

        if engine == "numexpr" and "dt" in (r_idx_type, c_idx_type):
            expected = df.add(s)
        else:
            expected = s + df
        tm.assert_frame_equal(res, expected)

    @pytest.mark.parametrize("c_idx_type", index_types)
    @pytest.mark.parametrize("r_idx_type", lhs_index_types)
    @pytest.mark.parametrize("index_name", ["index", "columns"])
    @pytest.mark.parametrize("op", ["+", "*"])
    def test_series_frame_commutativity(
        self, engine, parser, index_name, op, r_idx_type, c_idx_type
    ):
        """``s <op> df`` and ``df <op> s`` give equal frames on numexpr."""
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", RuntimeWarning)

            df = tm.makeCustomDataframe(
                10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
            )
            index = getattr(df, index_name)
            s = Series(np.random.randn(5), index[:5])

            lhs = f"s {op} df"
            rhs = f"df {op} s"
            if should_warn(df.index, s.index):
                with tm.assert_produces_warning(RuntimeWarning):
                    a = pd.eval(lhs, engine=engine, parser=parser)
                with tm.assert_produces_warning(RuntimeWarning):
                    b = pd.eval(rhs, engine=engine, parser=parser)
            else:
                a = pd.eval(lhs, engine=engine, parser=parser)
                b = pd.eval(rhs, engine=engine, parser=parser)

            # Equality is only asserted for numexpr without datetime axes.
            if engine == "numexpr" and "dt" not in (r_idx_type, c_idx_type):
                tm.assert_frame_equal(a, b)

    @pytest.mark.parametrize("r1", lhs_index_types)
    @pytest.mark.parametrize("c1", index_types)
    @pytest.mark.parametrize("r2", index_types)
    @pytest.mark.parametrize("c2", index_types)
    def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2):
        """``df2 + ser + df`` with a series drawn from a randomly chosen axis."""
        n = 3
        m1 = 5
        m2 = 2 * m1

        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", RuntimeWarning)

            index_name = random.choice(["index", "columns"])
            obj_name = random.choice(["df", "df2"])

            df = tm.makeCustomDataframe(
                m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1
            )
            df2 = tm.makeCustomDataframe(
                m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2
            )
            # Explicit mapping instead of introspecting ``locals()``.
            frames_by_name = {"df": df, "df2": df2}
            index = getattr(frames_by_name[obj_name], index_name)
            ser = Series(np.random.randn(n), index[:n])

            if engine == "numexpr" and "dt" in (r2, c2):
                expected2 = df2.add(ser)
            else:
                expected2 = df2 + ser

            if engine == "numexpr" and "dt" in (r1, c1):
                expected = expected2.add(df)
            else:
                expected = expected2 + df

            if should_warn(df2.index, ser.index, df.index):
                with tm.assert_produces_warning(RuntimeWarning):
                    res = pd.eval("df2 + ser + df", engine=engine, parser=parser)
            else:
                res = pd.eval("df2 + ser + df", engine=engine, parser=parser)
            assert res.shape == expected.shape
            tm.assert_frame_equal(res, expected)

    def test_performance_warning_for_poor_alignment(self, engine, parser):
        """``PerformanceWarning`` is raised only for the cases asserted below."""
        df = DataFrame(np.random.randn(1000, 10))
        s = Series(np.random.randn(10000))
        if engine == "numexpr":
            seen = PerformanceWarning
        else:
            seen = False

        with tm.assert_produces_warning(seen):
            pd.eval("df + s", engine=engine, parser=parser)

        s = Series(np.random.randn(1000))
        with tm.assert_produces_warning(False):
            pd.eval("df + s", engine=engine, parser=parser)

        df = DataFrame(np.random.randn(10, 10000))
        s = Series(np.random.randn(10000))
        with tm.assert_produces_warning(False):
            pd.eval("df + s", engine=engine, parser=parser)

        df = DataFrame(np.random.randn(10, 10))
        s = Series(np.random.randn(10000))

        is_python_engine = engine == "python"

        if not is_python_engine:
            wrn = PerformanceWarning
        else:
            wrn = False

        with tm.assert_produces_warning(wrn) as w:
            pd.eval("df + s", engine=engine, parser=parser)

            if not is_python_engine:
                assert len(w) == 1
                msg = str(w[0].message)
                logged = np.log10(s.size - df.shape[1])
                # Only the last fragment interpolates a value.
                expected = (
                    "Alignment difference on axis 1 is larger "
                    "than an order of magnitude on term 'df', "
                    f"by more than {logged:.4g}; performance may suffer."
                )
                assert msg == expected
# ------------------------------------
# Slightly more complex ops
class TestOperations:
def eval(self, *args, **kwargs):
    """Forward to ``pd.eval`` with the ``level`` keyword raised by one.

    ``level`` defaults to 0 when the caller does not pass it; the increment
    accounts for the extra frame this wrapper adds.
    """
    caller_level = kwargs.pop("level", 0)
    return pd.eval(*args, level=caller_level + 1, **kwargs)
def test_simple_arith_ops(self, engine, parser):
    """Scalar arithmetic/comparison expressions match ``_eval_single_bin``.

    ``//`` is always skipped; ``in``/``not in`` are skipped for the python
    parser and expected to raise ``TypeError`` otherwise.
    """
    skipped = ["in", "not in"] if parser == "python" else []
    candidate_ops = [
        sym for sym in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS if sym not in skipped
    ]

    for op in candidate_ops:
        if op == "//":
            continue

        ex = f"1 {op} 1"
        ex2 = f"x {op} 1"
        ex3 = f"1 {op} (x + 1)"

        if op in ("in", "not in"):
            msg = "argument of type 'int' is not iterable"
            with pytest.raises(TypeError, match=msg):
                pd.eval(ex, engine=engine, parser=parser)
            continue

        # constants only
        expec = _eval_single_bin(1, op, 1, engine)
        x = self.eval(ex, engine=engine, parser=parser)
        assert x == expec

        # variable on the left, fed in through ``local_dict``
        expec = _eval_single_bin(x, op, 1, engine)
        y = self.eval(ex2, local_dict={"x": x}, engine=engine, parser=parser)
        assert y == expec

        # variable inside a parenthesised right-hand side
        expec = _eval_single_bin(1, op, x + 1, engine)
        y = self.eval(ex3, local_dict={"x": x}, engine=engine, parser=parser)
        assert y == expec
@pytest.mark.parametrize("rhs", [True, False])
@pytest.mark.parametrize("lhs", [True, False])
@pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS)
def test_simple_bool_ops(self, rhs, lhs, op, engine, parser):
    """Boolean operators on bool literals match builtin ``eval``.

    ``engine`` and ``parser`` are requested as fixtures and passed through.
    Previously ``parser`` was not a parameter, so the name resolved to the
    module-level fixture function and ``parser == "python"`` was always
    False, leaving the python-parser branch unreachable.
    """
    ex = f"{lhs} {op} {rhs}"

    if parser == "python" and op in ["and", "or"]:
        msg = "'BoolOp' nodes are not implemented"
        with pytest.raises(NotImplementedError, match=msg):
            self.eval(ex, engine=engine, parser=parser)
        return

    res = self.eval(ex, engine=engine, parser=parser)
    exp = eval(ex)
    assert res == exp
@pytest.mark.parametrize("rhs", [True, False])
@pytest.mark.parametrize("lhs", [True, False])
@pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS)
def test_bool_ops_with_constants(self, rhs, lhs, op, engine, parser):
    """Boolean operators on constant operands match builtin ``eval``.

    ``engine`` and ``parser`` are requested as fixtures and passed through.
    Previously ``parser`` was not a parameter, so the name resolved to the
    module-level fixture function and ``parser == "python"`` was always
    False, leaving the python-parser branch unreachable.
    """
    ex = f"{lhs} {op} {rhs}"

    if parser == "python" and op in ["and", "or"]:
        msg = "'BoolOp' nodes are not implemented"
        with pytest.raises(NotImplementedError, match=msg):
            self.eval(ex, engine=engine, parser=parser)
        return

    res = self.eval(ex, engine=engine, parser=parser)
    exp = eval(ex)
    assert res == exp
def test_4d_ndarray_fails(self):
    """A 4-dimensional array operand is rejected with ``NotImplementedError``."""
    operands = {
        "x": np.random.randn(3, 4, 5, 6),
        "y": Series(np.random.randn(10)),
    }
    expected_msg = "N-dimensional objects, where N > 2, are not supported with eval"
    with pytest.raises(NotImplementedError, match=expected_msg):
        self.eval("x + y", local_dict=operands)
def test_constant(self):
    """A bare integer literal evaluates to that integer."""
    assert self.eval("1") == 1
def test_single_variable(self):
    """Evaluating just a variable name returns an equal frame."""
    original = DataFrame(np.random.randn(10, 2))
    evaluated = self.eval("df", local_dict={"df": original})
    tm.assert_frame_equal(original, evaluated)
def test_truediv(self):
    """Either ``truediv`` value warns and still performs true division."""
    # ``s`` is referenced by name from the expression strings below.
    s = np.array([1])  # noqa:F841

    # FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be
    # removed in a future version.
    for flag in (False, True):
        with tm.assert_produces_warning(FutureWarning):
            res = self.eval("s / 1", truediv=flag)
        tm.assert_numpy_array_equal(res, np.array([1.0]))

    halving_cases = (
        ("1 / 2", True),
        ("1 / 2", False),
        ("s / 2", False),
        ("s / 2", True),
    )
    # A plain loop keeps ``self.eval`` called from this frame, where ``s``
    # lives.
    for ex, flag in halving_cases:
        with tm.assert_produces_warning(FutureWarning):
            res = self.eval(ex, truediv=flag)
        assert res == 0.5
def test_failing_subscript_with_name_error(self):
    """Subscripting with an undefined name raises ``NameError``."""
    # ``df`` is resolved by name from this frame; ``x`` is deliberately absent.
    df = DataFrame(np.random.randn(5, 3))  # noqa:F841
    expected_msg = "name 'x' is not defined"
    with pytest.raises(NameError, match=expected_msg):
        self.eval("df[x > 2] > 2")
def test_lhs_expression_subscript(self):
    """An expression result can be subscripted with a boolean frame."""
    frame = DataFrame(np.random.randn(5, 3))
    evaluated = self.eval("(df + 1)[df > 2]", local_dict={"df": frame})
    tm.assert_frame_equal(evaluated, (frame + 1)[frame > 2])
def test_attr_expression(self):
    """Attribute access on a frame inside an expression matches plain pandas."""
    df = DataFrame(np.random.randn(5, 3), columns=list("abc"))

    # (expression text, expected result computed directly)
    cases = [
        ("df.a < df.b", df.a < df.b),
        ("df.a + df.b + df.c", df.a + df.b + df.c),
        ("df.a + df.b + df.c[df.b < 0]", df.a + df.b + df.c[df.b < 0]),
    ]
    for expression, expected in cases:
        tm.assert_series_equal(expected, self.eval(expression, local_dict={"df": df}))
def test_assignment_fails(self):
    """Assigning in an expression without a ``target`` raises ``ValueError``."""
    left = DataFrame(np.random.randn(5, 3), columns=list("abc"))
    right = DataFrame(np.random.randn(5, 3))
    scope = {"df": left, "df2": right}
    with pytest.raises(ValueError, match="cannot assign without a target object"):
        self.eval("df = df2", local_dict=scope)
def test_assignment_column_multiple_raise(self):
    """Two space-separated assignees are rejected as invalid syntax."""
    frame = DataFrame(np.random.randn(5, 2), columns=list("ab"))
    # multiple assignees
    bad_expression = "d c = a + b"
    with pytest.raises(SyntaxError, match="invalid syntax"):
        frame.eval(bad_expression)
def test_assignment_column_invalid_assign(self):
    """A tuple on the left-hand side of an assignment is rejected."""
    frame = DataFrame(np.random.randn(5, 2), columns=list("ab"))
    # invalid assignees
    with pytest.raises(
        SyntaxError,
        match="left hand side of an assignment must be a single name",
    ):
        frame.eval("d,c = a + b")
def test_assignment_column_invalid_assign_function_call(self):
    """Assigning to a function call is rejected with ``SyntaxError``."""
    frame = DataFrame(np.random.randn(5, 2), columns=list("ab"))
    bad_expression = 'Timestamp("20131001") = a + b'
    with pytest.raises(SyntaxError, match="cannot assign to function call"):
        frame.eval(bad_expression)
def test_assignment_single_assign_existing(self):
df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
# single assignment - existing variable
expected = df.copy()
expected["a"] = expected["a"] + expected["b"]
df.eval("a = a + b", inplace=True)
tm.assert_frame_equal(df, expected)
def test_assignment_single_assign_new(self):
df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
# single assignment - new variable
expected = df.copy()
expected["c"] = expected["a"] + expected["b"]
df.eval("c = a + b", inplace=True)
tm.assert_frame_equal(df, expected)
def test_assignment_single_assign_local_overlap(self):
df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
df = df.copy()
a = 1 # noqa:F841
df.eval("a = 1 + b", inplace=True)
expected = df.copy()
expected["a"] = 1 + expected["b"]
tm.assert_frame_equal(df, expected)
def test_assignment_single_assign_name(self):
df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
a = 1 # noqa:F841
old_a = df.a.copy()
df.eval("a = a + b", inplace=True)
result = old_a + df.b
tm.assert_series_equal(result, df.a, check_names=False)
assert result.name is None
def test_assignment_multiple_raises(self):
    """Chained assignment (``c = a = b``) is rejected."""
    frame = DataFrame(np.random.randn(5, 2), columns=list("ab"))
    # multiple assignment
    frame.eval("c = a + b", inplace=True)
    with pytest.raises(SyntaxError, match="can only assign a single expression"):
        frame.eval("c = a = b")
def test_assignment_explicit(self):
    """An explicit ``target`` frame receives the assigned column in place."""
    frame = DataFrame(np.random.randn(5, 2), columns=list("ab"))
    # explicit targets
    self.eval(
        "c = df.a + df.b",
        local_dict={"df": frame},
        target=frame,
        inplace=True,
    )
    expected = frame.assign(c=frame["a"] + frame["b"])
    tm.assert_frame_equal(frame, expected)
def test_column_in(self):
# GH 11235
df = DataFrame({"a": [11], "b": [-32]})
result = df.eval("a in [11, -32]")
expected = Series([True])
# TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI
# but cannot reproduce locally
tm.assert_series_equal(result, expected, check_names=False)
@pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.")
def test_assignment_not_inplace(self):
# see gh-9297
df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
actual = df.eval("c = a + b", inplace=False)
assert actual is not None
expected = df.copy()
expected["c"] = expected["a"] + expected["b"]
tm.assert_frame_equal(df, expected)
def test_multi_line_expression(self):
    """Multi-line in-place assignments apply in order (GH 11149)."""
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    expected = frame.copy()

    # First pass: two new columns, the second built on the first.
    expected["c"] = expected["a"] + expected["b"]
    expected["d"] = expected["c"] + expected["b"]
    outcome = frame.eval(
        """
c = a + b
d = c + b""",
        inplace=True,
    )
    tm.assert_frame_equal(expected, frame)
    assert outcome is None

    # Second pass: overwrite a column, then use the updated value.
    expected["a"] = expected["a"] - 1
    expected["e"] = expected["a"] + 2
    outcome = frame.eval(
        """
a = a - 1
e = a + 2""",
        inplace=True,
    )
    tm.assert_frame_equal(expected, frame)
    assert outcome is None

    # multi-line not valid if not all assignments
    with pytest.raises(
        ValueError,
        match="Multi-line expressions are only valid if all expressions contain",
    ):
        frame.eval(
            """
a = b + 2
b - 2""",
            inplace=False,
        )
def test_multi_line_expression_not_inplace(self):
# GH 11149
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
expected = df.copy()
expected["c"] = expected["a"] + expected["b"]
expected["d"] = expected["c"] + expected["b"]
df = df.eval(
"""
c = a + b
d = c + b""",
inplace=False,
)
tm.assert_frame_equal(expected, df)
expected["a"] = expected["a"] - 1
expected["e"] = expected["a"] + 2
df = df.eval(
"""
a = a - 1
e = a + 2""",
inplace=False,
)
tm.assert_frame_equal(expected, df)
def test_multi_line_expression_local_variable(self):
# GH 15342
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
expected = df.copy()
local_var = 7
expected["c"] = expected["a"] * local_var
expected["d"] = expected["c"] + local_var
answer = df.eval(
"""
c = a * @local_var
d = c + @local_var
""",
inplace=True,
)
tm.assert_frame_equal(expected, df)
assert answer is None
def test_multi_line_expression_callable_local_variable(self):
# 26426
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
def local_func(a, b):
return b
expected = df.copy()
expected["c"] = expected["a"] * local_func(1, 7)
expected["d"] = expected["c"] + local_func(1, 7)
answer = df.eval(
"""
c = a * @local_func(1, 7)
d = c + @local_func(1, 7)
""",
inplace=True,
)
tm.assert_frame_equal(expected, df)
assert answer is None
def test_multi_line_expression_callable_local_variable_with_kwargs(self):
# 26426
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
def local_func(a, b):
return b
expected = df.copy()
expected["c"] = expected["a"] * local_func(b=7, a=1)
expected["d"] = expected["c"] + local_func(b=7, a=1)
answer = df.eval(
"""
c = a * @local_func(b=7, a=1)
d = c + @local_func(b=7, a=1)
""",
inplace=True,
)
tm.assert_frame_equal(expected, df)
assert answer is None
def test_assignment_in_query(self):
    """``query`` rejects assignment and leaves the frame unchanged."""
    # GH 8664
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    snapshot = frame.copy()
    with pytest.raises(ValueError, match="cannot assign without a target object"):
        frame.query("a = 1")
    tm.assert_frame_equal(frame, snapshot)
def test_query_inplace(self):
    """In-place ``query`` filters the frame; ``eval`` can target a dict."""
    # see gh-11149
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    snapshot = frame.copy()
    expected = snapshot[snapshot["a"] == 2]
    frame.query("a == 2", inplace=True)
    tm.assert_frame_equal(expected, frame)

    # A plain dict is also accepted as an in-place assignment target.
    mapping = {}
    self.eval("a = 1 + 2", target=mapping, inplace=True)
    tm.assert_dict_equal(mapping, {"a": 3})
@pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)])
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_cannot_item_assign(self, invalid_target):
    """Targets that cannot take item assignment raise ``ValueError``."""
    msg = "Cannot assign expression output to target"
    expression = "a = 1 + 2"

    # In-place is always checked; the copying path only when the target
    # exposes a ``copy`` attribute.
    inplace_modes = [True]
    if hasattr(invalid_target, "copy"):
        inplace_modes.append(False)

    for inplace in inplace_modes:
        with pytest.raises(ValueError, match=msg):
            self.eval(expression, target=invalid_target, inplace=inplace)
@pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)])
def test_cannot_copy_item(self, invalid_target):
    """These targets raise when a non-inplace assignment needs a copy."""
    with pytest.raises(ValueError, match="Cannot return a copy of the target"):
        self.eval("a = 1 + 2", target=invalid_target, inplace=False)
@pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}])
def test_inplace_no_assignment(self, target):
    """Without an assignment, only ``inplace=False`` is allowed."""
    expression = "1 + 2"
    result = self.eval(expression, target=target, inplace=False)
    assert result == 3

    with pytest.raises(
        ValueError, match="Cannot operate inplace if there is no assignment"
    ):
        self.eval(expression, target=target, inplace=True)
def test_basic_period_index_boolean_expression(self):
    """A comparison evaluated via ``eval`` matches the direct comparison."""
    frame = tm.makeCustomDataframe(
        2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i"
    )
    expected = frame < 2
    evaluated = self.eval("df < 2", local_dict={"df": frame})
    recomputed = frame < 2
    tm.assert_frame_equal(evaluated, expected)
    tm.assert_frame_equal(recomputed, expected)
def test_basic_period_index_subscript_expression(self):
    """Boolean-mask subscripting via ``eval`` matches plain pandas."""
    frame = tm.makeCustomDataframe(
        2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i"
    )
    evaluated = self.eval("df[df < 2 + 3]", local_dict={"df": frame})
    expected = frame[frame < 2 + 3]
    tm.assert_frame_equal(evaluated, expected)
def test_nested_period_index_subscript_expression(self):
    """Nested mask subscripts combined with arithmetic match plain pandas."""
    frame = tm.makeCustomDataframe(
        2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i"
    )
    evaluated = self.eval(
        "df[df[df < 2] < 2] + df * 2", local_dict={"df": frame}
    )
    expected = frame[frame[frame < 2] < 2] + frame * 2
    tm.assert_frame_equal(evaluated, expected)
def test_date_boolean(self, engine, parser):
    """Comparing a datetime column with ``20130101`` matches the string form."""
    frame = DataFrame(np.random.randn(5, 3))
    frame["dates1"] = date_range("1/1/2012", periods=5)
    expected = frame.dates1 < "20130101"
    result = self.eval(
        "df.dates1 < 20130101",
        local_dict={"df": frame},
        engine=engine,
        parser=parser,
    )
    tm.assert_series_equal(result, expected, check_names=False)
def test_simple_in_ops(self, engine, parser):
if parser != "python":
res = pd.eval("1 in [1, 2]", engine=engine, parser=parser)
assert res
res = pd.eval("2 in (1, 2)", engine=engine, parser=parser)
assert res
res = pd.eval("3 in (1, 2)", engine=engine, parser=parser)
assert not res
res = pd.eval("3 not in (1, 2)", engine=engine, parser=parser)
assert res
res = pd.eval("[3] not in (1, 2)", engine=engine, parser=parser)
assert res
res = pd.eval("[3] in ([3], 2)", engine=engine, parser=parser)
assert res
res = pd.eval("[[3]] in [[[3]], 2]", engine=engine, parser=parser)
assert res
res = pd.eval("(3,) in [(3,), 2]", engine=engine, parser=parser)
assert res
res = pd.eval("(3,) not in [(3,), 2]", engine=engine, parser=parser)
assert not res
res = pd.eval("[(3,)] in [[(3,)], 2]", engine=engine, parser=parser)
assert res
else:
msg = "'In' nodes are not implemented"
with pytest.raises(NotImplementedError, match=msg):
pd.eval("1 in [1, 2]", engine=engine, parser=parser)
with pytest.raises(NotImplementedError, match=msg):
pd.eval("2 in (1, 2)", engine=engine, parser=parser)
with pytest.raises(NotImplementedError, match=msg):
pd.eval("3 in (1, 2)", engine=engine, parser=parser)
with pytest.raises(NotImplementedError, match=msg):
pd.eval("[(3,)] in (1, 2, [(3,)])", engine=engine, parser=parser)
msg = "'NotIn' nodes are not implemented"
with pytest.raises(NotImplementedError, match=msg):
pd.eval("3 not in (1, 2)", engine=engine, parser=parser)
with pytest.raises(NotImplementedError, match=msg):
pd.eval("[3] not in (1, 2, [[3]])", engine=engine, parser=parser)
def test_check_many_exprs(self, engine, parser):
a = 1 # noqa:F841
expr = " * ".join("a" * 33)
expected = 1
res = pd.eval(expr, engine=engine, parser=parser)
assert res == expected
@pytest.mark.parametrize(
    "expr",
    [
        "df > 2 and df > 3",
        "df > 2 or df > 3",
        "not df > 2",
    ],
)
def test_fails_and_or_not(self, expr, engine, parser):
    """``and`` / ``or`` / ``not`` are rejected only by the python parser."""
    df = DataFrame(np.random.randn(5, 3))
    eval_kwargs = {"local_dict": {"df": df}, "parser": parser, "engine": engine}

    if parser != "python":
        # smoke-test, should not raise
        pd.eval(expr, **eval_kwargs)
        return

    if "not" in expr:
        msg = "'Not' nodes are not implemented"
    else:
        msg = "'BoolOp' nodes are not implemented"
    with pytest.raises(NotImplementedError, match=msg):
        pd.eval(expr, **eval_kwargs)
@pytest.mark.parametrize("char", ["|", "&"])
def test_fails_ampersand_pipe(self, char, engine, parser):
    """``|`` / ``&`` on this expression fail only under the python parser."""
    # ``df`` is resolved by name from this frame.
    df = DataFrame(np.random.randn(5, 3))  # noqa:F841
    ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)"

    if parser != "python":
        # smoke-test, should not raise
        pd.eval(ex, parser=parser, engine=engine)
        return

    with pytest.raises(
        NotImplementedError, match="cannot evaluate scalar only bool ops"
    ):
        pd.eval(ex, parser=parser, engine=engine)
class TestMath:
    """Checks for math function calls inside ``eval`` expressions."""

    def eval(self, *args, **kwargs):
        # Raise ``level`` by one so names resolve in the test's frame rather
        # than in this wrapper's frame.
        depth = kwargs.pop("level", 0)
        kwargs["level"] = depth + 1
        return pd.eval(*args, **kwargs)

    def test_unary_functions(self, unary_fns_for_ne):
        """Each unary function matches the NumPy function of the same name."""
        fn = unary_fns_for_ne
        df = DataFrame({"a": np.random.randn(10)})
        # ``a`` is resolved by name when the expression is evaluated.
        a = df.a

        got = self.eval(f"{fn}(a)")
        numpy_fn = getattr(np, fn)
        with np.errstate(all="ignore"):
            expect = numpy_fn(a)
        tm.assert_series_equal(got, expect, check_names=False)

    @pytest.mark.parametrize("fn", _binary_math_ops)
    def test_binary_functions(self, fn):
        """Each binary function matches the NumPy function of the same name."""
        df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)})
        # ``a`` and ``b`` are resolved by name when the expression is evaluated.
        a = df.a
        b = df.b

        got = self.eval(f"{fn}(a, b)")
        numpy_fn = getattr(np, fn)
        with np.errstate(all="ignore"):
            expect = numpy_fn(a, b)
        tm.assert_almost_equal(got, expect, check_names=False)

    def test_df_use_case(self, engine, parser):
        """Nested function calls can be assigned to a new column."""
        frame = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)})
        frame.eval(
            "e = arctan2(sin(a), b)",
            engine=engine,
            parser=parser,
            inplace=True,
        )
        expect = np.arctan2(np.sin(frame.a), frame.b)
        tm.assert_series_equal(frame.e, expect, check_names=False)

    def test_df_arithmetic_subexpression(self, engine, parser):
        """A function may take an arithmetic sub-expression as its argument."""
        frame = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)})
        frame.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True)
        expect = np.sin(frame.a + frame.b)
        tm.assert_series_equal(frame.e, expect, check_names=False)

    @pytest.mark.parametrize(
        "dtype, expect_dtype",
        [
            (np.int32, np.float64),
            (np.int64, np.float64),
            (np.float32, np.float32),
            (np.float64, np.float64),
            pytest.param(np.complex128, np.complex128, marks=td.skip_if_windows),
        ],
    )
    def test_result_types(self, dtype, expect_dtype, engine, parser):
        """``sin`` inside ``eval`` yields the same dtype as ``np.sin``."""
        # xref https://github.com/pandas-dev/pandas/issues/12293
        # this fails on Windows, apparently a floating point precision issue

        # Did not test complex64 because DataFrame is converting it to
        # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952
        frame = DataFrame({"a": np.random.randn(10).astype(dtype)})
        assert frame.a.dtype == dtype

        frame.eval("b = sin(a)", engine=engine, parser=parser, inplace=True)
        got = frame.b
        expect = np.sin(frame.a)
        assert expect.dtype == got.dtype
        assert expect_dtype == got.dtype
        tm.assert_series_equal(got, expect, check_names=False)

    def test_undefined_func(self, engine, parser):
        """Calling an unsupported function name raises ``ValueError``."""
        frame = DataFrame({"a": np.random.randn(10)})
        with pytest.raises(ValueError, match='"mysin" is not a supported function'):
            frame.eval("mysin(a)", engine=engine, parser=parser)

    def test_keyword_arg(self, engine, parser):
        """Keyword arguments to a math function raise ``TypeError``."""
        frame = DataFrame({"a": np.random.randn(10)})
        with pytest.raises(
            TypeError, match='Function "sin" does not support keyword arguments'
        ):
            frame.eval("sin(x=a)", engine=engine, parser=parser)
# Module-level array referenced by name ("_var_s") in expression strings, so
# that lookups through the global scope can be exercised.
_var_s = np.random.randn(10)
class TestScope:
    """Checks for how ``pd.eval`` reads local and global scope."""

    def test_global_scope(self, engine, parser):
        """A module-level name can be used in an expression."""
        expected = _var_s * 2
        result = pd.eval("_var_s * 2", engine=engine, parser=parser)
        tm.assert_numpy_array_equal(expected, result)

    def test_no_new_locals(self, engine, parser):
        """Evaluation must not add names to the caller's locals."""
        # Local names are kept as-is: the snapshots below compare them.
        x = 1
        lcls = locals().copy()
        pd.eval("x + 1", local_dict=lcls, engine=engine, parser=parser)
        lcls2 = locals().copy()
        # ``lcls`` did not exist yet when the first snapshot was taken.
        del lcls2["lcls"]
        assert lcls == lcls2

    def test_no_new_globals(self, engine, parser):
        """Evaluation must not add names to the module globals."""
        x = 1  # noqa:F841
        gbls = dict(globals())
        pd.eval("x + 1", engine=engine, parser=parser)
        gbls2 = dict(globals())
        assert gbls == gbls2

    def test_empty_locals(self, engine, parser):
        """An explicit empty ``local_dict`` hides the caller's locals."""
        # GH 47084
        x = 1  # noqa: F841
        with pytest.raises(UndefinedVariableError, match="name 'x' is not defined"):
            pd.eval("x + 1", engine=engine, parser=parser, local_dict={})

    def test_empty_globals(self, engine, parser):
        """An explicit empty ``global_dict`` hides module globals."""
        # GH 47084
        expression = "_var_s * 2"
        with pytest.raises(
            UndefinedVariableError, match="name '_var_s' is not defined"
        ):
            pd.eval(expression, engine=engine, parser=parser, global_dict={})
@td.skip_if_no_ne
def test_invalid_engine():
    """An unknown engine name raises ``KeyError``."""
    scope = {"x": 1, "y": 2}
    with pytest.raises(KeyError, match="Invalid engine 'asdf' passed"):
        pd.eval("x + y", local_dict=scope, engine="asdf")
@td.skip_if_no_ne
@pytest.mark.parametrize(
    ("use_numexpr", "expected"),
    (
        (True, "numexpr"),
        (False, "python"),
    ),
)
def test_numexpr_option_respected(use_numexpr, expected):
    """The default engine follows the ``compute.use_numexpr`` option."""
    # GH 32556
    from pandas.core.computation.eval import _check_engine

    with pd.option_context("compute.use_numexpr", use_numexpr):
        chosen = _check_engine(None)
        assert chosen == expected
@td.skip_if_no_ne
def test_numexpr_option_incompatible_op():
    """``A.isnull()`` can be queried with numexpr switched off."""
    # GH 32556
    with pd.option_context("compute.use_numexpr", False):
        data = {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]}
        frame = DataFrame(data)
        result = frame.query("A.isnull()")
        expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5])
        tm.assert_frame_equal(result, expected)
@td.skip_if_no_ne
def test_invalid_parser():
    """An unknown parser name raises ``KeyError``."""
    scope = {"x": 1, "y": 2}
    with pytest.raises(KeyError, match="Invalid parser 'asdf' passed"):
        pd.eval("x + y", local_dict=scope, parser="asdf")
# Maps each parser name to the expression-visitor class used for it; the keys
# also serve as parametrization values for the node-support test.
_parsers: dict[str, type[BaseExprVisitor]] = {
    "python": PythonExprVisitor,
    "pytables": pytables.PyTablesExprVisitor,
    "pandas": PandasExprVisitor,
}
@pytest.mark.parametrize("engine", ENGINES)
@pytest.mark.parametrize("parser", _parsers)
def test_disallowed_nodes(engine, parser):
    """Every node a visitor lists as unsupported raises when visited."""
    visitor_cls = _parsers[parser]
    visitor = visitor_cls("x + 1", engine, parser)

    expected_msg = "nodes are not implemented"
    for node_name in visitor_cls.unsupported_nodes:
        with pytest.raises(NotImplementedError, match=expected_msg):
            getattr(visitor, node_name)()
def test_syntax_error_exprs(engine, parser):
    """A dangling operator is reported as ``SyntaxError``."""
    incomplete = "s +"
    with pytest.raises(SyntaxError, match="invalid syntax"):
        pd.eval(incomplete, engine=engine, parser=parser)
def test_name_error_exprs(engine, parser):
    """Undefined names are reported as ``NameError``, naming ``s``."""
    expression = "s + t"
    with pytest.raises(NameError, match="name 's' is not defined"):
        pd.eval(expression, engine=engine, parser=parser)
@pytest.mark.parametrize("express", ["a + @b", "@a + b", "@a + @b"])
def test_invalid_local_variable_reference(engine, parser, express):
    """``@`` in top-level ``pd.eval`` raises ``SyntaxError`` for all parsers."""
    # ``a`` and ``b`` exist locally so only the ``@`` prefix is at fault.
    a, b = 1, 2  # noqa:F841

    # The message differs between the pandas parser and the others.
    if parser == "pandas":
        expected_msg = "The '@' prefix is not"
    else:
        expected_msg = "The '@' prefix is only"
    with pytest.raises(SyntaxError, match=expected_msg):
        pd.eval(express, engine=engine, parser=parser)
def test_numexpr_builtin_raises(engine, parser):
sin, dotted_line = 1, 2
if engine == "numexpr":
msg = "Variables in expression .+"
with pytest.raises(NumExprClobberingError, match=msg):
pd.eval("sin + dotted_line", engine=engine, parser=parser)
else:
res = pd.eval("sin + dotted_line", engine=engine, parser=parser)
assert res == sin + dotted_line
def test_bad_resolver_raises(engine, parser):
    """The resolvers ``(42, 3.0)`` are rejected with ``TypeError``."""
    bad_resolvers = (42, 3.0)
    with pytest.raises(TypeError, match="Resolver of type .+"):
        pd.eval("1 + 2", resolvers=bad_resolvers, engine=engine, parser=parser)
def test_empty_string_raises(engine, parser):
    """An empty expression string raises ``ValueError`` (GH 13139)."""
    expected_msg = "expr cannot be an empty string"
    with pytest.raises(ValueError, match=expected_msg):
        pd.eval("", engine=engine, parser=parser)
def test_more_than_one_expression_raises(engine, parser):
    """Two ``;``-separated expressions raise ``SyntaxError``."""
    expected_msg = "only a single expression is allowed"
    with pytest.raises(SyntaxError, match=expected_msg):
        pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser)
@pytest.mark.parametrize("cmp", ("and", "or"))
@pytest.mark.parametrize("lhs", (int, float))
@pytest.mark.parametrize("rhs", (int, float))
def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser):
    """Boolean ops between scalar operands raise ``NotImplementedError``."""
    # Random scalar factories keyed by the requested Python type.
    makers = {int: lambda: np.random.randint(10), float: np.random.randn}
    make_left = makers[lhs]

    # ``lhs``, ``mid`` and ``rhs`` are referenced by name in the expressions.
    mid = make_left()  # noqa:F841
    lhs = make_left()
    rhs = makers[rhs]()

    expressions = (
        f"lhs {cmp} mid {cmp} rhs",
        f"lhs {cmp} mid and mid {cmp} rhs",
        f"(lhs {cmp} mid) & (mid {cmp} rhs)",
    )
    msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
    for ex in expressions:
        with pytest.raises(NotImplementedError, match=msg):
            pd.eval(ex, engine=engine, parser=parser)
@pytest.mark.parametrize(
    "other",
    [
        "'x'",
        "...",
    ],
)
def test_equals_various(other):
    """Comparing a string column against these values is all ``False``."""
    frame = DataFrame({"A": ["a", "b", "c"]})
    result = frame.eval(f"A == {other}")

    # https://github.com/pandas-dev/pandas/issues/10239
    # lose name with numexpr engine. Remove when that's fixed.
    expected_name = None if USE_NUMEXPR else "A"
    expected = Series([False, False, False], name=expected_name)
    tm.assert_series_equal(result, expected)
def test_inf(engine, parser):
s = "inf + 1"
expected = np.inf
result = pd.eval(s, engine=engine, parser=parser)
assert result == expected
def test_truediv_deprecated(engine, parser):
    """Passing ``truediv`` emits exactly one deprecation ``FutureWarning``."""
    # GH#29182
    match = "The `truediv` parameter in pd.eval is deprecated"

    for flag in (True, False):
        with tm.assert_produces_warning(FutureWarning) as m:
            pd.eval("1+1", engine=engine, parser=parser, truediv=flag)

        assert len(m) == 1
        assert match in str(m[0].message)
@pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"])
def test_query_token(engine, column):
    """Backtick-quoted column names with special characters can be queried."""
    # See: https://github.com/pandas-dev/pandas/pull/42826
    frame = DataFrame(np.random.randn(5, 2), columns=[column, "b"])
    expected = frame[frame[column] > 5]
    result = frame.query(f"`{column}` > 5", engine=engine)
    tm.assert_frame_equal(result, expected)
def test_negate_lt_eq_le(engine, parser):
df = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"])
expected = df[~(df.cat > 0)]
result = df.query("~(cat > 0)", engine=engine, parser=parser)
tm.assert_frame_equal(result, expected)
if parser == "python":
msg = "'Not' nodes are not implemented"
with pytest.raises(NotImplementedError, match=msg):
df.query("not (cat > 0)", engine=engine, parser=parser)
else:
result = df.query("not (cat > 0)", engine=engine, parser=parser)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "column",
    DEFAULT_GLOBALS.keys(),
)
def test_eval_no_support_column_name(request, column):
    """Columns named after default globals can be used in a query."""
    # GH 44603
    known_unsupported = ("True", "False", "inf", "Inf")
    if column in known_unsupported:
        xfail_mark = pytest.mark.xfail(
            raises=KeyError,
            reason=f"GH 47859 DataFrame eval not supported with {column}",
        )
        request.node.add_marker(xfail_mark)

    values = np.random.randint(0, 100, size=(10, 2))
    frame = DataFrame(values, columns=[column, "col1"])
    expected = frame[frame[column] > 6]
    result = frame.query(f"{column}>6")

    tm.assert_frame_equal(result, expected)
@td.skip_array_manager_not_yet_implemented
def test_set_inplace(using_copy_on_write):
    """In-place ``eval`` assignment and its effect on existing references."""
    # https://github.com/pandas-dev/pandas/issues/47449
    # Ensure we don't only update the DataFrame inplace, but also the actual
    # column values, such that references to this column also get updated
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    # References taken before the assignment.
    result_view = df[:]
    ser = df["A"]

    df.eval("A = B + C", inplace=True)
    expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]})
    tm.assert_frame_equal(df, expected)

    if using_copy_on_write:
        # The earlier references keep the original values.
        unchanged = Series([1, 2, 3], name="A")
        tm.assert_series_equal(ser, unchanged)
        tm.assert_series_equal(result_view["A"], unchanged)
    else:
        # The earlier references see the new values.
        tm.assert_series_equal(ser, expected["A"])
        tm.assert_series_equal(result_view["A"], expected["A"])
class TestValidate:
    """Argument validation for ``pd.eval``."""

    @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
    def test_validate_bool_args(self, value):
        """These non-bool ``inplace`` values raise ``ValueError``."""
        expected_msg = 'For argument "inplace" expected type bool, received type'
        with pytest.raises(ValueError, match=expected_msg):
            pd.eval("2+2", inplace=value)