ai-content-maker/.venv/Lib/site-packages/pandas/tests/frame/methods/test_diff.py

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestDataFrameDiff:
    def test_diff_requires_integer(self):
        df = DataFrame(np.random.randn(2, 2))
        with pytest.raises(ValueError, match="periods must be an integer"):
            df.diff(1.5)

    # GH#44572 np.int64 is accepted
    @pytest.mark.parametrize("num", [1, np.int64(1)])
    def test_diff(self, datetime_frame, num):
        df = datetime_frame
        the_diff = df.diff(num)

        expected = df["A"] - df["A"].shift(num)
        tm.assert_series_equal(the_diff["A"], expected)

    def test_diff_int_dtype(self):
        # int dtype
        a = 10_000_000_000_000_000
        b = a + 1
        ser = Series([a, b])

        rs = DataFrame({"s": ser}).diff()
        assert rs.s[1] == 1

    def test_diff_mixed_numeric(self, datetime_frame):
        # mixed numeric
        tf = datetime_frame.astype("float32")
        the_diff = tf.diff(1)
        tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1))

    def test_diff_axis1_nonconsolidated(self):
        # GH#10907
        df = DataFrame({"y": Series([2]), "z": Series([3])})
        df.insert(0, "x", 1)
        result = df.diff(axis=1)
        expected = DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)})
        tm.assert_frame_equal(result, expected)

    def test_diff_timedelta64_with_nat(self):
        # GH#32441
        arr = np.arange(6).reshape(3, 2).astype("timedelta64[ns]")
        arr[:, 0] = np.timedelta64("NaT", "ns")

        df = DataFrame(arr)
        result = df.diff(1, axis=0)

        expected = DataFrame({0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]})
        tm.assert_equal(result, expected)

        result = df.diff(0)
        expected = df - df
        assert expected[0].isna().all()
        tm.assert_equal(result, expected)

        result = df.diff(-1, axis=1)
        expected = df * np.nan
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "UTC"])
    def test_diff_datetime_axis0_with_nat(self, tz):
        # GH#32441
        dti = pd.DatetimeIndex(["NaT", "2019-01-01", "2019-01-02"], tz=tz)
        ser = Series(dti)

        df = ser.to_frame()

        result = df.diff()
        ex_index = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta(days=1)])
        expected = Series(ex_index).to_frame()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("tz", [pytest.param(None, marks=pytest.mark.xfail), "UTC"])
    def test_diff_datetime_with_nat_zero_periods(self, tz):
        # diff on NaT values should give NaT, not timedelta64(0)
        dti = date_range("2016-01-01", periods=4, tz=tz)
        ser = Series(dti)
        df = ser.to_frame()

        df[1] = ser.copy()

        with tm.assert_produces_warning(None):
            df.iloc[:, 0] = pd.NaT

        expected = df - df
        assert expected[0].isna().all()

        result = df.diff(0, axis=0)
        tm.assert_frame_equal(result, expected)

        result = df.diff(0, axis=1)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "UTC"])
    def test_diff_datetime_axis0(self, tz):
        # GH#18578
        df = DataFrame(
            {
                0: date_range("2010", freq="D", periods=2, tz=tz),
                1: date_range("2010", freq="D", periods=2, tz=tz),
            }
        )

        result = df.diff(axis=0)
        expected = DataFrame(
            {
                0: pd.TimedeltaIndex(["NaT", "1 days"]),
                1: pd.TimedeltaIndex(["NaT", "1 days"]),
            }
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "UTC"])
    def test_diff_datetime_axis1(self, tz):
        # GH#18578
        df = DataFrame(
            {
                0: date_range("2010", freq="D", periods=2, tz=tz),
                1: date_range("2010", freq="D", periods=2, tz=tz),
            }
        )

        result = df.diff(axis=1)
        expected = DataFrame(
            {
                0: pd.TimedeltaIndex(["NaT", "NaT"]),
                1: pd.TimedeltaIndex(["0 days", "0 days"]),
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_diff_timedelta(self):
        # GH#4533
        df = DataFrame(
            {
                "time": [Timestamp("20130101 9:01"), Timestamp("20130101 9:02")],
                "value": [1.0, 2.0],
            }
        )

        res = df.diff()
        exp = DataFrame(
            [[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"]
        )
        tm.assert_frame_equal(res, exp)

    def test_diff_mixed_dtype(self):
        df = DataFrame(np.random.randn(5, 3))
        df["A"] = np.array([1, 2, 3, 4, 5], dtype=object)

        result = df.diff()
        assert result[0].dtype == np.float64

    def test_diff_neg_n(self, datetime_frame):
        rs = datetime_frame.diff(-1)
        xp = datetime_frame - datetime_frame.shift(-1)
        tm.assert_frame_equal(rs, xp)

    def test_diff_float_n(self, datetime_frame):
        rs = datetime_frame.diff(1.0)
        xp = datetime_frame.diff(1)
        tm.assert_frame_equal(rs, xp)

    def test_diff_axis(self):
        # GH#9727
        df = DataFrame([[1.0, 2.0], [3.0, 4.0]])
        tm.assert_frame_equal(
            df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]])
        )
        tm.assert_frame_equal(
            df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])
        )

    def test_diff_period(self):
        # GH#32995 Don't pass an incorrect axis
        pi = date_range("2016-01-01", periods=3).to_period("D")
        df = DataFrame({"A": pi})

        result = df.diff(1, axis=1)

        expected = (df - pd.NaT).astype(object)
        tm.assert_frame_equal(result, expected)

    def test_diff_axis1_mixed_dtypes(self):
        # GH#32995 operate column-wise when we have mixed dtypes and axis=1
        df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})

        expected = DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2})

        result = df.diff(axis=1)
        tm.assert_frame_equal(result, expected)

        # GH#21437 mixed-float-dtypes
        df = DataFrame(
            {"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")}
        )
        result = df.diff(axis=1)
        expected = DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0})
        tm.assert_frame_equal(result, expected)

    def test_diff_axis1_mixed_dtypes_large_periods(self):
        # GH#32995 operate column-wise when we have mixed dtypes and axis=1
        df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})

        expected = df * np.nan

        result = df.diff(axis=1, periods=3)
        tm.assert_frame_equal(result, expected)

    def test_diff_axis1_mixed_dtypes_negative_periods(self):
        # GH#32995 operate column-wise when we have mixed dtypes and axis=1
        df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})

        expected = DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan})

        result = df.diff(axis=1, periods=-1)
        tm.assert_frame_equal(result, expected)

    def test_diff_sparse(self):
        # GH#28813 .diff() should work for sparse dataframes as well
        sparse_df = DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]")

        result = sparse_df.diff()
        expected = DataFrame(
            [[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0)
        )

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "axis,expected",
        [
            (
                0,
                DataFrame(
                    {
                        "a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0],
                        "b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan],
                        "c": np.repeat(np.nan, 8),
                        "d": [np.nan, 3, 5, 7, 9, 11, 13, 15],
                    },
                    dtype="Int64",
                ),
            ),
            (
                1,
                DataFrame(
                    {
                        "a": np.repeat(np.nan, 8),
                        "b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0],
                        "c": np.repeat(np.nan, 8),
                        "d": np.repeat(np.nan, 8),
                    },
                    dtype="Int64",
                ),
            ),
        ],
    )
    def test_diff_integer_na(self, axis, expected):
        # GH#24171 IntegerNA Support for DataFrame.diff()
        df = DataFrame(
            {
                "a": np.repeat([0, 1, np.nan, 2], 2),
                "b": np.tile([0, 1, np.nan, 2], 2),
                "c": np.repeat(np.nan, 8),
                "d": np.arange(1, 9) ** 2,
            },
            dtype="Int64",
        )

        # Test case for default behaviour of diff
        result = df.diff(axis=axis)
        tm.assert_frame_equal(result, expected)

    def test_diff_readonly(self):
        # https://github.com/pandas-dev/pandas/issues/35559
        arr = np.random.randn(5, 2)
        arr.flags.writeable = False
        df = DataFrame(arr)
        result = df.diff()
        expected = DataFrame(np.array(df)).diff()
        tm.assert_frame_equal(result, expected)

    def test_diff_all_int_dtype(self, any_int_numpy_dtype):
        # GH 14773
        df = DataFrame(range(5))
        df = df.astype(any_int_numpy_dtype)
        result = df.diff()
        expected_dtype = (
            "float32" if any_int_numpy_dtype in ("int8", "int16") else "float64"
        )
        expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype)
        tm.assert_frame_equal(result, expected)
first commit 2024-05-03 04:18:51 +03:00			`import numpy as np`
			`import pytest`

			`import pandas as pd`
			`from pandas import (`
			`DataFrame,`
			`Series,`
			`Timestamp,`
			`date_range,`
			`)`
			`import pandas._testing as tm`


			`class TestDataFrameDiff:`
			`def test_diff_requires_integer(self):`
			`df = DataFrame(np.random.randn(2, 2))`
			`with pytest.raises(ValueError, match="periods must be an integer"):`
			`df.diff(1.5)`

			`# GH#44572 np.int64 is accepted`
			`@pytest.mark.parametrize("num", [1, np.int64(1)])`
			`def test_diff(self, datetime_frame, num):`
			`df = datetime_frame`
			`the_diff = df.diff(num)`

			`expected = df["A"] - df["A"].shift(num)`
			`tm.assert_series_equal(the_diff["A"], expected)`

			`def test_diff_int_dtype(self):`
			`# int dtype`
			`a = 10_000_000_000_000_000`
			`b = a + 1`
			`ser = Series([a, b])`

			`rs = DataFrame({"s": ser}).diff()`
			`assert rs.s[1] == 1`

			`def test_diff_mixed_numeric(self, datetime_frame):`
			`# mixed numeric`
			`tf = datetime_frame.astype("float32")`
			`the_diff = tf.diff(1)`
			`tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1))`

			`def test_diff_axis1_nonconsolidated(self):`
			`# GH#10907`
			`df = DataFrame({"y": Series([2]), "z": Series([3])})`
			`df.insert(0, "x", 1)`
			`result = df.diff(axis=1)`
			`expected = DataFrame({"x": np.nan, "y": Series(1), "z": Series(1)})`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_timedelta64_with_nat(self):`
			`# GH#32441`
			`arr = np.arange(6).reshape(3, 2).astype("timedelta64[ns]")`
			`arr[:, 0] = np.timedelta64("NaT", "ns")`

			`df = DataFrame(arr)`
			`result = df.diff(1, axis=0)`

			`expected = DataFrame({0: df[0], 1: [pd.NaT, pd.Timedelta(2), pd.Timedelta(2)]})`
			`tm.assert_equal(result, expected)`

			`result = df.diff(0)`
			`expected = df - df`
			`assert expected[0].isna().all()`
			`tm.assert_equal(result, expected)`

			`result = df.diff(-1, axis=1)`
			`expected = df * np.nan`
			`tm.assert_equal(result, expected)`

			`@pytest.mark.parametrize("tz", [None, "UTC"])`
			`def test_diff_datetime_axis0_with_nat(self, tz):`
			`# GH#32441`
			`dti = pd.DatetimeIndex(["NaT", "2019-01-01", "2019-01-02"], tz=tz)`
			`ser = Series(dti)`

			`df = ser.to_frame()`

			`result = df.diff()`
			`ex_index = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta(days=1)])`
			`expected = Series(ex_index).to_frame()`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize("tz", [pytest.param(None, marks=pytest.mark.xfail), "UTC"])`
			`def test_diff_datetime_with_nat_zero_periods(self, tz):`
			`# diff on NaT values should give NaT, not timedelta64(0)`
			`dti = date_range("2016-01-01", periods=4, tz=tz)`
			`ser = Series(dti)`
			`df = ser.to_frame()`

			`df[1] = ser.copy()`

			`with tm.assert_produces_warning(None):`
			`df.iloc[:, 0] = pd.NaT`

			`expected = df - df`
			`assert expected[0].isna().all()`

			`result = df.diff(0, axis=0)`
			`tm.assert_frame_equal(result, expected)`

			`result = df.diff(0, axis=1)`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize("tz", [None, "UTC"])`
			`def test_diff_datetime_axis0(self, tz):`
			`# GH#18578`
			`df = DataFrame(`
			`{`
			`0: date_range("2010", freq="D", periods=2, tz=tz),`
			`1: date_range("2010", freq="D", periods=2, tz=tz),`
			`}`
			`)`

			`result = df.diff(axis=0)`
			`expected = DataFrame(`
			`{`
			`0: pd.TimedeltaIndex(["NaT", "1 days"]),`
			`1: pd.TimedeltaIndex(["NaT", "1 days"]),`
			`}`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize("tz", [None, "UTC"])`
			`def test_diff_datetime_axis1(self, tz):`
			`# GH#18578`
			`df = DataFrame(`
			`{`
			`0: date_range("2010", freq="D", periods=2, tz=tz),`
			`1: date_range("2010", freq="D", periods=2, tz=tz),`
			`}`
			`)`

			`result = df.diff(axis=1)`
			`expected = DataFrame(`
			`{`
			`0: pd.TimedeltaIndex(["NaT", "NaT"]),`
			`1: pd.TimedeltaIndex(["0 days", "0 days"]),`
			`}`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_timedelta(self):`
			`# GH#4533`
			`df = DataFrame(`
			`{`
			`"time": [Timestamp("20130101 9:01"), Timestamp("20130101 9:02")],`
			`"value": [1.0, 2.0],`
			`}`
			`)`

			`res = df.diff()`
			`exp = DataFrame(`
			`[[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"]`
			`)`
			`tm.assert_frame_equal(res, exp)`

			`def test_diff_mixed_dtype(self):`
			`df = DataFrame(np.random.randn(5, 3))`
			`df["A"] = np.array([1, 2, 3, 4, 5], dtype=object)`

			`result = df.diff()`
			`assert result[0].dtype == np.float64`

			`def test_diff_neg_n(self, datetime_frame):`
			`rs = datetime_frame.diff(-1)`
			`xp = datetime_frame - datetime_frame.shift(-1)`
			`tm.assert_frame_equal(rs, xp)`

			`def test_diff_float_n(self, datetime_frame):`
			`rs = datetime_frame.diff(1.0)`
			`xp = datetime_frame.diff(1)`
			`tm.assert_frame_equal(rs, xp)`

			`def test_diff_axis(self):`
			`# GH#9727`
			`df = DataFrame([[1.0, 2.0], [3.0, 4.0]])`
			`tm.assert_frame_equal(`
			`df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]])`
			`)`
			`tm.assert_frame_equal(`
			`df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]])`
			`)`

			`def test_diff_period(self):`
			`# GH#32995 Don't pass an incorrect axis`
			`pi = date_range("2016-01-01", periods=3).to_period("D")`
			`df = DataFrame({"A": pi})`

			`result = df.diff(1, axis=1)`

			`expected = (df - pd.NaT).astype(object)`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_axis1_mixed_dtypes(self):`
			`# GH#32995 operate column-wise when we have mixed dtypes and axis=1`
			`df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})`

			`expected = DataFrame({"A": [np.nan, np.nan, np.nan], "B": df["B"] / 2})`

			`result = df.diff(axis=1)`
			`tm.assert_frame_equal(result, expected)`

			`# GH#21437 mixed-float-dtypes`
			`df = DataFrame(`
			`{"a": np.arange(3, dtype="float32"), "b": np.arange(3, dtype="float64")}`
			`)`
			`result = df.diff(axis=1)`
			`expected = DataFrame({"a": df["a"] * np.nan, "b": df["b"] * 0})`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_axis1_mixed_dtypes_large_periods(self):`
			`# GH#32995 operate column-wise when we have mixed dtypes and axis=1`
			`df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})`

			`expected = df * np.nan`

			`result = df.diff(axis=1, periods=3)`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_axis1_mixed_dtypes_negative_periods(self):`
			`# GH#32995 operate column-wise when we have mixed dtypes and axis=1`
			`df = DataFrame({"A": range(3), "B": 2 * np.arange(3, dtype=np.float64)})`

			`expected = DataFrame({"A": -1.0 * df["A"], "B": df["B"] * np.nan})`

			`result = df.diff(axis=1, periods=-1)`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_sparse(self):`
			`# GH#28813 .diff() should work for sparse dataframes as well`
			`sparse_df = DataFrame([[0, 1], [1, 0]], dtype="Sparse[int]")`

			`result = sparse_df.diff()`
			`expected = DataFrame(`
			`[[np.nan, np.nan], [1.0, -1.0]], dtype=pd.SparseDtype("float", 0.0)`
			`)`

			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize(`
			`"axis,expected",`
			`[`
			`(`
			`0,`
			`DataFrame(`
			`{`
			`"a": [np.nan, 0, 1, 0, np.nan, np.nan, np.nan, 0],`
			`"b": [np.nan, 1, np.nan, np.nan, -2, 1, np.nan, np.nan],`
			`"c": np.repeat(np.nan, 8),`
			`"d": [np.nan, 3, 5, 7, 9, 11, 13, 15],`
			`},`
			`dtype="Int64",`
			`),`
			`),`
			`(`
			`1,`
			`DataFrame(`
			`{`
			`"a": np.repeat(np.nan, 8),`
			`"b": [0, 1, np.nan, 1, np.nan, np.nan, np.nan, 0],`
			`"c": np.repeat(np.nan, 8),`
			`"d": np.repeat(np.nan, 8),`
			`},`
			`dtype="Int64",`
			`),`
			`),`
			`],`
			`)`
			`def test_diff_integer_na(self, axis, expected):`
			`# GH#24171 IntegerNA Support for DataFrame.diff()`
			`df = DataFrame(`
			`{`
			`"a": np.repeat([0, 1, np.nan, 2], 2),`
			`"b": np.tile([0, 1, np.nan, 2], 2),`
			`"c": np.repeat(np.nan, 8),`
			`"d": np.arange(1, 9) ** 2,`
			`},`
			`dtype="Int64",`
			`)`

			`# Test case for default behaviour of diff`
			`result = df.diff(axis=axis)`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_readonly(self):`
			`# https://github.com/pandas-dev/pandas/issues/35559`
			`arr = np.random.randn(5, 2)`
			`arr.flags.writeable = False`
			`df = DataFrame(arr)`
			`result = df.diff()`
			`expected = DataFrame(np.array(df)).diff()`
			`tm.assert_frame_equal(result, expected)`

			`def test_diff_all_int_dtype(self, any_int_numpy_dtype):`
			`# GH 14773`
			`df = DataFrame(range(5))`
			`df = df.astype(any_int_numpy_dtype)`
			`result = df.diff()`
			`expected_dtype = (`
			`"float32" if any_int_numpy_dtype in ("int8", "int16") else "float64"`
			`)`
			`expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype)`
			`tm.assert_frame_equal(result, expected)`