ai-content-maker/.venv/Lib/site-packages/pandas/tests/arrays/test_datetimes.py

642 lines
22 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
"""
Tests for DatetimeArray
"""
import operator
import numpy as np
import pytest
from pandas._libs.tslibs import tz_compare
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas.core.dtypes.dtypes import DatetimeTZDtype
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
class TestNonNano:
@pytest.fixture(params=["s", "ms", "us"])
def unit(self, request):
"""Fixture returning parametrized time units"""
return request.param
@pytest.fixture
def reso(self, unit):
"""Fixture returning datetime resolution for a given time unit"""
return {
"s": NpyDatetimeUnit.NPY_FR_s.value,
"ms": NpyDatetimeUnit.NPY_FR_ms.value,
"us": NpyDatetimeUnit.NPY_FR_us.value,
}[unit]
@pytest.fixture
def dtype(self, unit, tz_naive_fixture):
tz = tz_naive_fixture
if tz is None:
return np.dtype(f"datetime64[{unit}]")
else:
return DatetimeTZDtype(unit=unit, tz=tz)
@pytest.fixture
def dta_dti(self, unit, dtype):
tz = getattr(dtype, "tz", None)
dti = pd.date_range("2016-01-01", periods=55, freq="D", tz=tz)
if tz is None:
arr = np.asarray(dti).astype(f"M8[{unit}]")
else:
arr = np.asarray(dti.tz_convert("UTC").tz_localize(None)).astype(
f"M8[{unit}]"
)
dta = DatetimeArray._simple_new(arr, dtype=dtype)
return dta, dti
@pytest.fixture
def dta(self, dta_dti):
dta, dti = dta_dti
return dta
def test_non_nano(self, unit, reso, dtype):
arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
dta = DatetimeArray._simple_new(arr, dtype=dtype)
assert dta.dtype == dtype
assert dta[0]._reso == reso
assert tz_compare(dta.tz, dta[0].tz)
assert (dta[0] == dta[:1]).all()
@pytest.mark.filterwarnings(
"ignore:weekofyear and week have been deprecated:FutureWarning"
)
@pytest.mark.parametrize(
"field", DatetimeArray._field_ops + DatetimeArray._bool_ops
)
def test_fields(self, unit, reso, field, dtype, dta_dti):
dta, dti = dta_dti
# FIXME: assert (dti == dta).all()
res = getattr(dta, field)
expected = getattr(dti._data, field)
tm.assert_numpy_array_equal(res, expected)
def test_normalize(self, unit):
dti = pd.date_range("2016-01-01 06:00:00", periods=55, freq="D")
arr = np.asarray(dti).astype(f"M8[{unit}]")
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
assert not dta.is_normalized
# TODO: simplify once we can just .astype to other unit
exp = np.asarray(dti.normalize()).astype(f"M8[{unit}]")
expected = DatetimeArray._simple_new(exp, dtype=exp.dtype)
res = dta.normalize()
tm.assert_extension_array_equal(res, expected)
def test_simple_new_requires_match(self, unit):
arr = np.arange(5, dtype=np.int64).view(f"M8[{unit}]")
dtype = DatetimeTZDtype(unit, "UTC")
dta = DatetimeArray._simple_new(arr, dtype=dtype)
assert dta.dtype == dtype
wrong = DatetimeTZDtype("ns", "UTC")
with pytest.raises(AssertionError, match=""):
DatetimeArray._simple_new(arr, dtype=wrong)
def test_std_non_nano(self, unit):
dti = pd.date_range("2016-01-01", periods=55, freq="D")
arr = np.asarray(dti).astype(f"M8[{unit}]")
dta = DatetimeArray._simple_new(arr, dtype=arr.dtype)
# we should match the nano-reso std, but floored to our reso.
res = dta.std()
assert res._reso == dta._reso
assert res == dti.std().floor(unit)
@pytest.mark.filterwarnings("ignore:Converting to PeriodArray.*:UserWarning")
def test_to_period(self, dta_dti):
dta, dti = dta_dti
result = dta.to_period("D")
expected = dti._data.to_period("D")
tm.assert_extension_array_equal(result, expected)
def test_iter(self, dta):
res = next(iter(dta))
expected = dta[0]
assert type(res) is pd.Timestamp
assert res.value == expected.value
assert res._reso == expected._reso
assert res == expected
def test_astype_object(self, dta):
result = dta.astype(object)
assert all(x._reso == dta._reso for x in result)
assert all(x == y for x, y in zip(result, dta))
def test_to_pydatetime(self, dta_dti):
dta, dti = dta_dti
result = dta.to_pydatetime()
expected = dti.to_pydatetime()
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("meth", ["time", "timetz", "date"])
def test_time_date(self, dta_dti, meth):
dta, dti = dta_dti
result = getattr(dta, meth)
expected = getattr(dti, meth)
tm.assert_numpy_array_equal(result, expected)
def test_format_native_types(self, unit, reso, dtype, dta_dti):
# In this case we should get the same formatted values with our nano
# version dti._data as we do with the non-nano dta
dta, dti = dta_dti
res = dta._format_native_types()
exp = dti._data._format_native_types()
tm.assert_numpy_array_equal(res, exp)
def test_repr(self, dta_dti, unit):
dta, dti = dta_dti
assert repr(dta) == repr(dti._data).replace("[ns", f"[{unit}")
# TODO: tests with td64
def test_compare_mismatched_resolutions(self, comparison_op):
# comparison that numpy gets wrong bc of silent overflows
op = comparison_op
iinfo = np.iinfo(np.int64)
vals = np.array([iinfo.min, iinfo.min + 1, iinfo.max], dtype=np.int64)
# Construct so that arr2[1] < arr[1] < arr[2] < arr2[2]
arr = np.array(vals).view("M8[ns]")
arr2 = arr.view("M8[s]")
left = DatetimeArray._simple_new(arr, dtype=arr.dtype)
right = DatetimeArray._simple_new(arr2, dtype=arr2.dtype)
if comparison_op is operator.eq:
expected = np.array([False, False, False])
elif comparison_op is operator.ne:
expected = np.array([True, True, True])
elif comparison_op in [operator.lt, operator.le]:
expected = np.array([False, False, True])
else:
expected = np.array([False, True, False])
result = op(left, right)
tm.assert_numpy_array_equal(result, expected)
result = op(left[1], right)
tm.assert_numpy_array_equal(result, expected)
if op not in [operator.eq, operator.ne]:
# check that numpy still gets this wrong; if it is fixed we may be
# able to remove compare_mismatched_resolutions
np_res = op(left._ndarray, right._ndarray)
tm.assert_numpy_array_equal(np_res[1:], ~expected[1:])
class TestDatetimeArrayComparisons:
# TODO: merge this into tests/arithmetic/test_datetime64 once it is
# sufficiently robust
def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
# arbitrary tz-naive DatetimeIndex
op = comparison_op
dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None)
arr = DatetimeArray(dti)
assert arr.freq == dti.freq
assert arr.tz == dti.tz
right = dti
expected = np.ones(len(arr), dtype=bool)
if comparison_op.__name__ in ["ne", "gt", "lt"]:
# for these the comparisons should be all-False
expected = ~expected
result = op(arr, arr)
tm.assert_numpy_array_equal(result, expected)
for other in [
right,
np.array(right),
list(right),
tuple(right),
right.astype(object),
]:
result = op(arr, other)
tm.assert_numpy_array_equal(result, expected)
result = op(other, arr)
tm.assert_numpy_array_equal(result, expected)
class TestDatetimeArray:
def test_astype_non_nano_tznaive(self):
dti = pd.date_range("2016-01-01", periods=3)
res = dti.astype("M8[s]")
assert res.dtype == "M8[s]"
dta = dti._data
res = dta.astype("M8[s]")
assert res.dtype == "M8[s]"
assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray
def test_astype_non_nano_tzaware(self):
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
res = dti.astype("M8[s, US/Pacific]")
assert res.dtype == "M8[s, US/Pacific]"
dta = dti._data
res = dta.astype("M8[s, US/Pacific]")
assert res.dtype == "M8[s, US/Pacific]"
# from non-nano to non-nano, preserving reso
res2 = res.astype("M8[s, UTC]")
assert res2.dtype == "M8[s, UTC]"
assert not tm.shares_memory(res2, res)
res3 = res.astype("M8[s, UTC]", copy=False)
assert res2.dtype == "M8[s, UTC]"
assert tm.shares_memory(res3, res)
def test_astype_to_same(self):
arr = DatetimeArray._from_sequence(
["2000"], dtype=DatetimeTZDtype(tz="US/Central")
)
result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False)
assert result is arr
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"])
@pytest.mark.parametrize(
"other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"]
)
def test_astype_copies(self, dtype, other):
# https://github.com/pandas-dev/pandas/pull/32490
ser = pd.Series([1, 2], dtype=dtype)
orig = ser.copy()
warn = None
if (dtype == "datetime64[ns]") ^ (other == "datetime64[ns]"):
# deprecated in favor of tz_localize
warn = FutureWarning
with tm.assert_produces_warning(warn):
t = ser.astype(other)
t[:] = pd.NaT
tm.assert_series_equal(ser, orig)
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])
if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
else:
expected_dtype = np.dtype("int64")
expected = arr.astype(expected_dtype)
warn = None
if dtype != expected_dtype:
warn = FutureWarning
msg = " will return exactly the specified dtype"
with tm.assert_produces_warning(warn, match=msg):
result = arr.astype(dtype)
assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)
def test_tz_setter_raises(self):
arr = DatetimeArray._from_sequence(
["2000"], dtype=DatetimeTZDtype(tz="US/Central")
)
with pytest.raises(AttributeError, match="tz_localize"):
arr.tz = "UTC"
def test_setitem_str_impute_tz(self, tz_naive_fixture):
# Like for getitem, if we are passed a naive-like string, we impute
# our own timezone.
tz = tz_naive_fixture
data = np.array([1, 2, 3], dtype="M8[ns]")
dtype = data.dtype if tz is None else DatetimeTZDtype(tz=tz)
arr = DatetimeArray(data, dtype=dtype)
expected = arr.copy()
ts = pd.Timestamp("2020-09-08 16:50").tz_localize(tz)
setter = str(ts.tz_localize(None))
# Setting a scalar tznaive string
expected[0] = ts
arr[0] = setter
tm.assert_equal(arr, expected)
# Setting a listlike of tznaive strings
expected[1] = ts
arr[:2] = [setter, setter]
tm.assert_equal(arr, expected)
def test_setitem_different_tz_raises(self):
data = np.array([1, 2, 3], dtype="M8[ns]")
arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central"))
with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
arr[0] = pd.Timestamp("2000")
ts = pd.Timestamp("2000", tz="US/Eastern")
with pytest.raises(ValueError, match="US/Central"):
with tm.assert_produces_warning(
FutureWarning, match="mismatched timezones"
):
arr[0] = ts
# once deprecation is enforced
# assert arr[0] == ts.tz_convert("US/Central")
def test_setitem_clears_freq(self):
a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central"))
a[0] = pd.Timestamp("2000", tz="US/Central")
assert a.freq is None
@pytest.mark.parametrize(
"obj",
[
pd.Timestamp("2021-01-01"),
pd.Timestamp("2021-01-01").to_datetime64(),
pd.Timestamp("2021-01-01").to_pydatetime(),
],
)
def test_setitem_objects(self, obj):
# make sure we accept datetime64 and datetime in addition to Timestamp
dti = pd.date_range("2000", periods=2, freq="D")
arr = dti._data
arr[0] = obj
assert arr[0] == obj
def test_repeat_preserves_tz(self):
dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
arr = DatetimeArray(dti)
repeated = arr.repeat([1, 1])
# preserves tz and values, but not freq
expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype)
tm.assert_equal(repeated, expected)
def test_value_counts_preserves_tz(self):
dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
arr = DatetimeArray(dti).repeat([4, 3])
result = arr.value_counts()
# Note: not tm.assert_index_equal, since `freq`s do not match
assert result.index.equals(dti)
arr[-2] = pd.NaT
result = arr.value_counts(dropna=False)
expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("method", ["pad", "backfill"])
def test_fillna_preserves_tz(self, method):
dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
arr = DatetimeArray(dti, copy=True)
arr[2] = pd.NaT
fill_val = dti[1] if method == "pad" else dti[3]
expected = DatetimeArray._from_sequence(
[dti[0], dti[1], fill_val, dti[3], dti[4]],
dtype=DatetimeTZDtype(tz="US/Central"),
)
result = arr.fillna(method=method)
tm.assert_extension_array_equal(result, expected)
# assert that arr and dti were not modified in-place
assert arr[2] is pd.NaT
assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central")
def test_fillna_2d(self):
dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
dta = dti._data.reshape(3, 2).copy()
dta[0, 1] = pd.NaT
dta[1, 0] = pd.NaT
res1 = dta.fillna(method="pad")
expected1 = dta.copy()
expected1[1, 0] = dta[0, 0]
tm.assert_extension_array_equal(res1, expected1)
res2 = dta.fillna(method="backfill")
expected2 = dta.copy()
expected2 = dta.copy()
expected2[1, 0] = dta[2, 0]
expected2[0, 1] = dta[1, 1]
tm.assert_extension_array_equal(res2, expected2)
# with different ordering for underlying ndarray; behavior should
# be unchanged
dta2 = dta._from_backing_data(dta._ndarray.copy(order="F"))
assert dta2._ndarray.flags["F_CONTIGUOUS"]
assert not dta2._ndarray.flags["C_CONTIGUOUS"]
tm.assert_extension_array_equal(dta, dta2)
res3 = dta2.fillna(method="pad")
tm.assert_extension_array_equal(res3, expected1)
res4 = dta2.fillna(method="backfill")
tm.assert_extension_array_equal(res4, expected2)
# test the DataFrame method while we're here
df = pd.DataFrame(dta)
res = df.fillna(method="pad")
expected = pd.DataFrame(expected1)
tm.assert_frame_equal(res, expected)
res = df.fillna(method="backfill")
expected = pd.DataFrame(expected2)
tm.assert_frame_equal(res, expected)
def test_array_interface_tz(self):
tz = "US/Central"
data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz))
result = np.asarray(data)
expected = np.array(
[
pd.Timestamp("2017-01-01T00:00:00", tz=tz),
pd.Timestamp("2017-01-02T00:00:00", tz=tz),
],
dtype=object,
)
tm.assert_numpy_array_equal(result, expected)
result = np.asarray(data, dtype=object)
tm.assert_numpy_array_equal(result, expected)
result = np.asarray(data, dtype="M8[ns]")
expected = np.array(
["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]"
)
tm.assert_numpy_array_equal(result, expected)
def test_array_interface(self):
data = DatetimeArray(pd.date_range("2017", periods=2))
expected = np.array(
["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]"
)
result = np.asarray(data)
tm.assert_numpy_array_equal(result, expected)
result = np.asarray(data, dtype=object)
expected = np.array(
[pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")],
dtype=object,
)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("index", [True, False])
def test_searchsorted_different_tz(self, index):
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
arr = DatetimeArray(data, freq="D").tz_localize("Asia/Tokyo")
if index:
arr = pd.Index(arr)
expected = arr.searchsorted(arr[2])
result = arr.searchsorted(arr[2].tz_convert("UTC"))
assert result == expected
expected = arr.searchsorted(arr[2:6])
result = arr.searchsorted(arr[2:6].tz_convert("UTC"))
tm.assert_equal(result, expected)
@pytest.mark.parametrize("index", [True, False])
def test_searchsorted_tzawareness_compat(self, index):
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
arr = DatetimeArray(data, freq="D")
if index:
arr = pd.Index(arr)
mismatch = arr.tz_localize("Asia/Tokyo")
msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
with pytest.raises(TypeError, match=msg):
arr.searchsorted(mismatch[0])
with pytest.raises(TypeError, match=msg):
arr.searchsorted(mismatch)
with pytest.raises(TypeError, match=msg):
mismatch.searchsorted(arr[0])
with pytest.raises(TypeError, match=msg):
mismatch.searchsorted(arr)
@pytest.mark.parametrize(
"other",
[
1,
np.int64(1),
1.0,
np.timedelta64("NaT"),
pd.Timedelta(days=2),
"invalid",
np.arange(10, dtype="i8") * 24 * 3600 * 10**9,
np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10**9,
pd.Timestamp("2021-01-01").to_period("D"),
],
)
@pytest.mark.parametrize("index", [True, False])
def test_searchsorted_invalid_types(self, other, index):
data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9
arr = DatetimeArray(data, freq="D")
if index:
arr = pd.Index(arr)
msg = "|".join(
[
"searchsorted requires compatible dtype or scalar",
"value should be a 'Timestamp', 'NaT', or array of those. Got",
]
)
with pytest.raises(TypeError, match=msg):
arr.searchsorted(other)
def test_shift_fill_value(self):
dti = pd.date_range("2016-01-01", periods=3)
dta = dti._data
expected = DatetimeArray(np.roll(dta._data, 1))
fv = dta[-1]
for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
result = dta.shift(1, fill_value=fill_value)
tm.assert_datetime_array_equal(result, expected)
dta = dta.tz_localize("UTC")
expected = expected.tz_localize("UTC")
fv = dta[-1]
for fill_value in [fv, fv.to_pydatetime()]:
result = dta.shift(1, fill_value=fill_value)
tm.assert_datetime_array_equal(result, expected)
def test_shift_value_tzawareness_mismatch(self):
dti = pd.date_range("2016-01-01", periods=3)
dta = dti._data
fv = dta[-1].tz_localize("UTC")
for invalid in [fv, fv.to_pydatetime()]:
with pytest.raises(TypeError, match="Cannot compare"):
dta.shift(1, fill_value=invalid)
dta = dta.tz_localize("UTC")
fv = dta[-1].tz_localize(None)
for invalid in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
with pytest.raises(TypeError, match="Cannot compare"):
dta.shift(1, fill_value=invalid)
def test_shift_requires_tzmatch(self):
# since filling is setitem-like, we require a matching timezone,
# not just matching tzawawreness
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
dta = dti._data
fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific")
msg = "Timezones don't match. 'UTC' != 'US/Pacific'"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(
FutureWarning, match="mismatched timezones"
):
dta.shift(1, fill_value=fill_value)
# once deprecation is enforced
# expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC"))
# tm.assert_equal(result, expected)
def test_tz_localize_t2d(self):
dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific")
dta = dti._data.reshape(3, 4)
result = dta.tz_localize(None)
expected = dta.ravel().tz_localize(None).reshape(dta.shape)
tm.assert_datetime_array_equal(result, expected)
roundtrip = expected.tz_localize("US/Pacific")
tm.assert_datetime_array_equal(roundtrip, dta)