ai-content-maker/.venv/Lib/site-packages/pandas/tests/indexes/test_common.py

515 lines
17 KiB
Python

"""
Collection of tests asserting things that should be true for
any index subclass except for MultiIndex. Makes use of the `index_flat`
fixture defined in pandas/conftest.py.
"""
import re
import numpy as np
import pytest
from pandas.compat import (
IS64,
pa_version_under7p0,
)
from pandas.core.dtypes.common import is_integer_dtype
import pandas as pd
from pandas import (
CategoricalIndex,
DatetimeIndex,
MultiIndex,
PeriodIndex,
RangeIndex,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.api import NumericIndex
class TestCommon:
    """
    Checks that should hold for every flat (non-MultiIndex) index type.

    Most tests receive their subject through the ``index_flat`` fixture; a
    few take the ``index`` fixture instead and branch on ``MultiIndex``
    themselves where needed.
    """

    @pytest.mark.parametrize("name", [None, "new_name"])
    def test_to_frame(self, name, index_flat):
        """``to_frame`` yields a one-column frame named after the index."""
        # see GH#15230, GH#22580
        idx = index_flat

        # Use the requested name, else the index's own name, else 0.
        idx_name = name or idx.name or 0

        df = idx.to_frame(name=idx_name)

        assert df.index is idx
        assert len(df.columns) == 1
        assert df.columns[0] == idx_name
        # The column must not hand back the very same values object.
        assert df[idx_name].values is not idx.values

        df = idx.to_frame(index=False, name=idx_name)
        assert df.index is not idx

    def test_droplevel(self, index_flat):
        """``droplevel`` rejects dropping the only level or an unknown one."""
        # GH 21115
        # MultiIndex is tested separately in test_multi.py
        index = index_flat

        assert index.droplevel([]).equals(index)

        # The message does not depend on the level, so build it once.
        msg = (
            "Cannot remove 1 levels from an index with 1 levels: at least one "
            "level must be left."
        )
        for level in [index.name, [index.name]]:
            if isinstance(index.name, tuple) and level is index.name:
                # GH 21121 : droplevel with tuple name
                continue
            with pytest.raises(ValueError, match=msg):
                index.droplevel(level)

        msg = r"'Requested level \(wrong\) does not match index name \(None\)'"
        for level in "wrong", ["wrong"]:
            with pytest.raises(KeyError, match=msg):
                index.droplevel(level)

    def test_constructor_non_hashable_name(self, index_flat):
        """``rename`` and ``set_names`` raise TypeError for unhashable names."""
        # GH 20527
        index = index_flat

        message = "Index.name must be a hashable type"
        renamed = [["1"]]

        # With .rename()
        with pytest.raises(TypeError, match=message):
            index.rename(name=renamed)

        # With .set_names()
        with pytest.raises(TypeError, match=message):
            index.set_names(names=renamed)

    def test_constructor_unwraps_index(self, index_flat):
        """Re-wrapping an index in its own type keeps the underlying data."""
        a = index_flat
        # Passing dtype is necessary for Index([True, False], dtype=object)
        #  case.
        b = type(a)(a, dtype=a.dtype)
        tm.assert_equal(a._data, b._data)

    def test_to_flat_index(self, index_flat):
        """``to_flat_index`` on a flat index returns an equal index."""
        # 22866
        index = index_flat

        result = index.to_flat_index()
        tm.assert_index_equal(result, index)

    def test_set_name_methods(self, index_flat):
        """
        ``set_names`` returns a renamed copy, while ``rename(inplace=True)``
        mutates the index and returns None.
        """
        # MultiIndex tested separately
        index = index_flat
        new_name = "This is the new name for this index"

        original_name = index.name
        new_ind = index.set_names([new_name])
        assert new_ind.name == new_name
        # the original must be untouched by the non-inplace call
        assert index.name == original_name

        res = index.rename(new_name, inplace=True)

        # should return None
        assert res is None
        assert index.name == new_name
        assert index.names == [new_name]

        # NOTE(review): a disabled check that ``set_names("a")`` raises
        # TypeError("list-like") used to sit here as commented-out code. It
        # referenced an undefined name (``ind``), so it is summarized in this
        # note rather than carried along as dead code.
        with pytest.raises(ValueError, match="Level must be None"):
            index.set_names("a", level=0)

        # rename in place just leaves tuples and other containers alone
        name = ("A", "B")
        index.rename(name, inplace=True)
        assert index.name == name
        assert index.names == [name]

    def test_copy_and_deepcopy(self, index_flat):
        """
        ``copy.copy`` / ``copy.deepcopy`` give distinct but equal indexes, and
        ``Index.copy`` applies a new name.
        """
        from copy import (
            copy,
            deepcopy,
        )

        index = index_flat

        for func in (copy, deepcopy):
            idx_copy = func(index)
            assert idx_copy is not index
            assert idx_copy.equals(index)

        new_copy = index.copy(deep=True, name="banana")
        assert new_copy.name == "banana"

    def test_copy_name(self, index_flat):
        """The ``name`` given at construction survives a re-wrap and arithmetic."""
        # GH#12309: Check that the "name" argument
        # passed at initialization is honored.
        index = index_flat

        first = type(index)(index, copy=True, name="mario")
        second = type(first)(first, copy=False)

        # Even though "copy=False", we want a new object.
        assert first is not second
        tm.assert_index_equal(first, second)

        # Not using tm.assert_index_equal() since names differ.
        assert index.equals(first)

        assert first.name == "mario"
        assert second.name == "mario"

        # TODO: belongs in series arithmetic tests?
        s1 = pd.Series(2, index=first)
        s2 = pd.Series(3, index=second[:-1])
        # See GH#13365
        s3 = s1 * s2
        assert s3.index.name == "mario"

    def test_copy_name2(self, index_flat):
        """``copy(name=...)`` validates the length and hashability of the name."""
        # GH#35592
        index = index_flat

        assert index.copy(name="mario").name == "mario"

        with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
            index.copy(name=["mario", "luigi"])

        msg = f"{type(index).__name__}.name must be a hashable type"
        with pytest.raises(TypeError, match=msg):
            index.copy(name=[["mario"]])

    def test_unique_level(self, index_flat):
        """``unique(level=...)`` accepts level 0, the name or None; else raises."""
        # don't test a MultiIndex here (as its tested separated)
        index = index_flat

        # GH 17896
        expected = index.drop_duplicates()
        for level in [0, index.name, None]:
            result = index.unique(level=level)
            tm.assert_index_equal(result, expected)

        msg = "Too many levels: Index has only 1 level, not 4"
        with pytest.raises(IndexError, match=msg):
            index.unique(level=3)

        # The name's repr is escaped because it is spliced into a regex.
        msg = (
            rf"Requested level \(wrong\) does not match index name "
            rf"\({re.escape(repr(index.name))}\)"
        )
        with pytest.raises(KeyError, match=msg):
            index.unique(level="wrong")

    def test_unique(self, index_flat):
        """``unique`` collapses repeated values, including repeated NaNs."""
        # MultiIndex tested separately
        index = index_flat
        if not len(index):
            pytest.skip("Skip check for empty Index and MultiIndex")

        idx = index[[0] * 5]
        idx_unique = index[[0]]

        # We test against `idx_unique`, so first we make sure it's unique
        # and doesn't contain nans.
        assert idx_unique.is_unique is True
        try:
            assert idx_unique.hasnans is False
        except NotImplementedError:
            # hasnans raising NotImplementedError is deliberately tolerated
            pass

        result = idx.unique()
        tm.assert_index_equal(result, idx_unique)

        # nans:
        if not index._can_hold_na:
            pytest.skip("Skip na-check if index cannot hold na")

        vals = index._values[[0] * 5]
        vals[0] = np.nan

        # first two entries: the NaN plus one copy of the repeated value
        vals_unique = vals[:2]
        idx_nan = index._shallow_copy(vals)
        idx_unique_nan = index._shallow_copy(vals_unique)
        assert idx_unique_nan.is_unique is True

        assert idx_nan.dtype == index.dtype
        assert idx_unique_nan.dtype == index.dtype

        expected = idx_unique_nan
        for i in [idx_nan, idx_unique_nan]:
            result = i.unique()
            tm.assert_index_equal(result, expected)

    def test_searchsorted_monotonic(self, index_flat, request):
        """
        ``_searchsorted_monotonic`` works on monotonic indexes and raises on
        others; ``searchsorted`` is checked for increasing indexes only.
        """
        # GH17271
        index = index_flat
        # not implemented for tuple searches in MultiIndex
        # or Intervals searches in IntervalIndex
        if isinstance(index, pd.IntervalIndex):
            mark = pytest.mark.xfail(
                reason="IntervalIndex.searchsorted does not support Interval arg",
                raises=NotImplementedError,
            )
            request.node.add_marker(mark)

        # nothing to test if the index is empty
        if index.empty:
            pytest.skip("Skip check for empty Index")
        value = index[0]

        # determine the expected results (handle dupes for 'right')
        expected_left, expected_right = 0, (index == value).argmin()
        if expected_right == 0:
            # all values are the same, expected_right should be length
            expected_right = len(index)

        # test _searchsorted_monotonic in all cases
        # test searchsorted only for increasing
        increasing = index.is_monotonic_increasing
        if increasing or index.is_monotonic_decreasing:
            ssm_left = index._searchsorted_monotonic(value, side="left")
            assert expected_left == ssm_left

            ssm_right = index._searchsorted_monotonic(value, side="right")
            assert expected_right == ssm_right

            if increasing:
                ss_left = index.searchsorted(value, side="left")
                assert expected_left == ss_left

                ss_right = index.searchsorted(value, side="right")
                assert expected_right == ss_right
        else:
            # non-monotonic should raise.
            msg = "index must be monotonic increasing or decreasing"
            with pytest.raises(ValueError, match=msg):
                index._searchsorted_monotonic(value, side="left")

    def test_drop_duplicates(self, index_flat, keep):
        """``duplicated`` / ``drop_duplicates`` agree with the Series versions."""
        # MultiIndex is tested separately
        index = index_flat
        if isinstance(index, RangeIndex):
            pytest.skip(
                "RangeIndex is tested in test_drop_duplicates_no_duplicates "
                "as it cannot hold duplicates"
            )
        if len(index) == 0:
            pytest.skip(
                "empty index is tested in test_drop_duplicates_no_duplicates "
                "as it cannot hold duplicates"
            )

        # make unique index
        holder = type(index)
        unique_values = list(set(index))
        dtype = index.dtype if isinstance(index, NumericIndex) else None
        unique_idx = holder(unique_values, dtype=dtype)

        # make duplicated index; a fixed seed keeps the selection, and hence
        # any failure, reproducible from run to run
        n = len(unique_idx)
        duplicated_selection = np.random.default_rng(2).choice(n, int(n * 1.5))
        idx = holder(unique_idx.values[duplicated_selection])

        # Series.duplicated is tested separately
        expected_duplicated = (
            pd.Series(duplicated_selection).duplicated(keep=keep).values
        )
        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)

        # Series.drop_duplicates is tested separately
        expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
        tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)

    def test_drop_duplicates_no_duplicates(self, index_flat):
        """On a unique index nothing is flagged and a new object is returned."""
        # MultiIndex is tested separately
        index = index_flat

        # make unique index
        if isinstance(index, RangeIndex):
            # RangeIndex cannot have duplicates
            unique_idx = index
        else:
            holder = type(index)
            unique_values = list(set(index))
            dtype = index.dtype if isinstance(index, NumericIndex) else None
            unique_idx = holder(unique_values, dtype=dtype)

        # check on unique index
        expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
        tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
        result_dropped = unique_idx.drop_duplicates()
        tm.assert_index_equal(result_dropped, unique_idx)
        # validate shallow copy
        assert result_dropped is not unique_idx

    def test_drop_duplicates_inplace(self, index):
        """``drop_duplicates`` does not accept an ``inplace`` keyword."""
        msg = r"drop_duplicates\(\) got an unexpected keyword argument"
        with pytest.raises(TypeError, match=msg):
            index.drop_duplicates(inplace=True)

    def test_has_duplicates(self, index_flat):
        """An index built from one repeated value reports duplicates."""
        # MultiIndex tested separately in:
        #   tests/indexes/multi/test_unique_and_duplicates.
        index = index_flat
        holder = type(index)
        if not len(index) or isinstance(index, RangeIndex):
            # MultiIndex tested separately in:
            #   tests/indexes/multi/test_unique_and_duplicates.
            # RangeIndex is unique by definition.
            pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

        idx = holder([index[0]] * 5)
        assert idx.is_unique is False
        assert idx.has_duplicates is True

    @pytest.mark.parametrize(
        "dtype",
        ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
    )
    def test_astype_preserves_name(self, index, dtype):
        """``astype`` keeps the index name(s) whenever the cast succeeds."""
        # https://github.com/pandas-dev/pandas/issues/32013
        # NOTE(review): this renames the fixture object in place; presumably
        # the fixture hands out a fresh object per test - confirm.
        if isinstance(index, MultiIndex):
            index.names = ["idx" + str(i) for i in range(index.nlevels)]
        else:
            index.name = "idx"

        warn = None
        if (
            isinstance(index, DatetimeIndex)
            and index.tz is not None
            and dtype == "datetime64[ns]"
        ):
            # This astype is deprecated in favor of tz_localize
            warn = FutureWarning
        elif index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
            # imaginary components discarded
            warn = np.ComplexWarning

        is_pyarrow_str = (
            str(index.dtype) == "string[pyarrow]"
            and pa_version_under7p0
            and dtype == "category"
        )
        try:
            # Some of these conversions cannot succeed so we use a try / except
            with tm.assert_produces_warning(
                warn,
                raise_on_extra_warnings=is_pyarrow_str,
                check_stacklevel=False,
            ):
                result = index.astype(dtype)
        except (ValueError, TypeError, NotImplementedError, SystemError):
            # an impossible cast leaves nothing to check
            return

        if isinstance(index, MultiIndex):
            assert result.names == index.names
        else:
            assert result.name == index.name

    def test_asi8_deprecation(self, index):
        """``.asi8`` emits FutureWarning except on datetime-like indexes."""
        # GH#37877
        datetimelike = (DatetimeIndex, TimedeltaIndex, PeriodIndex)
        warn = None if isinstance(index, datetimelike) else FutureWarning

        with tm.assert_produces_warning(warn):
            index.asi8

    def test_hasnans_isnans(self, index_flat):
        """``_isnan`` / ``hasnans`` report a NaN written into position 1."""
        # GH#11343, added tests for hasnans / isnans
        index = index_flat

        # cases in indices doesn't include NaN
        idx = index.copy(deep=True)
        expected = np.array([False] * len(idx), dtype=bool)
        tm.assert_numpy_array_equal(idx._isnan, expected)
        assert idx.hasnans is False

        idx = index.copy(deep=True)
        values = idx._values

        # The remaining checks need a NaN in the values; bail out for the
        # cases below, which the test does not exercise with a NaN.
        if len(index) == 0:
            return
        elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype):
            return
        elif index.dtype == bool:
            # values[1] = np.nan below casts to True!
            return

        values[1] = np.nan

        idx = type(index)(values)

        expected = np.array([False] * len(idx), dtype=bool)
        expected[1] = True
        tm.assert_numpy_array_equal(idx._isnan, expected)
        assert idx.hasnans is True
@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):
    """``sort_values`` raises ValueError for an unsupported ``na_position``."""
    expected_msg = f"invalid na_position: {na_position}"

    with pytest.raises(ValueError, match=expected_msg):
        index_with_missing.sort_values(na_position=na_position)
@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_values_with_missing(index_with_missing, na_position, request):
    """
    ``sort_values`` orders the non-missing entries and puts the missing ones
    at the end selected by ``na_position``.
    """
    # GH 35584. Test that sort_values works with missing values,
    # sort non-missing and place missing according to na_position
    idx = index_with_missing

    if isinstance(idx, CategoricalIndex):
        xfail_mark = pytest.mark.xfail(
            reason="missing value sorting order not well-defined", strict=False
        )
        request.node.add_marker(xfail_mark)

    n_missing = np.sum(idx.isna())
    present_sorted = np.sort(idx[idx.notna()].values)
    na_block = [None] * n_missing

    # Lay the two pieces out in the order the requested position implies.
    if na_position == "first":
        pieces = [na_block, present_sorted]
    else:
        pieces = [present_sorted, na_block]
    expected_values = np.concatenate(pieces)

    # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
    expected = type(idx)(expected_values, dtype=idx.dtype)

    result = idx.sort_values(na_position=na_position)
    tm.assert_index_equal(result, expected)
def test_ndarray_compat_properties(index):
    """
    The ndarray-style attributes of an index (``T``, ``shape``, ``ndim``,
    ``size``, ``nbytes``) agree with those of its ``.values``.
    """
    if not IS64 and isinstance(index, PeriodIndex):
        pytest.skip("Overflow")

    # Transposing an index leaves it equal to itself.
    assert index.T.equals(index)
    assert index.transpose().equals(index)

    values = index.values

    for attr in ("shape", "ndim", "size"):
        assert getattr(index, attr) == getattr(values, attr)

    backed_by_ndarray = not isinstance(index, (RangeIndex, MultiIndex))
    if backed_by_ndarray:
        # These two are not backed by an ndarray
        assert index.nbytes == values.nbytes

    # test for validity
    index.nbytes
    index.values.nbytes