ai-content-maker/.venv/Lib/site-packages/pandas/tests/indexing/multiindex/test_multiindex.py

230 lines
7.4 KiB
Python

import numpy as np
import pytest
import pandas._libs.index as _index
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestMultiIndexBasic:
def test_multiindex_perf_warn(self):
df = DataFrame(
{
"jim": [0, 0, 1, 1],
"joe": ["x", "x", "z", "y"],
"jolie": np.random.rand(4),
}
).set_index(["jim", "joe"])
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(1, "z")]
df = df.iloc[[2, 1, 3, 0]]
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(0,)]
def test_indexing_over_hashtable_size_cutoff(self):
n = 10000
old_cutoff = _index._SIZE_CUTOFF
_index._SIZE_CUTOFF = 20000
s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))
# hai it works!
assert s[("a", 5)] == 5
assert s[("a", 6)] == 6
assert s[("a", 7)] == 7
_index._SIZE_CUTOFF = old_cutoff
def test_multi_nan_indexing(self):
# GH 3588
df = DataFrame(
{
"a": ["R1", "R2", np.nan, "R4"],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
}
)
result = df.set_index(["a", "b"], drop=False)
expected = DataFrame(
{
"a": ["R1", "R2", np.nan, "R4"],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
},
index=[
Index(["R1", "R2", np.nan, "R4"], name="a"),
Index(["C1", "C2", "C3", "C4"], name="b"),
],
)
tm.assert_frame_equal(result, expected)
def test_exclusive_nat_column_indexing(self):
# GH 38025
# test multi indexing when one column exclusively contains NaT values
df = DataFrame(
{
"a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
}
)
df = df.set_index(["a", "b"])
expected = DataFrame(
{
"c": [10, 15, np.nan, 20],
},
index=[
Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
Index(["C1", "C2", "C3", "C4"], name="b"),
],
)
tm.assert_frame_equal(df, expected)
def test_nested_tuples_duplicates(self):
# GH#30892
dti = pd.to_datetime(["20190101", "20190101", "20190102"])
idx = Index(["a", "a", "c"])
mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])
df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)
df2 = df.copy(deep=True)
df2.loc[(dti[0], "a"), "c2"] = 1.0
tm.assert_frame_equal(df2, expected)
df3 = df.copy(deep=True)
df3.loc[[(dti[0], "a")], "c2"] = 1.0
tm.assert_frame_equal(df3, expected)
def test_multiindex_with_datatime_level_preserves_freq(self):
# https://github.com/pandas-dev/pandas/issues/35563
idx = Index(range(2), name="A")
dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
mi = MultiIndex.from_product([idx, dti])
df = DataFrame(np.random.randn(14, 2), index=mi)
result = df.loc[0].index
tm.assert_index_equal(result, dti)
assert result.freq == dti.freq
def test_multiindex_complex(self):
# GH#42145
complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
non_complex_data = [3, 4, 5]
result = DataFrame(
{
"x": complex_data,
"y": non_complex_data,
"z": non_complex_data,
}
)
result.set_index(["x", "y"], inplace=True)
expected = DataFrame(
{"z": non_complex_data},
index=MultiIndex.from_arrays(
[complex_data, non_complex_data],
names=("x", "y"),
),
)
tm.assert_frame_equal(result, expected)
def test_rename_multiindex_with_duplicates(self):
# GH 38015
mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
df = DataFrame(index=mi)
df = df.rename(index={"A": "Apple"}, level=0)
mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
expected = DataFrame(index=mi2)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"data_result, data_expected",
[
(
[
[(81.0, np.nan), (np.nan, np.nan)],
[(np.nan, np.nan), (82.0, np.nan)],
[1, 2],
[1, 2],
],
[
[(81.0, np.nan), (np.nan, np.nan)],
[(81.0, np.nan), (np.nan, np.nan)],
[1, 2],
[1, 1],
],
),
(
[
[(81.0, np.nan), (np.nan, np.nan)],
[(np.nan, np.nan), (81.0, np.nan)],
[1, 2],
[1, 2],
],
[
[(81.0, np.nan), (np.nan, np.nan)],
[(81.0, np.nan), (np.nan, np.nan)],
[1, 2],
[2, 1],
],
),
],
)
def test_subtracting_two_series_with_unordered_index_and_all_nan_index(
self, data_result, data_expected
):
# GH 38439
a_index_result = MultiIndex.from_tuples(data_result[0])
b_index_result = MultiIndex.from_tuples(data_result[1])
a_series_result = Series(data_result[2], index=a_index_result)
b_series_result = Series(data_result[3], index=b_index_result)
result = a_series_result.align(b_series_result)
a_index_expected = MultiIndex.from_tuples(data_expected[0])
b_index_expected = MultiIndex.from_tuples(data_expected[1])
a_series_expected = Series(data_expected[2], index=a_index_expected)
b_series_expected = Series(data_expected[3], index=b_index_expected)
a_series_expected.index = a_series_expected.index.set_levels(
[
a_series_expected.index.levels[0].astype("float"),
a_series_expected.index.levels[1].astype("float"),
]
)
b_series_expected.index = b_series_expected.index.set_levels(
[
b_series_expected.index.levels[0].astype("float"),
b_series_expected.index.levels[1].astype("float"),
]
)
tm.assert_series_equal(result[0], a_series_expected)
tm.assert_series_equal(result[1], b_series_expected)
def test_nunique_smoke(self):
# GH 34019
n = DataFrame([[1, 2], [1, 2]]).set_index([0, 1]).index.nunique()
assert n == 1
def test_multiindex_repeated_keys(self):
# GH19414
tm.assert_series_equal(
Series([1, 2], MultiIndex.from_arrays([["a", "b"]])).loc[
["a", "a", "b", "b"]
],
Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])),
)