ai-content-maker/.venv/Lib/site-packages/pandas/tests/frame/test_stack_unstack.py

2186 lines
75 KiB
Python

from datetime import datetime
from io import StringIO
import itertools
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Period,
Series,
Timedelta,
date_range,
)
import pandas._testing as tm
from pandas.core.reshape import reshape as reshape_lib
class TestDataFrameReshape:
def test_stack_unstack(self, float_frame, using_array_manager):
warn = DeprecationWarning if using_array_manager else None
msg = "will attempt to set the values inplace"
df = float_frame.copy()
with tm.assert_produces_warning(warn, match=msg):
df[:] = np.arange(np.prod(df.shape)).reshape(df.shape)
stacked = df.stack()
stacked_df = DataFrame({"foo": stacked, "bar": stacked})
unstacked = stacked.unstack()
unstacked_df = stacked_df.unstack()
tm.assert_frame_equal(unstacked, df)
tm.assert_frame_equal(unstacked_df["bar"], df)
unstacked_cols = stacked.unstack(0)
unstacked_cols_df = stacked_df.unstack(0)
tm.assert_frame_equal(unstacked_cols.T, df)
tm.assert_frame_equal(unstacked_cols_df["bar"].T, df)
def test_stack_mixed_level(self):
# GH 18310
levels = [range(3), [3, "a", "b"], [1, 2]]
# flat columns:
df = DataFrame(1, index=levels[0], columns=levels[1])
result = df.stack()
expected = Series(1, index=MultiIndex.from_product(levels[:2]))
tm.assert_series_equal(result, expected)
# MultiIndex columns:
df = DataFrame(1, index=levels[0], columns=MultiIndex.from_product(levels[1:]))
result = df.stack(1)
expected = DataFrame(
1, index=MultiIndex.from_product([levels[0], levels[2]]), columns=levels[1]
)
tm.assert_frame_equal(result, expected)
# as above, but used labels in level are actually of homogeneous type
result = df[["a", "b"]].stack(1)
expected = expected[["a", "b"]]
tm.assert_frame_equal(result, expected)
def test_unstack_not_consolidated(self, using_array_manager):
# Gh#34708
df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]})
df2 = df[["x"]]
df2["y"] = df["y"]
if not using_array_manager:
assert len(df2._mgr.blocks) == 2
res = df2.unstack()
expected = df.unstack()
tm.assert_series_equal(res, expected)
def test_unstack_fill(self):
# GH #9746: fill_value keyword argument for Series
# and DataFrame unstack
# From a series
data = Series([1, 2, 4, 5], dtype=np.int16)
data.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
result = data.unstack(fill_value=-1)
expected = DataFrame(
{"a": [1, -1, 5], "b": [2, 4, -1]}, index=["x", "y", "z"], dtype=np.int16
)
tm.assert_frame_equal(result, expected)
# From a series with incorrect data type for fill_value
result = data.unstack(fill_value=0.5)
expected = DataFrame(
{"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=float
)
tm.assert_frame_equal(result, expected)
# GH #13971: fill_value when unstacking multiple levels:
df = DataFrame(
{"x": ["a", "a", "b"], "y": ["j", "k", "j"], "z": [0, 1, 2], "w": [0, 1, 2]}
).set_index(["x", "y", "z"])
unstacked = df.unstack(["x", "y"], fill_value=0)
key = ("w", "b", "j")
expected = unstacked[key]
result = Series([0, 0, 2], index=unstacked.index, name=key)
tm.assert_series_equal(result, expected)
stacked = unstacked.stack(["x", "y"])
stacked.index = stacked.index.reorder_levels(df.index.names)
# Workaround for GH #17886 (unnecessarily casts to float):
stacked = stacked.astype(np.int64)
result = stacked.loc[df.index]
tm.assert_frame_equal(result, df)
# From a series
s = df["w"]
result = s.unstack(["x", "y"], fill_value=0)
expected = unstacked["w"]
tm.assert_frame_equal(result, expected)
def test_unstack_fill_frame(self):
# From a dataframe
rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
df = DataFrame(rows, columns=list("AB"), dtype=np.int32)
df.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
result = df.unstack(fill_value=-1)
rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]]
expected = DataFrame(rows, index=list("xyz"), dtype=np.int32)
expected.columns = MultiIndex.from_tuples(
[("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
)
tm.assert_frame_equal(result, expected)
# From a mixed type dataframe
df["A"] = df["A"].astype(np.int16)
df["B"] = df["B"].astype(np.float64)
result = df.unstack(fill_value=-1)
expected["A"] = expected["A"].astype(np.int16)
expected["B"] = expected["B"].astype(np.float64)
tm.assert_frame_equal(result, expected)
# From a dataframe with incorrect data type for fill_value
result = df.unstack(fill_value=0.5)
rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]]
expected = DataFrame(rows, index=list("xyz"), dtype=float)
expected.columns = MultiIndex.from_tuples(
[("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")]
)
tm.assert_frame_equal(result, expected)
def test_unstack_fill_frame_datetime(self):
# Test unstacking with date times
dv = date_range("2012-01-01", periods=4).values
data = Series(dv)
data.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
result = data.unstack()
expected = DataFrame(
{"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]},
index=["x", "y", "z"],
)
tm.assert_frame_equal(result, expected)
result = data.unstack(fill_value=dv[0])
expected = DataFrame(
{"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]},
index=["x", "y", "z"],
)
tm.assert_frame_equal(result, expected)
def test_unstack_fill_frame_timedelta(self):
# Test unstacking with time deltas
td = [Timedelta(days=i) for i in range(4)]
data = Series(td)
data.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
result = data.unstack()
expected = DataFrame(
{"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]},
index=["x", "y", "z"],
)
tm.assert_frame_equal(result, expected)
result = data.unstack(fill_value=td[1])
expected = DataFrame(
{"a": [td[0], td[1], td[3]], "b": [td[1], td[2], td[1]]},
index=["x", "y", "z"],
)
tm.assert_frame_equal(result, expected)
def test_unstack_fill_frame_period(self):
# Test unstacking with period
periods = [
Period("2012-01"),
Period("2012-02"),
Period("2012-03"),
Period("2012-04"),
]
data = Series(periods)
data.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
result = data.unstack()
expected = DataFrame(
{"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]},
index=["x", "y", "z"],
)
tm.assert_frame_equal(result, expected)
result = data.unstack(fill_value=periods[1])
expected = DataFrame(
{
"a": [periods[0], periods[1], periods[3]],
"b": [periods[1], periods[2], periods[1]],
},
index=["x", "y", "z"],
)
tm.assert_frame_equal(result, expected)
def test_unstack_fill_frame_categorical(self):
# Test unstacking with categorical
data = Series(["a", "b", "c", "a"], dtype="category")
data.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
# By default missing values will be NaN
result = data.unstack()
expected = DataFrame(
{
"a": pd.Categorical(list("axa"), categories=list("abc")),
"b": pd.Categorical(list("bcx"), categories=list("abc")),
},
index=list("xyz"),
)
tm.assert_frame_equal(result, expected)
# Fill with non-category results in a ValueError
msg = r"Cannot setitem on a Categorical with a new category \(d\)"
with pytest.raises(TypeError, match=msg):
data.unstack(fill_value="d")
# Fill with category value replaces missing values as expected
result = data.unstack(fill_value="c")
expected = DataFrame(
{
"a": pd.Categorical(list("aca"), categories=list("abc")),
"b": pd.Categorical(list("bcc"), categories=list("abc")),
},
index=list("xyz"),
)
tm.assert_frame_equal(result, expected)
def test_unstack_tuplename_in_multiindex(self):
# GH 19966
idx = MultiIndex.from_product(
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
)
df = DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx)
result = df.unstack(("A", "a"))
expected = DataFrame(
[[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]],
columns=MultiIndex.from_tuples(
[
("d", "a"),
("d", "b"),
("d", "c"),
("e", "a"),
("e", "b"),
("e", "c"),
],
names=[None, ("A", "a")],
),
index=Index([1, 2, 3], name=("B", "b")),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"unstack_idx, expected_values, expected_index, expected_columns",
[
(
("A", "a"),
[[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]],
MultiIndex.from_tuples(
[(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
),
MultiIndex.from_tuples(
[("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")],
names=[None, ("A", "a")],
),
),
(
(("A", "a"), "B"),
[[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]],
Index([3, 4], name="C"),
MultiIndex.from_tuples(
[
("d", "a", 1),
("d", "a", 2),
("d", "b", 1),
("d", "b", 2),
("e", "a", 1),
("e", "a", 2),
("e", "b", 1),
("e", "b", 2),
],
names=[None, ("A", "a"), "B"],
),
),
],
)
def test_unstack_mixed_type_name_in_multiindex(
self, unstack_idx, expected_values, expected_index, expected_columns
):
# GH 19966
idx = MultiIndex.from_product(
[["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
)
df = DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx)
result = df.unstack(unstack_idx)
expected = DataFrame(
expected_values, columns=expected_columns, index=expected_index
)
tm.assert_frame_equal(result, expected)
def test_unstack_preserve_dtypes(self):
# Checks fix for #11847
df = DataFrame(
{
"state": ["IL", "MI", "NC"],
"index": ["a", "b", "c"],
"some_categories": Series(["a", "b", "c"]).astype("category"),
"A": np.random.rand(3),
"B": 1,
"C": "foo",
"D": pd.Timestamp("20010102"),
"E": Series([1.0, 50.0, 100.0]).astype("float32"),
"F": Series([3.0, 4.0, 5.0]).astype("float64"),
"G": False,
"H": Series([1, 200, 923442]).astype("int8"),
}
)
def unstack_and_compare(df, column_name):
unstacked1 = df.unstack([column_name])
unstacked2 = df.unstack(column_name)
tm.assert_frame_equal(unstacked1, unstacked2)
df1 = df.set_index(["state", "index"])
unstack_and_compare(df1, "index")
df1 = df.set_index(["state", "some_categories"])
unstack_and_compare(df1, "some_categories")
df1 = df.set_index(["F", "C"])
unstack_and_compare(df1, "F")
df1 = df.set_index(["G", "B", "state"])
unstack_and_compare(df1, "B")
df1 = df.set_index(["E", "A"])
unstack_and_compare(df1, "E")
df1 = df.set_index(["state", "index"])
s = df1["A"]
unstack_and_compare(s, "index")
def test_stack_ints(self):
columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3)))
df = DataFrame(np.random.randn(30, 27), columns=columns)
tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1))
tm.assert_frame_equal(
df.stack(level=[-2, -1]), df.stack(level=1).stack(level=1)
)
df_named = df.copy()
return_value = df_named.columns.set_names(range(3), inplace=True)
assert return_value is None
tm.assert_frame_equal(
df_named.stack(level=[1, 2]), df_named.stack(level=1).stack(level=1)
)
def test_stack_mixed_levels(self):
columns = MultiIndex.from_tuples(
[
("A", "cat", "long"),
("B", "cat", "long"),
("A", "dog", "short"),
("B", "dog", "short"),
],
names=["exp", "animal", "hair_length"],
)
df = DataFrame(np.random.randn(4, 4), columns=columns)
animal_hair_stacked = df.stack(level=["animal", "hair_length"])
exp_hair_stacked = df.stack(level=["exp", "hair_length"])
# GH #8584: Need to check that stacking works when a number
# is passed that is both a level name and in the range of
# the level numbers
df2 = df.copy()
df2.columns.names = ["exp", "animal", 1]
tm.assert_frame_equal(
df2.stack(level=["animal", 1]), animal_hair_stacked, check_names=False
)
tm.assert_frame_equal(
df2.stack(level=["exp", 1]), exp_hair_stacked, check_names=False
)
# When mixed types are passed and the ints are not level
# names, raise
msg = (
"level should contain all level names or all level numbers, not "
"a mixture of the two"
)
with pytest.raises(ValueError, match=msg):
df2.stack(level=["animal", 0])
# GH #8584: Having 0 in the level names could raise a
# strange error about lexsort depth
df3 = df.copy()
df3.columns.names = ["exp", "animal", 0]
tm.assert_frame_equal(
df3.stack(level=["animal", 0]), animal_hair_stacked, check_names=False
)
def test_stack_int_level_names(self):
columns = MultiIndex.from_tuples(
[
("A", "cat", "long"),
("B", "cat", "long"),
("A", "dog", "short"),
("B", "dog", "short"),
],
names=["exp", "animal", "hair_length"],
)
df = DataFrame(np.random.randn(4, 4), columns=columns)
exp_animal_stacked = df.stack(level=["exp", "animal"])
animal_hair_stacked = df.stack(level=["animal", "hair_length"])
exp_hair_stacked = df.stack(level=["exp", "hair_length"])
df2 = df.copy()
df2.columns.names = [0, 1, 2]
tm.assert_frame_equal(
df2.stack(level=[1, 2]), animal_hair_stacked, check_names=False
)
tm.assert_frame_equal(
df2.stack(level=[0, 1]), exp_animal_stacked, check_names=False
)
tm.assert_frame_equal(
df2.stack(level=[0, 2]), exp_hair_stacked, check_names=False
)
# Out-of-order int column names
df3 = df.copy()
df3.columns.names = [2, 0, 1]
tm.assert_frame_equal(
df3.stack(level=[0, 1]), animal_hair_stacked, check_names=False
)
tm.assert_frame_equal(
df3.stack(level=[2, 0]), exp_animal_stacked, check_names=False
)
tm.assert_frame_equal(
df3.stack(level=[2, 1]), exp_hair_stacked, check_names=False
)
def test_unstack_bool(self):
df = DataFrame(
[False, False],
index=MultiIndex.from_arrays([["a", "b"], ["c", "l"]]),
columns=["col"],
)
rs = df.unstack()
xp = DataFrame(
np.array([[False, np.nan], [np.nan, False]], dtype=object),
index=["a", "b"],
columns=MultiIndex.from_arrays([["col", "col"], ["c", "l"]]),
)
tm.assert_frame_equal(rs, xp)
def test_unstack_level_binding(self):
# GH9856
mi = MultiIndex(
levels=[["foo", "bar"], ["one", "two"], ["a", "b"]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]],
names=["first", "second", "third"],
)
s = Series(0, index=mi)
result = s.unstack([1, 2]).stack(0)
expected_mi = MultiIndex(
levels=[["foo", "bar"], ["one", "two"]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=["first", "second"],
)
expected = DataFrame(
np.array(
[[np.nan, 0], [0, np.nan], [np.nan, 0], [0, np.nan]], dtype=np.float64
),
index=expected_mi,
columns=Index(["a", "b"], name="third"),
)
tm.assert_frame_equal(result, expected)
def test_unstack_to_series(self, float_frame):
# check reversibility
data = float_frame.unstack()
assert isinstance(data, Series)
undo = data.unstack().T
tm.assert_frame_equal(undo, float_frame)
# check NA handling
data = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]})
data.index = Index(["a", "b", "c"])
result = data.unstack()
midx = MultiIndex(
levels=[["x", "y"], ["a", "b", "c"]],
codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
)
expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx)
tm.assert_series_equal(result, expected)
# check composability of unstack
old_data = data.copy()
for _ in range(4):
data = data.unstack()
tm.assert_frame_equal(old_data, data)
def test_unstack_dtypes(self):
# GH 2929
rows = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]]
df = DataFrame(rows, columns=list("ABCD"))
result = df.dtypes
expected = Series([np.dtype("int64")] * 4, index=list("ABCD"))
tm.assert_series_equal(result, expected)
# single dtype
df2 = df.set_index(["A", "B"])
df3 = df2.unstack("B")
result = df3.dtypes
expected = Series(
[np.dtype("int64")] * 4,
index=MultiIndex.from_arrays(
[["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B")
),
)
tm.assert_series_equal(result, expected)
# mixed
df2 = df.set_index(["A", "B"])
df2["C"] = 3.0
df3 = df2.unstack("B")
result = df3.dtypes
expected = Series(
[np.dtype("float64")] * 2 + [np.dtype("int64")] * 2,
index=MultiIndex.from_arrays(
[["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B")
),
)
tm.assert_series_equal(result, expected)
df2["D"] = "foo"
df3 = df2.unstack("B")
result = df3.dtypes
expected = Series(
[np.dtype("float64")] * 2 + [np.dtype("object")] * 2,
index=MultiIndex.from_arrays(
[["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B")
),
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"c, d",
(
(np.zeros(5), np.zeros(5)),
(np.arange(5, dtype="f8"), np.arange(5, 10, dtype="f8")),
),
)
def test_unstack_dtypes_mixed_date(self, c, d):
# GH7405
df = DataFrame(
{
"A": ["a"] * 5,
"C": c,
"D": d,
"B": date_range("2012-01-01", periods=5),
}
)
right = df.iloc[:3].copy(deep=True)
df = df.set_index(["A", "B"])
df["D"] = df["D"].astype("int64")
left = df.iloc[:3].unstack(0)
right = right.set_index(["A", "B"]).unstack(0)
right[("D", "a")] = right[("D", "a")].astype("int64")
assert left.shape == (3, 2)
tm.assert_frame_equal(left, right)
def test_unstack_non_unique_index_names(self):
idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")], names=["c1", "c1"])
df = DataFrame([1, 2], index=idx)
msg = "The name c1 occurs multiple times, use a level number"
with pytest.raises(ValueError, match=msg):
df.unstack("c1")
with pytest.raises(ValueError, match=msg):
df.T.stack("c1")
def test_unstack_unused_levels(self):
# GH 17845: unused codes in index make unstack() cast int to float
idx = MultiIndex.from_product([["a"], ["A", "B", "C", "D"]])[:-1]
df = DataFrame([[1, 0]] * 3, index=idx)
result = df.unstack()
exp_col = MultiIndex.from_product([[0, 1], ["A", "B", "C"]])
expected = DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col)
tm.assert_frame_equal(result, expected)
assert (result.columns.levels[1] == idx.levels[1]).all()
# Unused items on both levels
levels = [[0, 1, 7], [0, 1, 2, 3]]
codes = [[0, 0, 1, 1], [0, 2, 0, 2]]
idx = MultiIndex(levels, codes)
block = np.arange(4).reshape(2, 2)
df = DataFrame(np.concatenate([block, block + 4]), index=idx)
result = df.unstack()
expected = DataFrame(
np.concatenate([block * 2, block * 2 + 1], axis=1), columns=idx
)
tm.assert_frame_equal(result, expected)
assert (result.columns.levels[1] == idx.levels[1]).all()
@pytest.mark.parametrize(
"level, idces, col_level, idx_level",
(
(0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]),
(1, [8, 11, 1, 4, 12, 15, 13, 16], [np.nan, 5, 1], [np.nan, "a", 2]),
),
)
def test_unstack_unused_levels_mixed_with_nan(
self, level, idces, col_level, idx_level
):
# With mixed dtype and NaN
levels = [["a", 2, "c"], [1, 3, 5, 7]]
codes = [[0, -1, 1, 1], [0, 2, -1, 2]]
idx = MultiIndex(levels, codes)
data = np.arange(8)
df = DataFrame(data.reshape(4, 2), index=idx)
result = df.unstack(level=level)
exp_data = np.zeros(18) * np.nan
exp_data[idces] = data
cols = MultiIndex.from_product([[0, 1], col_level])
expected = DataFrame(exp_data.reshape(3, 6), index=idx_level, columns=cols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("cols", [["A", "C"], slice(None)])
def test_unstack_unused_level(self, cols):
# GH 18562 : unused codes on the unstacked level
df = DataFrame([[2010, "a", "I"], [2011, "b", "II"]], columns=["A", "B", "C"])
ind = df.set_index(["A", "B", "C"], drop=False)
selection = ind.loc[(slice(None), slice(None), "I"), cols]
result = selection.unstack()
expected = ind.iloc[[0]][cols]
expected.columns = MultiIndex.from_product(
[expected.columns, ["I"]], names=[None, "C"]
)
expected.index = expected.index.droplevel("C")
tm.assert_frame_equal(result, expected)
def test_unstack_long_index(self):
# PH 32624: Error when using a lot of indices to unstack.
# The error occurred only, if a lot of indices are used.
df = DataFrame(
[[1]],
columns=MultiIndex.from_tuples([[0]], names=["c1"]),
index=MultiIndex.from_tuples(
[[0, 0, 1, 0, 0, 0, 1]],
names=["i1", "i2", "i3", "i4", "i5", "i6", "i7"],
),
)
result = df.unstack(["i2", "i3", "i4", "i5", "i6", "i7"])
expected = DataFrame(
[[1]],
columns=MultiIndex.from_tuples(
[[0, 0, 1, 0, 0, 0, 1]],
names=["c1", "i2", "i3", "i4", "i5", "i6", "i7"],
),
index=Index([0], name="i1"),
)
tm.assert_frame_equal(result, expected)
def test_unstack_multi_level_cols(self):
# PH 24729: Unstack a df with multi level columns
df = DataFrame(
[[0.0, 0.0], [0.0, 0.0]],
columns=MultiIndex.from_tuples(
[["B", "C"], ["B", "D"]], names=["c1", "c2"]
),
index=MultiIndex.from_tuples(
[[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"]
),
)
assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"]
def test_unstack_multi_level_rows_and_cols(self):
# PH 28306: Unstack df with multi level cols and rows
df = DataFrame(
[[1, 2], [3, 4], [-1, -2], [-3, -4]],
columns=MultiIndex.from_tuples([["a", "b", "c"], ["d", "e", "f"]]),
index=MultiIndex.from_tuples(
[
["m1", "P3", 222],
["m1", "A5", 111],
["m2", "P3", 222],
["m2", "A5", 111],
],
names=["i1", "i2", "i3"],
),
)
result = df.unstack(["i3", "i2"])
expected = df.unstack(["i3"]).unstack(["i2"])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("idx", [("jim", "joe"), ("joe", "jim")])
@pytest.mark.parametrize("lev", list(range(2)))
def test_unstack_nan_index1(self, idx, lev):
# GH7466
def cast(val):
val_str = "" if val != val else val
return f"{val_str:1}"
df = DataFrame(
{
"jim": ["a", "b", np.nan, "d"],
"joe": ["w", "x", "y", "z"],
"jolie": ["a.w", "b.x", " .y", "d.z"],
}
)
left = df.set_index(["jim", "joe"]).unstack()["jolie"]
right = df.set_index(["joe", "jim"]).unstack()["jolie"].T
tm.assert_frame_equal(left, right)
mi = df.set_index(list(idx))
udf = mi.unstack(level=lev)
assert udf.notna().values.sum() == len(df)
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
rows, cols = udf["jolie"].notna().values.nonzero()
for i, j in zip(rows, cols):
left = sorted(udf["jolie"].iloc[i, j].split("."))
right = mk_list(udf["jolie"].index[i]) + mk_list(udf["jolie"].columns[j])
right = sorted(map(cast, right))
assert left == right
@pytest.mark.parametrize("idx", itertools.permutations(["1st", "2nd", "3rd"]))
@pytest.mark.parametrize("lev", list(range(3)))
@pytest.mark.parametrize("col", ["4th", "5th"])
def test_unstack_nan_index_repeats(self, idx, lev, col):
def cast(val):
val_str = "" if val != val else val
return f"{val_str:1}"
df = DataFrame(
{
"1st": ["d"] * 3
+ [np.nan] * 5
+ ["a"] * 2
+ ["c"] * 3
+ ["e"] * 2
+ ["b"] * 5,
"2nd": ["y"] * 2
+ ["w"] * 3
+ [np.nan] * 3
+ ["z"] * 4
+ [np.nan] * 3
+ ["x"] * 3
+ [np.nan] * 2,
"3rd": [
67,
39,
53,
72,
57,
80,
31,
18,
11,
30,
59,
50,
62,
59,
76,
52,
14,
53,
60,
51,
],
}
)
df["4th"], df["5th"] = (
df.apply(lambda r: ".".join(map(cast, r)), axis=1),
df.apply(lambda r: ".".join(map(cast, r.iloc[::-1])), axis=1),
)
mi = df.set_index(list(idx))
udf = mi.unstack(level=lev)
assert udf.notna().values.sum() == 2 * len(df)
mk_list = lambda a: list(a) if isinstance(a, tuple) else [a]
rows, cols = udf[col].notna().values.nonzero()
for i, j in zip(rows, cols):
left = sorted(udf[col].iloc[i, j].split("."))
right = mk_list(udf[col].index[i]) + mk_list(udf[col].columns[j])
right = sorted(map(cast, right))
assert left == right
def test_unstack_nan_index2(self):
# GH7403
df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)})
df.iloc[3, 1] = np.NaN
left = df.set_index(["A", "B"]).unstack(0)
vals = [
[3, 0, 1, 2, np.nan, np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan, np.nan, 4, 5, 6, 7],
]
vals = list(map(list, zip(*vals)))
idx = Index([np.nan, 0, 1, 2, 4, 5, 6, 7], name="B")
cols = MultiIndex(
levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"]
)
right = DataFrame(vals, columns=cols, index=idx)
tm.assert_frame_equal(left, right)
df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)})
df.iloc[2, 1] = np.NaN
left = df.set_index(["A", "B"]).unstack(0)
vals = [[2, np.nan], [0, 4], [1, 5], [np.nan, 6], [3, 7]]
cols = MultiIndex(
levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"]
)
idx = Index([np.nan, 0, 1, 2, 3], name="B")
right = DataFrame(vals, columns=cols, index=idx)
tm.assert_frame_equal(left, right)
df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)})
df.iloc[3, 1] = np.NaN
left = df.set_index(["A", "B"]).unstack(0)
vals = [[3, np.nan], [0, 4], [1, 5], [2, 6], [np.nan, 7]]
cols = MultiIndex(
levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"]
)
idx = Index([np.nan, 0, 1, 2, 3], name="B")
right = DataFrame(vals, columns=cols, index=idx)
tm.assert_frame_equal(left, right)
def test_unstack_nan_index3(self, using_array_manager):
# GH7401
df = DataFrame(
{
"A": list("aaaaabbbbb"),
"B": (date_range("2012-01-01", periods=5).tolist() * 2),
"C": np.arange(10),
}
)
df.iloc[3, 1] = np.NaN
left = df.set_index(["A", "B"]).unstack()
vals = np.array([[3, 0, 1, 2, np.nan, 4], [np.nan, 5, 6, 7, 8, 9]])
idx = Index(["a", "b"], name="A")
cols = MultiIndex(
levels=[["C"], date_range("2012-01-01", periods=5)],
codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
names=[None, "B"],
)
right = DataFrame(vals, columns=cols, index=idx)
if using_array_manager:
# INFO(ArrayManager) with ArrayManager preserve dtype where possible
cols = right.columns[[1, 2, 3, 5]]
right[cols] = right[cols].astype(df["C"].dtype)
tm.assert_frame_equal(left, right)
def test_unstack_nan_index4(self):
# GH4862
vals = [
["Hg", np.nan, np.nan, 680585148],
["U", 0.0, np.nan, 680585148],
["Pb", 7.07e-06, np.nan, 680585148],
["Sn", 2.3614e-05, 0.0133, 680607017],
["Ag", 0.0, 0.0133, 680607017],
["Hg", -0.00015, 0.0133, 680607017],
]
df = DataFrame(
vals,
columns=["agent", "change", "dosage", "s_id"],
index=[17263, 17264, 17265, 17266, 17267, 17268],
)
left = df.copy().set_index(["s_id", "dosage", "agent"]).unstack()
vals = [
[np.nan, np.nan, 7.07e-06, np.nan, 0.0],
[0.0, -0.00015, np.nan, 2.3614e-05, np.nan],
]
idx = MultiIndex(
levels=[[680585148, 680607017], [0.0133]],
codes=[[0, 1], [-1, 0]],
names=["s_id", "dosage"],
)
cols = MultiIndex(
levels=[["change"], ["Ag", "Hg", "Pb", "Sn", "U"]],
codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]],
names=[None, "agent"],
)
right = DataFrame(vals, columns=cols, index=idx)
tm.assert_frame_equal(left, right)
left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"])
tm.assert_frame_equal(left.unstack(), right)
def test_unstack_nan_index5(self):
# GH9497 - multiple unstack with nulls
df = DataFrame(
{
"1st": [1, 2, 1, 2, 1, 2],
"2nd": date_range("2014-02-01", periods=6, freq="D"),
"jim": 100 + np.arange(6),
"joe": (np.random.randn(6) * 10).round(2),
}
)
df["3rd"] = df["2nd"] - pd.Timestamp("2014-02-02")
df.loc[1, "2nd"] = df.loc[3, "2nd"] = np.nan
df.loc[1, "3rd"] = df.loc[4, "3rd"] = np.nan
left = df.set_index(["1st", "2nd", "3rd"]).unstack(["2nd", "3rd"])
assert left.notna().values.sum() == 2 * len(df)
for col in ["jim", "joe"]:
for _, r in df.iterrows():
key = r["1st"], (col, r["2nd"], r["3rd"])
assert r[col] == left.loc[key]
def test_stack_datetime_column_multiIndex(self):
# GH 8039
t = datetime(2014, 1, 1)
df = DataFrame([1, 2, 3, 4], columns=MultiIndex.from_tuples([(t, "A", "B")]))
result = df.stack()
eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)])
ecols = MultiIndex.from_tuples([(t, "A")])
expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"multiindex_columns",
[
[0, 1, 2, 3, 4],
[0, 1, 2, 3],
[0, 1, 2, 4],
[0, 1, 2],
[1, 2, 3],
[2, 3, 4],
[0, 1],
[0, 2],
[0, 3],
[0],
[2],
[4],
[4, 3, 2, 1, 0],
[3, 2, 1, 0],
[4, 2, 1, 0],
[2, 1, 0],
[3, 2, 1],
[4, 3, 2],
[1, 0],
[2, 0],
[3, 0],
],
)
@pytest.mark.parametrize("level", (-1, 0, 1, [0, 1], [1, 0]))
def test_stack_partial_multiIndex(self, multiindex_columns, level):
# GH 8844
full_multiindex = MultiIndex.from_tuples(
[("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")],
names=["Upper", "Lower"],
)
multiindex = full_multiindex[multiindex_columns]
df = DataFrame(
np.arange(3 * len(multiindex)).reshape(3, len(multiindex)),
columns=multiindex,
)
result = df.stack(level=level, dropna=False)
if isinstance(level, int):
# Stacking a single level should not make any all-NaN rows,
# so df.stack(level=level, dropna=False) should be the same
# as df.stack(level=level, dropna=True).
expected = df.stack(level=level, dropna=True)
if isinstance(expected, Series):
tm.assert_series_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
df.columns = MultiIndex.from_tuples(
df.columns.to_numpy(), names=df.columns.names
)
expected = df.stack(level=level, dropna=False)
if isinstance(expected, Series):
tm.assert_series_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
def test_stack_full_multiIndex(self):
# GH 8844
full_multiindex = MultiIndex.from_tuples(
[("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")],
names=["Upper", "Lower"],
)
df = DataFrame(np.arange(6).reshape(2, 3), columns=full_multiindex[[0, 1, 3]])
result = df.stack(dropna=False)
expected = DataFrame(
[[0, 2], [1, np.nan], [3, 5], [4, np.nan]],
index=MultiIndex(
levels=[[0, 1], ["u", "x", "y", "z"]],
codes=[[0, 0, 1, 1], [1, 3, 1, 3]],
names=[None, "Lower"],
),
columns=Index(["B", "C"], name="Upper"),
)
expected["B"] = expected["B"].astype(df.dtypes[0])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("labels", [list("yxz"), list("yxy")])
def test_stack_preserve_categorical_dtype(self, ordered, labels):
# GH13854
cidx = pd.CategoricalIndex(labels, categories=list("xyz"), ordered=ordered)
df = DataFrame([[10, 11, 12]], columns=cidx)
result = df.stack()
# `MultiIndex.from_product` preserves categorical dtype -
# it's tested elsewhere.
midx = MultiIndex.from_product([df.index, cidx])
expected = Series([10, 11, 12], index=midx)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize(
"labels,data",
[
(list("xyz"), [10, 11, 12, 13, 14, 15]),
(list("zyx"), [14, 15, 12, 13, 10, 11]),
],
)
def test_stack_multi_preserve_categorical_dtype(self, ordered, labels, data):
# GH-36991
cidx = pd.CategoricalIndex(labels, categories=sorted(labels), ordered=ordered)
cidx2 = pd.CategoricalIndex(["u", "v"], ordered=ordered)
midx = MultiIndex.from_product([cidx, cidx2])
df = DataFrame([sorted(data)], columns=midx)
result = df.stack([0, 1])
s_cidx = pd.CategoricalIndex(sorted(labels), ordered=ordered)
expected = Series(data, index=MultiIndex.from_product([[0], s_cidx, cidx2]))
tm.assert_series_equal(result, expected)
def test_stack_preserve_categorical_dtype_values(self):
# GH-23077
cat = pd.Categorical(["a", "a", "b", "c"])
df = DataFrame({"A": cat, "B": cat})
result = df.stack()
index = MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]])
expected = Series(
pd.Categorical(["a", "a", "a", "a", "b", "b", "c", "c"]), index=index
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"index, columns",
[
([0, 0, 1, 1], MultiIndex.from_product([[1, 2], ["a", "b"]])),
([0, 0, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])),
([0, 1, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])),
],
)
def test_stack_multi_columns_non_unique_index(self, index, columns):
# GH-28301
df = DataFrame(index=index, columns=columns).fillna(1)
stacked = df.stack()
new_index = MultiIndex.from_tuples(stacked.index.to_numpy())
expected = DataFrame(
stacked.to_numpy(), index=new_index, columns=stacked.columns
)
tm.assert_frame_equal(stacked, expected)
stacked_codes = np.asarray(stacked.index.codes)
expected_codes = np.asarray(new_index.codes)
tm.assert_numpy_array_equal(stacked_codes, expected_codes)
@pytest.mark.parametrize("level", [0, 1])
def test_unstack_mixed_extension_types(self, level):
index = MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 1)], names=["a", "b"])
df = DataFrame(
{
"A": pd.array([0, 1, None], dtype="Int64"),
"B": pd.Categorical(["a", "a", "b"]),
},
index=index,
)
result = df.unstack(level=level)
expected = df.astype(object).unstack(level=level)
expected_dtypes = Series(
[df.A.dtype] * 2 + [df.B.dtype] * 2, index=result.columns
)
tm.assert_series_equal(result.dtypes, expected_dtypes)
tm.assert_frame_equal(result.astype(object), expected)
@pytest.mark.parametrize("level", [0, "baz"])
def test_unstack_swaplevel_sortlevel(self, level):
# GH 20994
mi = MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"])
df = DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"])
df.columns.name = "foo"
expected = DataFrame(
[[3, 1, 2, 0]],
columns=MultiIndex.from_tuples(
[("c", "A"), ("c", "B"), ("d", "A"), ("d", "B")], names=["baz", "foo"]
),
)
expected.index.name = "bar"
result = df.unstack().swaplevel(axis=1).sort_index(axis=1, level=level)
tm.assert_frame_equal(result, expected)
def test_unstack_fill_frame_object():
# GH12815 Test unstacking with object.
data = Series(["a", "b", "c", "a"], dtype="object")
data.index = MultiIndex.from_tuples(
[("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")]
)
# By default missing values will be NaN
result = data.unstack()
expected = DataFrame(
{"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz")
)
tm.assert_frame_equal(result, expected)
# Fill with any value replaces missing values as expected
result = data.unstack(fill_value="d")
expected = DataFrame(
{"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz")
)
tm.assert_frame_equal(result, expected)
def test_unstack_timezone_aware_values():
# GH 18338
df = DataFrame(
{
"timestamp": [pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC")],
"a": ["a"],
"b": ["b"],
"c": ["c"],
},
columns=["timestamp", "a", "b", "c"],
)
result = df.set_index(["a", "b"]).unstack()
expected = DataFrame(
[[pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC"), "c"]],
index=Index(["a"], name="a"),
columns=MultiIndex(
levels=[["timestamp", "c"], ["b"]],
codes=[[0, 1], [0, 0]],
names=[None, "b"],
),
)
tm.assert_frame_equal(result, expected)
def test_stack_timezone_aware_values():
# GH 19420
ts = date_range(freq="D", start="20180101", end="20180103", tz="America/New_York")
df = DataFrame({"A": ts}, index=["a", "b", "c"])
result = df.stack()
expected = Series(
ts,
index=MultiIndex(levels=[["a", "b", "c"], ["A"]], codes=[[0, 1, 2], [0, 0, 0]]),
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dropna", [True, False])
def test_stack_empty_frame(dropna):
# GH 36113
expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64)
result = DataFrame(dtype=np.float64).stack(dropna=dropna)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("dropna", [True, False])
@pytest.mark.parametrize("fill_value", [None, 0])
def test_stack_unstack_empty_frame(dropna, fill_value):
# GH 36113
result = (
DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value)
)
expected = DataFrame(dtype=np.int64)
tm.assert_frame_equal(result, expected)
def test_unstack_single_index_series():
# GH 36113
msg = r"index must be a MultiIndex to unstack.*"
with pytest.raises(ValueError, match=msg):
Series(dtype=np.int64).unstack()
def test_unstacking_multi_index_df():
# see gh-30740
df = DataFrame(
{
"name": ["Alice", "Bob"],
"score": [9.5, 8],
"employed": [False, True],
"kids": [0, 0],
"gender": ["female", "male"],
}
)
df = df.set_index(["name", "employed", "kids", "gender"])
df = df.unstack(["gender"], fill_value=0)
expected = df.unstack("employed", fill_value=0).unstack("kids", fill_value=0)
result = df.unstack(["employed", "kids"], fill_value=0)
expected = DataFrame(
[[9.5, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 8.0]],
index=Index(["Alice", "Bob"], name="name"),
columns=MultiIndex.from_tuples(
[
("score", "female", False, 0),
("score", "female", True, 0),
("score", "male", False, 0),
("score", "male", True, 0),
],
names=[None, "gender", "employed", "kids"],
),
)
tm.assert_frame_equal(result, expected)
def test_stack_positional_level_duplicate_column_names():
# https://github.com/pandas-dev/pandas/issues/36353
columns = MultiIndex.from_product([("x", "y"), ("y", "z")], names=["a", "a"])
df = DataFrame([[1, 1, 1, 1]], columns=columns)
result = df.stack(0)
new_columns = Index(["y", "z"], name="a")
new_index = MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"])
expected = DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns)
tm.assert_frame_equal(result, expected)
def test_unstack_non_slice_like_blocks(using_array_manager):
# Case where the mgr_locs of a DataFrame's underlying blocks are not slice-like
mi = MultiIndex.from_product([range(5), ["A", "B", "C"]])
df = DataFrame(np.random.randn(15, 4), index=mi)
df[1] = df[1].astype(np.int64)
if not using_array_manager:
assert any(not x.mgr_locs.is_slice_like for x in df._mgr.blocks)
res = df.unstack()
expected = pd.concat([df[n].unstack() for n in range(4)], keys=range(4), axis=1)
tm.assert_frame_equal(res, expected)
class TestStackUnstackMultiLevel:
def test_unstack(self, multiindex_year_month_day_dataframe_random_data):
# just check that it works for now
ymd = multiindex_year_month_day_dataframe_random_data
unstacked = ymd.unstack()
unstacked.unstack()
# test that ints work
ymd.astype(int).unstack()
# test that int32 work
ymd.astype(np.int32).unstack()
@pytest.mark.parametrize(
"result_rows,result_columns,index_product,expected_row",
[
(
[[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]],
["ix1", "ix2", "col1", "col2", "col3", "col4"],
2,
[None, None, 30.0, None],
),
(
[[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]],
["ix1", "ix2", "col1", "col2", "col3"],
2,
[None, None, 30.0],
),
(
[[1, 1, None, None, 30.0], [2, None, None, None, 30.0]],
["ix1", "ix2", "col1", "col2", "col3"],
None,
[None, None, 30.0],
),
],
)
def test_unstack_partial(
self, result_rows, result_columns, index_product, expected_row
):
# check for regressions on this issue:
# https://github.com/pandas-dev/pandas/issues/19351
# make sure DataFrame.unstack() works when its run on a subset of the DataFrame
# and the Index levels contain values that are not present in the subset
result = DataFrame(result_rows, columns=result_columns).set_index(
["ix1", "ix2"]
)
result = result.iloc[1:2].unstack("ix2")
expected = DataFrame(
[expected_row],
columns=MultiIndex.from_product(
[result_columns[2:], [index_product]], names=[None, "ix2"]
),
index=Index([2], name="ix1"),
)
tm.assert_frame_equal(result, expected)
def test_unstack_multiple_no_empty_columns(self):
index = MultiIndex.from_tuples(
[(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)]
)
s = Series(np.random.randn(4), index=index)
unstacked = s.unstack([1, 2])
expected = unstacked.dropna(axis=1, how="all")
tm.assert_frame_equal(unstacked, expected)
def test_stack(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
# regular roundtrip
unstacked = ymd.unstack()
restacked = unstacked.stack()
tm.assert_frame_equal(restacked, ymd)
unlexsorted = ymd.sort_index(level=2)
unstacked = unlexsorted.unstack(2)
restacked = unstacked.stack()
tm.assert_frame_equal(restacked.sort_index(level=0), ymd)
unlexsorted = unlexsorted[::-1]
unstacked = unlexsorted.unstack(1)
restacked = unstacked.stack().swaplevel(1, 2)
tm.assert_frame_equal(restacked.sort_index(level=0), ymd)
unlexsorted = unlexsorted.swaplevel(0, 1)
unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1)
restacked = unstacked.stack(0).swaplevel(1, 2)
tm.assert_frame_equal(restacked.sort_index(level=0), ymd)
# columns unsorted
unstacked = ymd.unstack()
unstacked = unstacked.sort_index(axis=1, ascending=False)
restacked = unstacked.stack()
tm.assert_frame_equal(restacked, ymd)
# more than 2 levels in the columns
unstacked = ymd.unstack(1).unstack(1)
result = unstacked.stack(1)
expected = ymd.unstack()
tm.assert_frame_equal(result, expected)
result = unstacked.stack(2)
expected = ymd.unstack(1)
tm.assert_frame_equal(result, expected)
result = unstacked.stack(0)
expected = ymd.stack().unstack(1).unstack(1)
tm.assert_frame_equal(result, expected)
# not all levels present in each echelon
unstacked = ymd.unstack(2).loc[:, ::3]
stacked = unstacked.stack().stack()
ymd_stacked = ymd.stack()
tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index))
# stack with negative number
result = ymd.unstack(0).stack(-2)
expected = ymd.unstack(0).stack(0)
tm.assert_equal(result, expected)
@pytest.mark.parametrize(
"idx, columns, exp_idx",
[
[
list("abab"),
["1st", "2nd", "3rd"],
MultiIndex(
levels=[["a", "b"], ["1st", "2nd", "3rd"]],
codes=[
np.tile(np.arange(2).repeat(3), 2),
np.tile(np.arange(3), 4),
],
),
],
[
list("abab"),
["1st", "2nd", "1st"],
MultiIndex(
levels=[["a", "b"], ["1st", "2nd"]],
codes=[np.tile(np.arange(2).repeat(3), 2), np.tile([0, 1, 0], 4)],
),
],
[
MultiIndex.from_tuples((("a", 2), ("b", 1), ("a", 1), ("b", 2))),
["1st", "2nd", "1st"],
MultiIndex(
levels=[["a", "b"], [1, 2], ["1st", "2nd"]],
codes=[
np.tile(np.arange(2).repeat(3), 2),
np.repeat([1, 0, 1], [3, 6, 3]),
np.tile([0, 1, 0], 4),
],
),
],
],
)
def test_stack_duplicate_index(self, idx, columns, exp_idx):
# GH10417
df = DataFrame(
np.arange(12).reshape(4, 3),
index=idx,
columns=columns,
)
result = df.stack()
expected = Series(np.arange(12), index=exp_idx)
tm.assert_series_equal(result, expected)
assert result.index.is_unique is False
li, ri = result.index, expected.index
tm.assert_index_equal(li, ri)
def test_unstack_odd_failure(self):
data = """day,time,smoker,sum,len
Fri,Dinner,No,8.25,3.
Fri,Dinner,Yes,27.03,9
Fri,Lunch,No,3.0,1
Fri,Lunch,Yes,13.68,6
Sat,Dinner,No,139.63,45
Sat,Dinner,Yes,120.77,42
Sun,Dinner,No,180.57,57
Sun,Dinner,Yes,66.82,19
Thu,Dinner,No,3.0,1
Thu,Lunch,No,117.32,44
Thu,Lunch,Yes,51.51,17"""
df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"])
# it works, #2100
result = df.unstack(2)
recons = result.stack()
tm.assert_frame_equal(recons, df)
def test_stack_mixed_dtype(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T
df["foo", "four"] = "foo"
df = df.sort_index(level=1, axis=1)
stacked = df.stack()
result = df["foo"].stack().sort_index()
tm.assert_series_equal(stacked["foo"], result, check_names=False)
assert result.name is None
assert stacked["bar"].dtype == np.float_
def test_unstack_bug(self):
df = DataFrame(
{
"state": ["naive", "naive", "naive", "active", "active", "active"],
"exp": ["a", "b", "b", "b", "a", "a"],
"barcode": [1, 2, 3, 4, 1, 3],
"v": ["hi", "hi", "bye", "bye", "bye", "peace"],
"extra": np.arange(6.0),
}
)
result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)
unstacked = result.unstack()
restacked = unstacked.stack()
tm.assert_series_equal(restacked, result.reindex(restacked.index).astype(float))
def test_stack_unstack_preserve_names(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
unstacked = frame.unstack()
assert unstacked.index.name == "first"
assert unstacked.columns.names == ["exp", "second"]
restacked = unstacked.stack()
assert restacked.index.names == frame.index.names
@pytest.mark.parametrize("method", ["stack", "unstack"])
def test_stack_unstack_wrong_level_name(
self, method, multiindex_dataframe_random_data
):
# GH 18303 - wrong level name should raise
frame = multiindex_dataframe_random_data
# A DataFrame with flat axes:
df = frame.loc["foo"]
with pytest.raises(KeyError, match="does not match index name"):
getattr(df, method)("mistake")
if method == "unstack":
# Same on a Series:
s = df.iloc[:, 0]
with pytest.raises(KeyError, match="does not match index name"):
getattr(s, method)("mistake")
def test_unstack_level_name(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
result = frame.unstack("second")
expected = frame.unstack(level=1)
tm.assert_frame_equal(result, expected)
def test_stack_level_name(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
unstacked = frame.unstack("second")
result = unstacked.stack("exp")
expected = frame.unstack().stack(0)
tm.assert_frame_equal(result, expected)
result = frame.stack("exp")
expected = frame.stack()
tm.assert_series_equal(result, expected)
def test_stack_unstack_multiple(
self, multiindex_year_month_day_dataframe_random_data
):
ymd = multiindex_year_month_day_dataframe_random_data
unstacked = ymd.unstack(["year", "month"])
expected = ymd.unstack("year").unstack("month")
tm.assert_frame_equal(unstacked, expected)
assert unstacked.columns.names == expected.columns.names
# series
s = ymd["A"]
s_unstacked = s.unstack(["year", "month"])
tm.assert_frame_equal(s_unstacked, expected["A"])
restacked = unstacked.stack(["year", "month"])
restacked = restacked.swaplevel(0, 1).swaplevel(1, 2)
restacked = restacked.sort_index(level=0)
tm.assert_frame_equal(restacked, ymd)
assert restacked.index.names == ymd.index.names
# GH #451
unstacked = ymd.unstack([1, 2])
expected = ymd.unstack(1).unstack(1).dropna(axis=1, how="all")
tm.assert_frame_equal(unstacked, expected)
unstacked = ymd.unstack([2, 1])
expected = ymd.unstack(2).unstack(1).dropna(axis=1, how="all")
tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns])
def test_stack_names_and_numbers(
self, multiindex_year_month_day_dataframe_random_data
):
ymd = multiindex_year_month_day_dataframe_random_data
unstacked = ymd.unstack(["year", "month"])
# Can't use mixture of names and numbers to stack
with pytest.raises(ValueError, match="level should contain"):
unstacked.stack([0, "month"])
def test_stack_multiple_out_of_bounds(
self, multiindex_year_month_day_dataframe_random_data
):
# nlevels == 3
ymd = multiindex_year_month_day_dataframe_random_data
unstacked = ymd.unstack(["year", "month"])
with pytest.raises(IndexError, match="Too many levels"):
unstacked.stack([2, 3])
with pytest.raises(IndexError, match="not a valid level number"):
unstacked.stack([-4, -3])
def test_unstack_period_series(self):
# GH4342
idx1 = pd.PeriodIndex(
["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"],
freq="M",
name="period",
)
idx2 = Index(["A", "B"] * 3, name="str")
value = [1, 2, 3, 4, 5, 6]
idx = MultiIndex.from_arrays([idx1, idx2])
s = Series(value, index=idx)
result1 = s.unstack()
result2 = s.unstack(level=1)
result3 = s.unstack(level=0)
e_idx = pd.PeriodIndex(
["2013-01", "2013-02", "2013-03"], freq="M", name="period"
)
expected = DataFrame(
{"A": [1, 3, 5], "B": [2, 4, 6]}, index=e_idx, columns=["A", "B"]
)
expected.columns.name = "str"
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected.T)
idx1 = pd.PeriodIndex(
["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"],
freq="M",
name="period1",
)
idx2 = pd.PeriodIndex(
["2013-12", "2013-11", "2013-10", "2013-09", "2013-08", "2013-07"],
freq="M",
name="period2",
)
idx = MultiIndex.from_arrays([idx1, idx2])
s = Series(value, index=idx)
result1 = s.unstack()
result2 = s.unstack(level=1)
result3 = s.unstack(level=0)
e_idx = pd.PeriodIndex(
["2013-01", "2013-02", "2013-03"], freq="M", name="period1"
)
e_cols = pd.PeriodIndex(
["2013-07", "2013-08", "2013-09", "2013-10", "2013-11", "2013-12"],
freq="M",
name="period2",
)
expected = DataFrame(
[
[np.nan, np.nan, np.nan, np.nan, 2, 1],
[np.nan, np.nan, 4, 3, np.nan, np.nan],
[6, 5, np.nan, np.nan, np.nan, np.nan],
],
index=e_idx,
columns=e_cols,
)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected.T)
def test_unstack_period_frame(self):
# GH4342
idx1 = pd.PeriodIndex(
["2014-01", "2014-02", "2014-02", "2014-02", "2014-01", "2014-01"],
freq="M",
name="period1",
)
idx2 = pd.PeriodIndex(
["2013-12", "2013-12", "2014-02", "2013-10", "2013-10", "2014-02"],
freq="M",
name="period2",
)
value = {"A": [1, 2, 3, 4, 5, 6], "B": [6, 5, 4, 3, 2, 1]}
idx = MultiIndex.from_arrays([idx1, idx2])
df = DataFrame(value, index=idx)
result1 = df.unstack()
result2 = df.unstack(level=1)
result3 = df.unstack(level=0)
e_1 = pd.PeriodIndex(["2014-01", "2014-02"], freq="M", name="period1")
e_2 = pd.PeriodIndex(
["2013-10", "2013-12", "2014-02", "2013-10", "2013-12", "2014-02"],
freq="M",
name="period2",
)
e_cols = MultiIndex.from_arrays(["A A A B B B".split(), e_2])
expected = DataFrame(
[[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols
)
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)
e_1 = pd.PeriodIndex(
["2014-01", "2014-02", "2014-01", "2014-02"], freq="M", name="period1"
)
e_2 = pd.PeriodIndex(
["2013-10", "2013-12", "2014-02"], freq="M", name="period2"
)
e_cols = MultiIndex.from_arrays(["A A B B".split(), e_1])
expected = DataFrame(
[[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols
)
tm.assert_frame_equal(result3, expected)
def test_stack_multiple_bug(self):
# bug when some uniques are not present in the data GH#3170
id_col = ([1] * 3) + ([2] * 3)
name = (["a"] * 3) + (["b"] * 3)
date = pd.to_datetime(["2013-01-03", "2013-01-04", "2013-01-05"] * 2)
var1 = np.random.randint(0, 100, 6)
df = DataFrame({"ID": id_col, "NAME": name, "DATE": date, "VAR1": var1})
multi = df.set_index(["DATE", "ID"])
multi.columns.name = "Params"
unst = multi.unstack("ID")
msg = "The default value of numeric_only"
with tm.assert_produces_warning(FutureWarning, match=msg):
down = unst.resample("W-THU").mean()
rs = down.stack("ID")
xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID")
xp.columns.name = "Params"
tm.assert_frame_equal(rs, xp)
def test_stack_dropna(self):
# GH#3997
df = DataFrame({"A": ["a1", "a2"], "B": ["b1", "b2"], "C": [1, 1]})
df = df.set_index(["A", "B"])
stacked = df.unstack().stack(dropna=False)
assert len(stacked) > len(stacked.dropna())
stacked = df.unstack().stack(dropna=True)
tm.assert_frame_equal(stacked, stacked.dropna())
def test_unstack_multiple_hierarchical(self):
df = DataFrame(
index=[
[0, 0, 0, 0, 1, 1, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1],
[0, 1, 0, 1, 0, 1, 0, 1],
],
columns=[[0, 0, 1, 1], [0, 1, 0, 1]],
)
df.index.names = ["a", "b", "c"]
df.columns.names = ["d", "e"]
# it works!
df.unstack(["b", "c"])
def test_unstack_sparse_keyspace(self):
# memory problems with naive impl GH#2278
# Generate Long File & Test Pivot
NUM_ROWS = 1000
df = DataFrame(
{
"A": np.random.randint(100, size=NUM_ROWS),
"B": np.random.randint(300, size=NUM_ROWS),
"C": np.random.randint(-7, 7, size=NUM_ROWS),
"D": np.random.randint(-19, 19, size=NUM_ROWS),
"E": np.random.randint(3000, size=NUM_ROWS),
"F": np.random.randn(NUM_ROWS),
}
)
idf = df.set_index(["A", "B", "C", "D", "E"])
# it works! is sufficient
idf.unstack("E")
def test_unstack_unobserved_keys(self):
# related to GH#2278 refactoring
levels = [[0, 1], [0, 1, 2, 3]]
codes = [[0, 0, 1, 1], [0, 2, 0, 2]]
index = MultiIndex(levels, codes)
df = DataFrame(np.random.randn(4, 2), index=index)
result = df.unstack()
assert len(result.columns) == 4
recons = result.stack()
tm.assert_frame_equal(recons, df)
@pytest.mark.slow
def test_unstack_number_of_levels_larger_than_int32(self, monkeypatch):
# GH#20601
# GH 26314: Change ValueError to PerformanceWarning
class MockUnstacker(reshape_lib._Unstacker):
def __init__(self, *args, **kwargs) -> None:
# __init__ will raise the warning
super().__init__(*args, **kwargs)
raise Exception("Don't compute final result.")
with monkeypatch.context() as m:
m.setattr(reshape_lib, "_Unstacker", MockUnstacker)
df = DataFrame(
np.random.randn(2**16, 2),
index=[np.arange(2**16), np.arange(2**16)],
)
msg = "The following operation may generate"
with tm.assert_produces_warning(PerformanceWarning, match=msg):
with pytest.raises(Exception, match="Don't compute final result."):
df.unstack()
@pytest.mark.parametrize(
"levels",
itertools.chain.from_iterable(
itertools.product(itertools.permutations([0, 1, 2], width), repeat=2)
for width in [2, 3]
),
)
@pytest.mark.parametrize("stack_lev", range(2))
def test_stack_order_with_unsorted_levels(self, levels, stack_lev):
# GH#16323
# deep check for 1-row case
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
df = DataFrame(columns=columns, data=[range(4)])
df_stacked = df.stack(stack_lev)
assert all(
df.loc[row, col]
== df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]]
for row in df.index
for col in df.columns
)
def test_stack_order_with_unsorted_levels_multi_row(self):
# GH#16323
# check multi-row case
mi = MultiIndex(
levels=[["A", "C", "B"], ["B", "A", "C"]],
codes=[np.repeat(range(3), 3), np.tile(range(3), 3)],
)
df = DataFrame(
columns=mi, index=range(5), data=np.arange(5 * len(mi)).reshape(5, -1)
)
assert all(
df.loc[row, col] == df.stack(0).loc[(row, col[0]), col[1]]
for row in df.index
for col in df.columns
)
def test_stack_unstack_unordered_multiindex(self):
# GH# 18265
values = np.arange(5)
data = np.vstack(
[
[f"b{x}" for x in values], # b0, b1, ..
[f"a{x}" for x in values], # a0, a1, ..
]
)
df = DataFrame(data.T, columns=["b", "a"])
df.columns.name = "first"
second_level_dict = {"x": df}
multi_level_df = pd.concat(second_level_dict, axis=1)
multi_level_df.columns.names = ["second", "first"]
df = multi_level_df.reindex(sorted(multi_level_df.columns), axis=1)
result = df.stack(["first", "second"]).unstack(["first", "second"])
expected = DataFrame(
[["a0", "b0"], ["a1", "b1"], ["a2", "b2"], ["a3", "b3"], ["a4", "b4"]],
index=[0, 1, 2, 3, 4],
columns=MultiIndex.from_tuples(
[("a", "x"), ("b", "x")], names=["first", "second"]
),
)
tm.assert_frame_equal(result, expected)
def test_unstack_preserve_types(
self, multiindex_year_month_day_dataframe_random_data
):
# GH#403
ymd = multiindex_year_month_day_dataframe_random_data
ymd["E"] = "foo"
ymd["F"] = 2
unstacked = ymd.unstack("month")
assert unstacked["A", 1].dtype == np.float64
assert unstacked["E", 1].dtype == np.object_
assert unstacked["F", 1].dtype == np.float64
def test_unstack_group_index_overflow(self):
codes = np.tile(np.arange(500), 2)
level = np.arange(500)
index = MultiIndex(
levels=[level] * 8 + [[0, 1]],
codes=[codes] * 8 + [np.arange(2).repeat(500)],
)
s = Series(np.arange(1000), index=index)
result = s.unstack()
assert result.shape == (500, 2)
# test roundtrip
stacked = result.stack()
tm.assert_series_equal(s, stacked.reindex(s.index))
# put it at beginning
index = MultiIndex(
levels=[[0, 1]] + [level] * 8,
codes=[np.arange(2).repeat(500)] + [codes] * 8,
)
s = Series(np.arange(1000), index=index)
result = s.unstack(0)
assert result.shape == (500, 2)
# put it in middle
index = MultiIndex(
levels=[level] * 4 + [[0, 1]] + [level] * 4,
codes=([codes] * 4 + [np.arange(2).repeat(500)] + [codes] * 4),
)
s = Series(np.arange(1000), index=index)
result = s.unstack(4)
assert result.shape == (500, 2)
def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
# https://github.com/pandas-dev/pandas/issues/37115
df = DataFrame(
{
"a": ["A", "A", "B"],
"b": ["ca", "cb", "cb"],
"v": [10] * 3,
}
).set_index(["a", "b"])
# add another int column to get 2 blocks
df["is_"] = 1
if not using_array_manager:
assert len(df._mgr.blocks) == 2
result = df.unstack("b")
result[("is_", "ca")] = result[("is_", "ca")].fillna(0)
expected = DataFrame(
[[10.0, 10.0, 1.0, 1.0], [np.nan, 10.0, 0.0, 1.0]],
index=Index(["A", "B"], dtype="object", name="a"),
columns=MultiIndex.from_tuples(
[("v", "ca"), ("v", "cb"), ("is_", "ca"), ("is_", "cb")],
names=[None, "b"],
),
)
if using_array_manager:
# INFO(ArrayManager) with ArrayManager preserve dtype where possible
expected[("v", "cb")] = expected[("v", "cb")].astype("int64")
expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64")
tm.assert_frame_equal(result, expected)
def test_unstack_with_level_has_nan(self):
# GH 37510
df1 = DataFrame(
{
"L1": [1, 2, 3, 4],
"L2": [3, 4, 1, 2],
"L3": [1, 1, 1, 1],
"x": [1, 2, 3, 4],
}
)
df1 = df1.set_index(["L1", "L2", "L3"])
new_levels = ["n1", "n2", "n3", None]
df1.index = df1.index.set_levels(levels=new_levels, level="L1")
df1.index = df1.index.set_levels(levels=new_levels, level="L2")
result = df1.unstack("L3")[("x", 1)].sort_index().index
expected = MultiIndex(
levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]],
codes=[[0, 1, 2, 3], [2, 3, 0, 1]],
names=["L1", "L2"],
)
tm.assert_index_equal(result, expected)
def test_stack_nan_in_multiindex_columns(self):
# GH#39481
df = DataFrame(
np.zeros([1, 5]),
columns=MultiIndex.from_tuples(
[
(0, None, None),
(0, 2, 0),
(0, 2, 1),
(0, 3, 0),
(0, 3, 1),
],
),
)
result = df.stack(2)
expected = DataFrame(
[[0.0, np.nan, np.nan], [np.nan, 0.0, 0.0], [np.nan, 0.0, 0.0]],
index=Index([(0, None), (0, 0), (0, 1)]),
columns=Index([(0, None), (0, 2), (0, 3)]),
)
tm.assert_frame_equal(result, expected)
def test_multi_level_stack_categorical(self):
# GH 15239
midx = MultiIndex.from_arrays(
[
["A"] * 2 + ["B"] * 2,
pd.Categorical(list("abab")),
pd.Categorical(list("ccdd")),
]
)
df = DataFrame(np.arange(8).reshape(2, 4), columns=midx)
result = df.stack([1, 2])
expected = DataFrame(
[
[0, np.nan],
[np.nan, 2],
[1, np.nan],
[np.nan, 3],
[4, np.nan],
[np.nan, 6],
[5, np.nan],
[np.nan, 7],
],
columns=["A", "B"],
index=MultiIndex.from_arrays(
[
[0] * 4 + [1] * 4,
pd.Categorical(list("aabbaabb")),
pd.Categorical(list("cdcdcdcd")),
]
),
)
tm.assert_frame_equal(result, expected)
def test_stack_nan_level(self):
# GH 9406
df_nan = DataFrame(
np.arange(4).reshape(2, 2),
columns=MultiIndex.from_tuples(
[("A", np.nan), ("B", "b")], names=["Upper", "Lower"]
),
index=Index([0, 1], name="Num"),
dtype=np.float64,
)
result = df_nan.stack()
expected = DataFrame(
[[0.0, np.nan], [np.nan, 1], [2.0, np.nan], [np.nan, 3.0]],
columns=Index(["A", "B"], name="Upper"),
index=MultiIndex.from_tuples(
[(0, np.nan), (0, "b"), (1, np.nan), (1, "b")], names=["Num", "Lower"]
),
)
tm.assert_frame_equal(result, expected)
def test_unstack_categorical_columns(self):
# GH 14018
idx = MultiIndex.from_product([["A"], [0, 1]])
df = DataFrame({"cat": pd.Categorical(["a", "b"])}, index=idx)
result = df.unstack()
expected = DataFrame(
{
0: pd.Categorical(["a"], categories=["a", "b"]),
1: pd.Categorical(["b"], categories=["a", "b"]),
},
index=["A"],
)
expected.columns = MultiIndex.from_tuples([("cat", 0), ("cat", 1)])
tm.assert_frame_equal(result, expected)
def test_stack_unsorted(self):
# GH 16925
PAE = ["ITA", "FRA"]
VAR = ["A1", "A2"]
TYP = ["CRT", "DBT", "NET"]
MI = MultiIndex.from_product([PAE, VAR, TYP], names=["PAE", "VAR", "TYP"])
V = list(range(len(MI)))
DF = DataFrame(data=V, index=MI, columns=["VALUE"])
DF = DF.unstack(["VAR", "TYP"])
DF.columns = DF.columns.droplevel(0)
DF.loc[:, ("A0", "NET")] = 9999
result = DF.stack(["VAR", "TYP"]).sort_index()
expected = DF.sort_index(axis=1).stack(["VAR", "TYP"]).sort_index()
tm.assert_series_equal(result, expected)
def test_stack_nullable_dtype(self):
# GH#43561
columns = MultiIndex.from_product(
[["54511", "54515"], ["r", "t_mean"]], names=["station", "element"]
)
index = Index([1, 2, 3], name="time")
arr = np.array([[50, 226, 10, 215], [10, 215, 9, 220], [305, 232, 111, 220]])
df = DataFrame(arr, columns=columns, index=index, dtype=pd.Int64Dtype())
result = df.stack("station")
expected = df.astype(np.int64).stack("station").astype(pd.Int64Dtype())
tm.assert_frame_equal(result, expected)
# non-homogeneous case
df[df.columns[0]] = df[df.columns[0]].astype(pd.Float64Dtype())
result = df.stack("station")
# TODO(EA2D): we get object dtype because DataFrame.values can't
# be an EA
expected = df.astype(object).stack("station")
tm.assert_frame_equal(result, expected)