ai-content-maker/.venv/Lib/site-packages/pandas/tests/plotting/frame/test_frame.py

2256 lines
83 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
""" Test cases for DataFrame.plot """
from datetime import (
date,
datetime,
)
import itertools
import re
import string
import warnings
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas.core.dtypes.api import is_list_like
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
PeriodIndex,
Series,
bdate_range,
date_range,
)
import pandas._testing as tm
from pandas.tests.plotting.common import (
TestPlotBase,
_check_plot_works,
)
from pandas.io.formats.printing import pprint_thing
import pandas.plotting as plotting
try:
from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0
except ImportError:
mpl_ge_3_6_0 = lambda: True
@td.skip_if_no_mpl
class TestDataFramePlots(TestPlotBase):
@pytest.mark.xfail(mpl_ge_3_6_0(), reason="Api changed")
@pytest.mark.slow
def test_plot(self):
df = tm.makeTimeDataFrame()
_check_plot_works(df.plot, grid=False)
# _check_plot_works adds an ax so use default_axes=True to avoid warning
axes = _check_plot_works(df.plot, default_axes=True, subplots=True)
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
axes = _check_plot_works(
df.plot,
default_axes=True,
subplots=True,
layout=(-1, 2),
)
self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
axes = _check_plot_works(
df.plot,
default_axes=True,
subplots=True,
use_index=False,
)
self._check_ticks_props(axes, xrot=0)
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
df = DataFrame({"x": [1, 2], "y": [3, 4]})
msg = "'Line2D' object has no property 'blarg'"
with pytest.raises(AttributeError, match=msg):
df.plot.line(blarg=True)
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
ax = _check_plot_works(df.plot, use_index=True)
self._check_ticks_props(ax, xrot=0)
_check_plot_works(df.plot, sort_columns=False)
_check_plot_works(df.plot, yticks=[1, 5, 10])
_check_plot_works(df.plot, xticks=[1, 5, 10])
_check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100))
_check_plot_works(df.plot, default_axes=True, subplots=True, title="blah")
# We have to redo it here because _check_plot_works does two plots,
# once without an ax kwarg and once with an ax kwarg and the new sharex
# behaviour does not remove the visibility of the latter axis (as ax is
# present). see: https://github.com/pandas-dev/pandas/issues/9737
axes = df.plot(subplots=True, title="blah")
self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
# axes[0].figure.savefig("test.png")
for ax in axes[:2]:
self._check_visible(ax.xaxis) # xaxis must be visible for grid
self._check_visible(ax.get_xticklabels(), visible=False)
self._check_visible(ax.get_xticklabels(minor=True), visible=False)
self._check_visible([ax.xaxis.get_label()], visible=False)
for ax in [axes[2]]:
self._check_visible(ax.xaxis)
self._check_visible(ax.get_xticklabels())
self._check_visible([ax.xaxis.get_label()])
self._check_ticks_props(ax, xrot=0)
_check_plot_works(df.plot, title="blah")
tuples = zip(string.ascii_letters[:10], range(10))
df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
ax = _check_plot_works(df.plot, use_index=True)
self._check_ticks_props(ax, xrot=0)
# unicode
index = MultiIndex.from_tuples(
[
("\u03b1", 0),
("\u03b1", 1),
("\u03b2", 2),
("\u03b2", 3),
("\u03b3", 4),
("\u03b3", 5),
("\u03b4", 6),
("\u03b4", 7),
],
names=["i0", "i1"],
)
columns = MultiIndex.from_tuples(
[("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"]
)
df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index)
_check_plot_works(df.plot, title="\u03A3")
# GH 6951
# Test with single column
df = DataFrame({"x": np.random.rand(10)})
axes = _check_plot_works(df.plot.bar, subplots=True)
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1))
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
# When ax is supplied and required number of axes is 1,
# passed ax should be used:
fig, ax = self.plt.subplots()
axes = df.plot.bar(subplots=True, ax=ax)
assert len(axes) == 1
result = ax.axes
assert result is axes[0]
def test_nullable_int_plot(self):
# GH 32073
dates = ["2008", "2009", None, "2011", "2012"]
df = DataFrame(
{
"A": [1, 2, 3, 4, 5],
"B": [1, 2, 3, 4, 5],
"C": np.array([7, 5, np.nan, 3, 2], dtype=object),
"D": pd.to_datetime(dates, format="%Y").view("i8"),
"E": pd.to_datetime(dates, format="%Y", utc=True).view("i8"),
}
)
_check_plot_works(df.plot, x="A", y="B")
_check_plot_works(df[["A", "B"]].plot, x="A", y="B")
_check_plot_works(df[["C", "A"]].plot, x="C", y="A") # nullable value on x-axis
_check_plot_works(df[["A", "C"]].plot, x="A", y="C")
_check_plot_works(df[["B", "C"]].plot, x="B", y="C")
_check_plot_works(df[["A", "D"]].plot, x="A", y="D")
_check_plot_works(df[["A", "E"]].plot, x="A", y="E")
@pytest.mark.slow
def test_integer_array_plot(self):
# GH 25587
arr = pd.array([1, 2, 3, 4], dtype="UInt32")
s = Series(arr)
_check_plot_works(s.plot.line)
_check_plot_works(s.plot.bar)
_check_plot_works(s.plot.hist)
_check_plot_works(s.plot.pie)
df = DataFrame({"x": arr, "y": arr})
_check_plot_works(df.plot.line)
_check_plot_works(df.plot.bar)
_check_plot_works(df.plot.hist)
_check_plot_works(df.plot.pie, y="y")
_check_plot_works(df.plot.scatter, x="x", y="y")
_check_plot_works(df.plot.hexbin, x="x", y="y")
def test_nonnumeric_exclude(self):
df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]})
ax = df.plot()
assert len(ax.get_lines()) == 1 # B was plotted
def test_implicit_label(self):
df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
ax = df.plot(x="a", y="b")
self._check_text_labels(ax.xaxis.get_label(), "a")
def test_donot_overwrite_index_name(self):
# GH 8494
df = DataFrame(np.random.randn(2, 2), columns=["a", "b"])
df.index.name = "NAME"
df.plot(y="b", label="LABEL")
assert df.index.name == "NAME"
def test_plot_xy(self):
# columns.inferred_type == 'string'
df = tm.makeTimeDataFrame()
self._check_data(df.plot(x=0, y=1), df.set_index("A")["B"].plot())
self._check_data(df.plot(x=0), df.set_index("A").plot())
self._check_data(df.plot(y=0), df.B.plot())
self._check_data(df.plot(x="A", y="B"), df.set_index("A").B.plot())
self._check_data(df.plot(x="A"), df.set_index("A").plot())
self._check_data(df.plot(y="B"), df.B.plot())
# columns.inferred_type == 'integer'
df.columns = np.arange(1, len(df.columns) + 1)
self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot())
self._check_data(df.plot(x=1), df.set_index(1).plot())
self._check_data(df.plot(y=1), df[1].plot())
# figsize and title
ax = df.plot(x=1, y=2, title="Test", figsize=(16, 8))
self._check_text_labels(ax.title, "Test")
self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16.0, 8.0))
# columns.inferred_type == 'mixed'
# TODO add MultiIndex test
@pytest.mark.parametrize(
"input_log, expected_log", [(True, "log"), ("sym", "symlog")]
)
def test_logscales(self, input_log, expected_log):
df = DataFrame({"a": np.arange(100)}, index=np.arange(100))
ax = df.plot(logy=input_log)
self._check_ax_scales(ax, yaxis=expected_log)
assert ax.get_yscale() == expected_log
ax = df.plot(logx=input_log)
self._check_ax_scales(ax, xaxis=expected_log)
assert ax.get_xscale() == expected_log
ax = df.plot(loglog=input_log)
self._check_ax_scales(ax, xaxis=expected_log, yaxis=expected_log)
assert ax.get_xscale() == expected_log
assert ax.get_yscale() == expected_log
@pytest.mark.parametrize("input_param", ["logx", "logy", "loglog"])
def test_invalid_logscale(self, input_param):
# GH: 24867
df = DataFrame({"a": np.arange(100)}, index=np.arange(100))
msg = "Boolean, None and 'sym' are valid options, 'sm' is given."
with pytest.raises(ValueError, match=msg):
df.plot(**{input_param: "sm"})
def test_xcompat(self):
df = tm.makeTimeDataFrame()
ax = df.plot(x_compat=True)
lines = ax.get_lines()
assert not isinstance(lines[0].get_xdata(), PeriodIndex)
self._check_ticks_props(ax, xrot=30)
tm.close()
plotting.plot_params["xaxis.compat"] = True
ax = df.plot()
lines = ax.get_lines()
assert not isinstance(lines[0].get_xdata(), PeriodIndex)
self._check_ticks_props(ax, xrot=30)
tm.close()
plotting.plot_params["x_compat"] = False
ax = df.plot()
lines = ax.get_lines()
assert not isinstance(lines[0].get_xdata(), PeriodIndex)
assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex)
tm.close()
# useful if you're plotting a bunch together
with plotting.plot_params.use("x_compat", True):
ax = df.plot()
lines = ax.get_lines()
assert not isinstance(lines[0].get_xdata(), PeriodIndex)
self._check_ticks_props(ax, xrot=30)
tm.close()
ax = df.plot()
lines = ax.get_lines()
assert not isinstance(lines[0].get_xdata(), PeriodIndex)
assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex)
self._check_ticks_props(ax, xrot=0)
def test_period_compat(self):
# GH 9012
# period-array conversions
df = DataFrame(
np.random.rand(21, 2),
index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)),
columns=["a", "b"],
)
df.plot()
self.plt.axhline(y=0)
tm.close()
def test_unsorted_index(self):
df = DataFrame(
{"y": np.arange(100)}, index=np.arange(99, -1, -1), dtype=np.int64
)
ax = df.plot()
lines = ax.get_lines()[0]
rs = lines.get_xydata()
rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y")
tm.assert_series_equal(rs, df.y, check_index_type=False)
tm.close()
df.index = pd.Index(np.arange(99, -1, -1), dtype=np.float64)
ax = df.plot()
lines = ax.get_lines()[0]
rs = lines.get_xydata()
rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y")
tm.assert_series_equal(rs, df.y)
def test_unsorted_index_lims(self):
df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0])
ax = df.plot()
xmin, xmax = ax.get_xlim()
lines = ax.get_lines()
assert xmin <= np.nanmin(lines[0].get_data()[0])
assert xmax >= np.nanmax(lines[0].get_data()[0])
df = DataFrame(
{"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]},
index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0],
)
ax = df.plot()
xmin, xmax = ax.get_xlim()
lines = ax.get_lines()
assert xmin <= np.nanmin(lines[0].get_data()[0])
assert xmax >= np.nanmax(lines[0].get_data()[0])
df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]})
ax = df.plot(x="z", y="y")
xmin, xmax = ax.get_xlim()
lines = ax.get_lines()
assert xmin <= np.nanmin(lines[0].get_data()[0])
assert xmax >= np.nanmax(lines[0].get_data()[0])
def test_negative_log(self):
df = -DataFrame(
np.random.rand(6, 4),
index=list(string.ascii_letters[:6]),
columns=["x", "y", "z", "four"],
)
msg = "Log-y scales are not supported in area plot"
with pytest.raises(ValueError, match=msg):
df.plot.area(logy=True)
with pytest.raises(ValueError, match=msg):
df.plot.area(loglog=True)
def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
base = np.zeros(len(normal_lines[0].get_data()[1]))
for nl, sl in zip(normal_lines, stacked_lines):
base += nl.get_data()[1] # get y coordinates
sy = sl.get_data()[1]
tm.assert_numpy_array_equal(base, sy)
@pytest.mark.parametrize("kind", ["line", "area"])
def test_line_area_stacked(self, kind):
with tm.RNGContext(42):
df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"])
neg_df = -df
# each column has either positive or negative value
sep_df = DataFrame(
{
"w": np.random.rand(6),
"x": np.random.rand(6),
"y": -np.random.rand(6),
"z": -np.random.rand(6),
}
)
# each column has positive-negative mixed value
mixed_df = DataFrame(
np.random.randn(6, 4),
index=list(string.ascii_letters[:6]),
columns=["w", "x", "y", "z"],
)
ax1 = _check_plot_works(df.plot, kind=kind, stacked=False)
ax2 = _check_plot_works(df.plot, kind=kind, stacked=True)
self._compare_stacked_y_cood(ax1.lines, ax2.lines)
ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False)
ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True)
self._compare_stacked_y_cood(ax1.lines, ax2.lines)
ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False)
ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True)
self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2])
self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:])
_check_plot_works(mixed_df.plot, stacked=False)
msg = (
"When stacked is True, each column must be either all positive or "
"all negative. Column 'w' contains both positive and negative "
"values"
)
with pytest.raises(ValueError, match=msg):
mixed_df.plot(stacked=True)
# Use an index with strictly positive values, preventing
# matplotlib from warning about ignoring xlim
df2 = df.set_index(df.index + 1)
_check_plot_works(df2.plot, kind=kind, logx=True, stacked=True)
def test_line_area_nan_df(self):
values1 = [1, 2, np.nan, 3]
values2 = [3, np.nan, 2, 1]
df = DataFrame({"a": values1, "b": values2})
tdf = DataFrame({"a": values1, "b": values2}, index=tm.makeDateIndex(k=4))
for d in [df, tdf]:
ax = _check_plot_works(d.plot)
masked1 = ax.lines[0].get_ydata()
masked2 = ax.lines[1].get_ydata()
# remove nan for comparison purpose
exp = np.array([1, 2, 3], dtype=np.float64)
tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp)
exp = np.array([3, 2, 1], dtype=np.float64)
tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp)
tm.assert_numpy_array_equal(
masked1.mask, np.array([False, False, True, False])
)
tm.assert_numpy_array_equal(
masked2.mask, np.array([False, True, False, False])
)
expected1 = np.array([1, 2, 0, 3], dtype=np.float64)
expected2 = np.array([3, 0, 2, 1], dtype=np.float64)
ax = _check_plot_works(d.plot, stacked=True)
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
ax = _check_plot_works(d.plot.area)
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
ax = _check_plot_works(d.plot.area, stacked=False)
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2)
def test_line_lim(self):
df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"])
ax = df.plot()
xmin, xmax = ax.get_xlim()
lines = ax.get_lines()
assert xmin <= lines[0].get_data()[0][0]
assert xmax >= lines[0].get_data()[0][-1]
ax = df.plot(secondary_y=True)
xmin, xmax = ax.get_xlim()
lines = ax.get_lines()
assert xmin <= lines[0].get_data()[0][0]
assert xmax >= lines[0].get_data()[0][-1]
axes = df.plot(secondary_y=True, subplots=True)
self._check_axes_shape(axes, axes_num=3, layout=(3, 1))
for ax in axes:
assert hasattr(ax, "left_ax")
assert not hasattr(ax, "right_ax")
xmin, xmax = ax.get_xlim()
lines = ax.get_lines()
assert xmin <= lines[0].get_data()[0][0]
assert xmax >= lines[0].get_data()[0][-1]
@pytest.mark.xfail(
strict=False,
reason="2020-12-01 this has been failing periodically on the "
"ymin==0 assertion for a week or so.",
)
@pytest.mark.parametrize("stacked", [True, False])
def test_area_lim(self, stacked):
df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"])
neg_df = -df
ax = _check_plot_works(df.plot.area, stacked=stacked)
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
lines = ax.get_lines()
assert xmin <= lines[0].get_data()[0][0]
assert xmax >= lines[0].get_data()[0][-1]
assert ymin == 0
ax = _check_plot_works(neg_df.plot.area, stacked=stacked)
ymin, ymax = ax.get_ylim()
assert ymax == 0
def test_area_sharey_dont_overwrite(self):
# GH37942
df = DataFrame(np.random.rand(4, 2), columns=["x", "y"])
fig, (ax1, ax2) = self.plt.subplots(1, 2, sharey=True)
df.plot(ax=ax1, kind="area")
df.plot(ax=ax2, kind="area")
assert self.get_y_axis(ax1).joined(ax1, ax2)
assert self.get_y_axis(ax2).joined(ax1, ax2)
def test_bar_linewidth(self):
df = DataFrame(np.random.randn(5, 5))
# regular
ax = df.plot.bar(linewidth=2)
for r in ax.patches:
assert r.get_linewidth() == 2
# stacked
ax = df.plot.bar(stacked=True, linewidth=2)
for r in ax.patches:
assert r.get_linewidth() == 2
# subplots
axes = df.plot.bar(linewidth=2, subplots=True)
self._check_axes_shape(axes, axes_num=5, layout=(5, 1))
for ax in axes:
for r in ax.patches:
assert r.get_linewidth() == 2
def test_bar_barwidth(self):
df = DataFrame(np.random.randn(5, 5))
width = 0.9
# regular
ax = df.plot.bar(width=width)
for r in ax.patches:
assert r.get_width() == width / len(df.columns)
# stacked
ax = df.plot.bar(stacked=True, width=width)
for r in ax.patches:
assert r.get_width() == width
# horizontal regular
ax = df.plot.barh(width=width)
for r in ax.patches:
assert r.get_height() == width / len(df.columns)
# horizontal stacked
ax = df.plot.barh(stacked=True, width=width)
for r in ax.patches:
assert r.get_height() == width
# subplots
axes = df.plot.bar(width=width, subplots=True)
for ax in axes:
for r in ax.patches:
assert r.get_width() == width
# horizontal subplots
axes = df.plot.barh(width=width, subplots=True)
for ax in axes:
for r in ax.patches:
assert r.get_height() == width
def test_bar_bottom_left(self):
df = DataFrame(np.random.rand(5, 5))
ax = df.plot.bar(stacked=False, bottom=1)
result = [p.get_y() for p in ax.patches]
assert result == [1] * 25
ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5])
result = [p.get_y() for p in ax.patches[:5]]
assert result == [-1, -2, -3, -4, -5]
ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1]))
result = [p.get_x() for p in ax.patches]
assert result == [1] * 25
ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5])
result = [p.get_x() for p in ax.patches[:5]]
assert result == [1, 2, 3, 4, 5]
axes = df.plot.bar(subplots=True, bottom=-1)
for ax in axes:
result = [p.get_y() for p in ax.patches]
assert result == [-1] * 5
axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1]))
for ax in axes:
result = [p.get_x() for p in ax.patches]
assert result == [1] * 5
def test_bar_nan(self):
df = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]})
ax = df.plot.bar()
expected = [10, 0, 20, 5, 10, 20, 1, 2, 3]
result = [p.get_height() for p in ax.patches]
assert result == expected
ax = df.plot.bar(stacked=True)
result = [p.get_height() for p in ax.patches]
assert result == expected
result = [p.get_y() for p in ax.patches]
expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0]
assert result == expected
def test_bar_categorical(self):
# GH 13019
df1 = DataFrame(
np.random.randn(6, 5),
index=pd.Index(list("ABCDEF")),
columns=pd.Index(list("abcde")),
)
# categorical index must behave the same
df2 = DataFrame(
np.random.randn(6, 5),
index=pd.CategoricalIndex(list("ABCDEF")),
columns=pd.CategoricalIndex(list("abcde")),
)
for df in [df1, df2]:
ax = df.plot.bar()
ticks = ax.xaxis.get_ticklocs()
tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
assert ax.get_xlim() == (-0.5, 5.5)
# check left-edge of bars
assert ax.patches[0].get_x() == -0.25
assert ax.patches[-1].get_x() == 5.15
ax = df.plot.bar(stacked=True)
tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5]))
assert ax.get_xlim() == (-0.5, 5.5)
assert ax.patches[0].get_x() == -0.25
assert ax.patches[-1].get_x() == 4.75
def test_plot_scatter(self):
df = DataFrame(
np.random.randn(6, 4),
index=list(string.ascii_letters[:6]),
columns=["x", "y", "z", "four"],
)
_check_plot_works(df.plot.scatter, x="x", y="y")
_check_plot_works(df.plot.scatter, x=1, y=2)
msg = re.escape("scatter() missing 1 required positional argument: 'y'")
with pytest.raises(TypeError, match=msg):
df.plot.scatter(x="x")
msg = re.escape("scatter() missing 1 required positional argument: 'x'")
with pytest.raises(TypeError, match=msg):
df.plot.scatter(y="y")
# GH 6951
axes = df.plot(x="x", y="y", kind="scatter", subplots=True)
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
def test_raise_error_on_datetime_time_data(self):
# GH 8113, datetime.time type is not supported by matplotlib in scatter
df = DataFrame(np.random.randn(10), columns=["a"])
df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time
msg = "must be a string or a (real )?number, not 'datetime.time'"
with pytest.raises(TypeError, match=msg):
df.plot(kind="scatter", x="dtime", y="a")
def test_scatterplot_datetime_data(self):
# GH 30391
dates = date_range(start=date(2019, 1, 1), periods=12, freq="W")
vals = np.random.normal(0, 1, len(dates))
df = DataFrame({"dates": dates, "vals": vals})
_check_plot_works(df.plot.scatter, x="dates", y="vals")
_check_plot_works(df.plot.scatter, x=0, y=1)
def test_scatterplot_object_data(self):
# GH 18755
df = DataFrame({"a": ["A", "B", "C"], "b": [2, 3, 4]})
_check_plot_works(df.plot.scatter, x="a", y="b")
_check_plot_works(df.plot.scatter, x=0, y=1)
df = DataFrame({"a": ["A", "B", "C"], "b": ["a", "b", "c"]})
_check_plot_works(df.plot.scatter, x="a", y="b")
_check_plot_works(df.plot.scatter, x=0, y=1)
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize(
"categories",
(["setosa", "versicolor", "virginica"], ["versicolor", "virginica", "setosa"]),
)
def test_scatterplot_color_by_categorical(self, ordered, categories):
df = DataFrame(
[[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]],
columns=["length", "width"],
)
df["species"] = pd.Categorical(
["setosa", "setosa", "virginica", "virginica", "versicolor"],
ordered=ordered,
categories=categories,
)
ax = df.plot.scatter(x=0, y=1, c="species")
(colorbar_collection,) = ax.collections
colorbar = colorbar_collection.colorbar
expected_ticks = np.array([0.5, 1.5, 2.5])
result_ticks = colorbar.get_ticks()
tm.assert_numpy_array_equal(result_ticks, expected_ticks)
expected_boundaries = np.array([0.0, 1.0, 2.0, 3.0])
result_boundaries = colorbar._boundaries
tm.assert_numpy_array_equal(result_boundaries, expected_boundaries)
expected_yticklabels = categories
result_yticklabels = [i.get_text() for i in colorbar.ax.get_ymajorticklabels()]
assert all(i == j for i, j in zip(result_yticklabels, expected_yticklabels))
@pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")])
def test_plot_scatter_with_categorical_data(self, x, y):
# after fixing GH 18755, should be able to plot categorical data
df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])})
_check_plot_works(df.plot.scatter, x=x, y=y)
def test_plot_scatter_with_c(self):
from pandas.plotting._matplotlib.compat import mpl_ge_3_4_0
df = DataFrame(
np.random.randint(low=0, high=100, size=(6, 4)),
index=list(string.ascii_letters[:6]),
columns=["x", "y", "z", "four"],
)
axes = [df.plot.scatter(x="x", y="y", c="z"), df.plot.scatter(x=0, y=1, c=2)]
for ax in axes:
# default to Greys
assert ax.collections[0].cmap.name == "Greys"
if mpl_ge_3_4_0():
assert ax.collections[0].colorbar.ax.get_ylabel() == "z"
else:
assert ax.collections[0].colorbar._label == "z"
cm = "cubehelix"
ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm)
assert ax.collections[0].cmap.name == cm
# verify turning off colorbar works
ax = df.plot.scatter(x="x", y="y", c="z", colorbar=False)
assert ax.collections[0].colorbar is None
# verify that we can still plot a solid color
ax = df.plot.scatter(x=0, y=1, c="red")
assert ax.collections[0].colorbar is None
self._check_colors(ax.collections, facecolors=["r"])
# Ensure that we can pass an np.array straight through to matplotlib,
# this functionality was accidentally removed previously.
# See https://github.com/pandas-dev/pandas/issues/8852 for bug report
#
# Exercise colormap path and non-colormap path as they are independent
#
df = DataFrame({"A": [1, 2], "B": [3, 4]})
red_rgba = [1.0, 0.0, 0.0, 1.0]
green_rgba = [0.0, 1.0, 0.0, 1.0]
rgba_array = np.array([red_rgba, green_rgba])
ax = df.plot.scatter(x="A", y="B", c=rgba_array)
# expect the face colors of the points in the non-colormap path to be
# identical to the values we supplied, normally we'd be on shaky ground
# comparing floats for equality but here we expect them to be
# identical.
tm.assert_numpy_array_equal(ax.collections[0].get_facecolor(), rgba_array)
# we don't test the colors of the faces in this next plot because they
# are dependent on the spring colormap, which may change its colors
# later.
float_array = np.array([0.0, 1.0])
df.plot.scatter(x="A", y="B", c=float_array, cmap="spring")
def test_plot_scatter_with_s(self):
# this refers to GH 32904
df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"])
ax = df.plot.scatter(x="a", y="b", s="c")
tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes())
def test_plot_scatter_with_norm(self):
# added while fixing GH 45809
import matplotlib as mpl
df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"])
norm = mpl.colors.LogNorm()
ax = df.plot.scatter(x="a", y="b", c="c", norm=norm)
assert ax.collections[0].norm is norm
def test_plot_scatter_without_norm(self):
# added while fixing GH 45809
import matplotlib as mpl
df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"])
ax = df.plot.scatter(x="a", y="b", c="c")
plot_norm = ax.collections[0].norm
color_min_max = (df.c.min(), df.c.max())
default_norm = mpl.colors.Normalize(*color_min_max)
for value in df.c:
assert plot_norm(value) == default_norm(value)
@pytest.mark.slow
def test_plot_bar(self):
df = DataFrame(
np.random.randn(6, 4),
index=list(string.ascii_letters[:6]),
columns=["one", "two", "three", "four"],
)
_check_plot_works(df.plot.bar)
_check_plot_works(df.plot.bar, legend=False)
_check_plot_works(df.plot.bar, default_axes=True, subplots=True)
_check_plot_works(df.plot.bar, stacked=True)
df = DataFrame(
np.random.randn(10, 15),
index=list(string.ascii_letters[:10]),
columns=range(15),
)
_check_plot_works(df.plot.bar)
df = DataFrame({"a": [0, 1], "b": [1, 0]})
ax = _check_plot_works(df.plot.bar)
self._check_ticks_props(ax, xrot=90)
ax = df.plot.bar(rot=35, fontsize=10)
self._check_ticks_props(ax, xrot=35, xlabelsize=10, ylabelsize=10)
ax = _check_plot_works(df.plot.barh)
self._check_ticks_props(ax, yrot=0)
ax = df.plot.barh(rot=55, fontsize=11)
self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11)
def test_boxplot(self, hist_df):
df = hist_df
series = df["height"]
numeric_cols = df._get_numeric_data().columns
labels = [pprint_thing(c) for c in numeric_cols]
ax = _check_plot_works(df.plot.box)
self._check_text_labels(ax.get_xticklabels(), labels)
tm.assert_numpy_array_equal(
ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1)
)
assert len(ax.lines) == 7 * len(numeric_cols)
tm.close()
axes = series.plot.box(rot=40)
self._check_ticks_props(axes, xrot=40, yrot=0)
tm.close()
ax = _check_plot_works(series.plot.box)
positions = np.array([1, 6, 7])
ax = df.plot.box(positions=positions)
numeric_cols = df._get_numeric_data().columns
labels = [pprint_thing(c) for c in numeric_cols]
self._check_text_labels(ax.get_xticklabels(), labels)
tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions)
assert len(ax.lines) == 7 * len(numeric_cols)
def test_boxplot_vertical(self, hist_df):
df = hist_df
numeric_cols = df._get_numeric_data().columns
labels = [pprint_thing(c) for c in numeric_cols]
# if horizontal, yticklabels are rotated
ax = df.plot.box(rot=50, fontsize=8, vert=False)
self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8)
self._check_text_labels(ax.get_yticklabels(), labels)
assert len(ax.lines) == 7 * len(numeric_cols)
axes = _check_plot_works(
df.plot.box,
default_axes=True,
subplots=True,
vert=False,
logx=True,
)
self._check_axes_shape(axes, axes_num=3, layout=(1, 3))
self._check_ax_scales(axes, xaxis="log")
for ax, label in zip(axes, labels):
self._check_text_labels(ax.get_yticklabels(), [label])
assert len(ax.lines) == 7
positions = np.array([3, 2, 8])
ax = df.plot.box(positions=positions, vert=False)
self._check_text_labels(ax.get_yticklabels(), labels)
tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions)
assert len(ax.lines) == 7 * len(numeric_cols)
def test_boxplot_return_type(self):
df = DataFrame(
np.random.randn(6, 4),
index=list(string.ascii_letters[:6]),
columns=["one", "two", "three", "four"],
)
msg = "return_type must be {None, 'axes', 'dict', 'both'}"
with pytest.raises(ValueError, match=msg):
df.plot.box(return_type="not_a_type")
result = df.plot.box(return_type="dict")
self._check_box_return_type(result, "dict")
result = df.plot.box(return_type="axes")
self._check_box_return_type(result, "axes")
result = df.plot.box() # default axes
self._check_box_return_type(result, "axes")
result = df.plot.box(return_type="both")
self._check_box_return_type(result, "both")
@td.skip_if_no_scipy
def test_kde_df(self):
df = DataFrame(np.random.randn(100, 4))
ax = _check_plot_works(df.plot, kind="kde")
expected = [pprint_thing(c) for c in df.columns]
self._check_legend_labels(ax, labels=expected)
self._check_ticks_props(ax, xrot=0)
ax = df.plot(kind="kde", rot=20, fontsize=5)
self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5)
axes = _check_plot_works(
df.plot,
default_axes=True,
kind="kde",
subplots=True,
)
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
axes = df.plot(kind="kde", logy=True, subplots=True)
self._check_ax_scales(axes, yaxis="log")
@td.skip_if_no_scipy
def test_kde_missing_vals(self):
df = DataFrame(np.random.uniform(size=(100, 4)))
df.loc[0, 0] = np.nan
_check_plot_works(df.plot, kind="kde")
def test_hist_df(self):
from matplotlib.patches import Rectangle
df = DataFrame(np.random.randn(100, 4))
series = df[0]
ax = _check_plot_works(df.plot.hist)
expected = [pprint_thing(c) for c in df.columns]
self._check_legend_labels(ax, labels=expected)
axes = _check_plot_works(
df.plot.hist,
default_axes=True,
subplots=True,
logy=True,
)
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
self._check_ax_scales(axes, yaxis="log")
axes = series.plot.hist(rot=40)
self._check_ticks_props(axes, xrot=40, yrot=0)
tm.close()
ax = series.plot.hist(cumulative=True, bins=4, density=True)
# height of last bin (index 5) must be 1.0
rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
tm.assert_almost_equal(rects[-1].get_height(), 1.0)
tm.close()
ax = series.plot.hist(cumulative=True, bins=4)
rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
tm.assert_almost_equal(rects[-2].get_height(), 100.0)
tm.close()
# if horizontal, yticklabels are rotated
axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal")
self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8)
@pytest.mark.parametrize(
"weights", [0.1 * np.ones(shape=(100,)), 0.1 * np.ones(shape=(100, 2))]
)
def test_hist_weights(self, weights):
# GH 33173
np.random.seed(0)
df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100))))
ax1 = _check_plot_works(df.plot, kind="hist", weights=weights)
ax2 = _check_plot_works(df.plot, kind="hist")
patch_height_with_weights = [patch.get_height() for patch in ax1.patches]
# original heights with no weights, and we manually multiply with example
# weights, so after multiplication, they should be almost same
expected_patch_height = [0.1 * patch.get_height() for patch in ax2.patches]
tm.assert_almost_equal(patch_height_with_weights, expected_patch_height)
def _check_box_coord(
self,
patches,
expected_y=None,
expected_h=None,
expected_x=None,
expected_w=None,
):
result_y = np.array([p.get_y() for p in patches])
result_height = np.array([p.get_height() for p in patches])
result_x = np.array([p.get_x() for p in patches])
result_width = np.array([p.get_width() for p in patches])
# dtype is depending on above values, no need to check
if expected_y is not None:
tm.assert_numpy_array_equal(result_y, expected_y, check_dtype=False)
if expected_h is not None:
tm.assert_numpy_array_equal(result_height, expected_h, check_dtype=False)
if expected_x is not None:
tm.assert_numpy_array_equal(result_x, expected_x, check_dtype=False)
if expected_w is not None:
tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False)
def test_hist_df_coord(self):
normal_df = DataFrame(
{
"A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])),
"B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])),
"C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])),
},
columns=["A", "B", "C"],
)
nan_df = DataFrame(
{
"A": np.repeat(
np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6])
),
"B": np.repeat(
np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8])
),
"C": np.repeat(
np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10])
),
},
columns=["A", "B", "C"],
)
for df in [normal_df, nan_df]:
ax = df.plot.hist(bins=5)
self._check_box_coord(
ax.patches[:5],
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([10, 9, 8, 7, 6]),
)
self._check_box_coord(
ax.patches[5:10],
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([8, 8, 8, 8, 8]),
)
self._check_box_coord(
ax.patches[10:],
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([6, 7, 8, 9, 10]),
)
ax = df.plot.hist(bins=5, stacked=True)
self._check_box_coord(
ax.patches[:5],
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([10, 9, 8, 7, 6]),
)
self._check_box_coord(
ax.patches[5:10],
expected_y=np.array([10, 9, 8, 7, 6]),
expected_h=np.array([8, 8, 8, 8, 8]),
)
self._check_box_coord(
ax.patches[10:],
expected_y=np.array([18, 17, 16, 15, 14]),
expected_h=np.array([6, 7, 8, 9, 10]),
)
axes = df.plot.hist(bins=5, stacked=True, subplots=True)
self._check_box_coord(
axes[0].patches,
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([10, 9, 8, 7, 6]),
)
self._check_box_coord(
axes[1].patches,
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([8, 8, 8, 8, 8]),
)
self._check_box_coord(
axes[2].patches,
expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([6, 7, 8, 9, 10]),
)
# horizontal
ax = df.plot.hist(bins=5, orientation="horizontal")
self._check_box_coord(
ax.patches[:5],
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([10, 9, 8, 7, 6]),
)
self._check_box_coord(
ax.patches[5:10],
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([8, 8, 8, 8, 8]),
)
self._check_box_coord(
ax.patches[10:],
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([6, 7, 8, 9, 10]),
)
ax = df.plot.hist(bins=5, stacked=True, orientation="horizontal")
self._check_box_coord(
ax.patches[:5],
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([10, 9, 8, 7, 6]),
)
self._check_box_coord(
ax.patches[5:10],
expected_x=np.array([10, 9, 8, 7, 6]),
expected_w=np.array([8, 8, 8, 8, 8]),
)
self._check_box_coord(
ax.patches[10:],
expected_x=np.array([18, 17, 16, 15, 14]),
expected_w=np.array([6, 7, 8, 9, 10]),
)
axes = df.plot.hist(
bins=5, stacked=True, subplots=True, orientation="horizontal"
)
self._check_box_coord(
axes[0].patches,
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([10, 9, 8, 7, 6]),
)
self._check_box_coord(
axes[1].patches,
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([8, 8, 8, 8, 8]),
)
self._check_box_coord(
axes[2].patches,
expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([6, 7, 8, 9, 10]),
)
def test_plot_int_columns(self):
df = DataFrame(np.random.randn(100, 4)).cumsum()
_check_plot_works(df.plot, legend=True)
def test_style_by_column(self):
import matplotlib.pyplot as plt
fig = plt.gcf()
df = DataFrame(np.random.randn(100, 3))
for markers in [
{0: "^", 1: "+", 2: "o"},
{0: "^", 1: "+"},
["^", "+", "o"],
["^", "+"],
]:
fig.clf()
fig.add_subplot(111)
ax = df.plot(style=markers)
for idx, line in enumerate(ax.get_lines()[: len(markers)]):
assert line.get_marker() == markers[idx]
def test_line_label_none(self):
s = Series([1, 2])
ax = s.plot()
assert ax.get_legend() is None
ax = s.plot(legend=True)
assert ax.get_legend().get_texts()[0].get_text() == ""
@pytest.mark.parametrize(
"props, expected",
[
("boxprops", "boxes"),
("whiskerprops", "whiskers"),
("capprops", "caps"),
("medianprops", "medians"),
],
)
def test_specified_props_kwd_plot_box(self, props, expected):
# GH 30346
df = DataFrame({k: np.random.random(100) for k in "ABC"})
kwd = {props: {"color": "C1"}}
result = df.plot.box(return_type="dict", **kwd)
assert result[expected][0].get_color() == "C1"
def test_unordered_ts(self):
df = DataFrame(
np.array([3.0, 2.0, 1.0]),
index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)],
columns=["test"],
)
ax = df.plot()
xticks = ax.lines[0].get_xdata()
assert xticks[0] < xticks[1]
ydata = ax.lines[0].get_ydata()
tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0]))
@td.skip_if_no_scipy
def test_kind_both_ways(self):
df = DataFrame({"x": [1, 2, 3]})
for kind in plotting.PlotAccessor._common_kinds:
df.plot(kind=kind)
getattr(df.plot, kind)()
for kind in ["scatter", "hexbin"]:
df.plot("x", "x", kind=kind)
getattr(df.plot, kind)("x", "x")
def test_all_invalid_plot_data(self):
df = DataFrame(list("abcd"))
for kind in plotting.PlotAccessor._common_kinds:
msg = "no numeric data to plot"
with pytest.raises(TypeError, match=msg):
df.plot(kind=kind)
def test_partially_invalid_plot_data(self):
with tm.RNGContext(42):
df = DataFrame(np.random.randn(10, 2), dtype=object)
df[np.random.rand(df.shape[0]) > 0.5] = "a"
for kind in plotting.PlotAccessor._common_kinds:
msg = "no numeric data to plot"
with pytest.raises(TypeError, match=msg):
df.plot(kind=kind)
with tm.RNGContext(42):
# area plot doesn't support positive/negative mixed data
df = DataFrame(np.random.rand(10, 2), dtype=object)
df[np.random.rand(df.shape[0]) > 0.5] = "a"
with pytest.raises(TypeError, match="no numeric data to plot"):
df.plot(kind="area")
def test_invalid_kind(self):
df = DataFrame(np.random.randn(10, 2))
msg = "invalid_plot_kind is not a valid plot kind"
with pytest.raises(ValueError, match=msg):
df.plot(kind="invalid_plot_kind")
@pytest.mark.parametrize(
"x,y,lbl",
[
(["B", "C"], "A", "a"),
(["A"], ["B", "C"], ["b", "c"]),
],
)
def test_invalid_xy_args(self, x, y, lbl):
# GH 18671, 19699 allows y to be list-like but not x
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
with pytest.raises(ValueError, match="x must be a label or position"):
df.plot(x=x, y=y, label=lbl)
def test_bad_label(self):
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
msg = "label should be list-like and same length as y"
with pytest.raises(ValueError, match=msg):
df.plot(x="A", y=["B", "C"], label="bad_label")
@pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")])
def test_invalid_xy_args_dup_cols(self, x, y):
# GH 18671, 19699 allows y to be list-like but not x
df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB"))
with pytest.raises(ValueError, match="x must be a label or position"):
df.plot(x=x, y=y)
@pytest.mark.parametrize(
"x,y,lbl,colors",
[
("A", ["B"], ["b"], ["red"]),
("A", ["B", "C"], ["b", "c"], ["red", "blue"]),
(0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]),
],
)
def test_y_listlike(self, x, y, lbl, colors):
# GH 19699: tests list-like y and verifies lbls & colors
df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
_check_plot_works(df.plot, x="A", y=y, label=lbl)
ax = df.plot(x=x, y=y, label=lbl, color=colors)
assert len(ax.lines) == len(y)
self._check_colors(ax.get_lines(), linecolors=colors)
@pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])])
def test_xy_args_integer(self, x, y, colnames):
# GH 20056: tests integer args for xy and checks col names
df = DataFrame({"A": [1, 2], "B": [3, 4]})
df.columns = colnames
_check_plot_works(df.plot, x=x, y=y)
def test_hexbin_basic(self):
df = DataFrame(
{
"A": np.random.uniform(size=20),
"B": np.random.uniform(size=20),
"C": np.arange(20) + np.random.uniform(size=20),
}
)
ax = df.plot.hexbin(x="A", y="B", gridsize=10)
# TODO: need better way to test. This just does existence.
assert len(ax.collections) == 1
# GH 6951
axes = df.plot.hexbin(x="A", y="B", subplots=True)
# hexbin should have 2 axes in the figure, 1 for plotting and another
# is colorbar
assert len(axes[0].figure.axes) == 2
# return value is single axes
self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
def test_hexbin_with_c(self):
df = DataFrame(
{
"A": np.random.uniform(size=20),
"B": np.random.uniform(size=20),
"C": np.arange(20) + np.random.uniform(size=20),
}
)
ax = df.plot.hexbin(x="A", y="B", C="C")
assert len(ax.collections) == 1
ax = df.plot.hexbin(x="A", y="B", C="C", reduce_C_function=np.std)
assert len(ax.collections) == 1
@pytest.mark.parametrize(
"kwargs, expected",
[
({}, "BuGn"), # default cmap
({"colormap": "cubehelix"}, "cubehelix"),
({"cmap": "YlGn"}, "YlGn"),
],
)
def test_hexbin_cmap(self, kwargs, expected):
df = DataFrame(
{
"A": np.random.uniform(size=20),
"B": np.random.uniform(size=20),
"C": np.arange(20) + np.random.uniform(size=20),
}
)
ax = df.plot.hexbin(x="A", y="B", **kwargs)
assert ax.collections[0].cmap.name == expected
def test_pie_df(self):
df = DataFrame(
np.random.rand(5, 3),
columns=["X", "Y", "Z"],
index=["a", "b", "c", "d", "e"],
)
msg = "pie requires either y column or 'subplots=True'"
with pytest.raises(ValueError, match=msg):
df.plot.pie()
ax = _check_plot_works(df.plot.pie, y="Y")
self._check_text_labels(ax.texts, df.index)
ax = _check_plot_works(df.plot.pie, y=2)
self._check_text_labels(ax.texts, df.index)
axes = _check_plot_works(
df.plot.pie,
default_axes=True,
subplots=True,
)
assert len(axes) == len(df.columns)
for ax in axes:
self._check_text_labels(ax.texts, df.index)
for ax, ylabel in zip(axes, df.columns):
assert ax.get_ylabel() == ylabel
labels = ["A", "B", "C", "D", "E"]
color_args = ["r", "g", "b", "c", "m"]
axes = _check_plot_works(
df.plot.pie,
default_axes=True,
subplots=True,
labels=labels,
colors=color_args,
)
assert len(axes) == len(df.columns)
for ax in axes:
self._check_text_labels(ax.texts, labels)
self._check_colors(ax.patches, facecolors=color_args)
def test_pie_df_nan(self):
import matplotlib as mpl
df = DataFrame(np.random.rand(4, 4))
for i in range(4):
df.iloc[i, i] = np.nan
fig, axes = self.plt.subplots(ncols=4)
# GH 37668
kwargs = {}
if mpl.__version__ >= "3.3":
kwargs = {"normalize": True}
with tm.assert_produces_warning(None):
df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs)
base_expected = ["0", "1", "2", "3"]
for i, ax in enumerate(axes):
expected = list(base_expected) # force copy
expected[i] = ""
result = [x.get_text() for x in ax.texts]
assert result == expected
# legend labels
# NaN's not included in legend with subplots
# see https://github.com/pandas-dev/pandas/issues/8390
result_labels = [x.get_text() for x in ax.get_legend().get_texts()]
expected_labels = base_expected[:i] + base_expected[i + 1 :]
assert result_labels == expected_labels
@pytest.mark.slow
def test_errorbar_plot(self):
d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
df = DataFrame(d)
d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}
df_err = DataFrame(d_err)
# check line plots
ax = _check_plot_works(df.plot, yerr=df_err, logy=True)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(df.plot, yerr=df_err, loglog=True)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(
(df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", log=True
)
self._check_has_errorbars(ax, xerr=2, yerr=2)
# yerr is raw error values
ax = _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4)
self._check_has_errorbars(ax, xerr=0, yerr=1)
ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4)
self._check_has_errorbars(ax, xerr=0, yerr=2)
# yerr is column name
for yerr in ["yerr", "誤差"]:
s_df = df.copy()
s_df[yerr] = np.ones(12) * 0.2
ax = _check_plot_works(s_df.plot, yerr=yerr)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr)
self._check_has_errorbars(ax, xerr=0, yerr=1)
with tm.external_error_raised(ValueError):
df.plot(yerr=np.random.randn(11))
df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12})
with tm.external_error_raised(TypeError):
df.plot(yerr=df_err)
@pytest.mark.slow
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
def test_errorbar_plot_different_kinds(self, kind):
d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
df = DataFrame(d)
d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}
df_err = DataFrame(d_err)
ax = _check_plot_works(df.plot, yerr=df_err["x"], kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(df.plot, yerr=d_err, kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind)
self._check_has_errorbars(ax, xerr=2, yerr=2)
ax = _check_plot_works(df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind)
self._check_has_errorbars(ax, xerr=2, yerr=2)
ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind)
self._check_has_errorbars(ax, xerr=2, yerr=2)
axes = _check_plot_works(
df.plot,
default_axes=True,
yerr=df_err,
xerr=df_err,
subplots=True,
kind=kind,
)
self._check_has_errorbars(axes, xerr=1, yerr=1)
@pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError)
def test_errorbar_plot_iterator(self):
with warnings.catch_warnings():
d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
df = DataFrame(d)
# yerr is iterator
ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df)))
self._check_has_errorbars(ax, xerr=0, yerr=2)
def test_errorbar_with_integer_column_names(self):
# test with integer column names
df = DataFrame(np.abs(np.random.randn(10, 2)))
df_err = DataFrame(np.abs(np.random.randn(10, 2)))
ax = _check_plot_works(df.plot, yerr=df_err)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(df.plot, y=0, yerr=1)
self._check_has_errorbars(ax, xerr=0, yerr=1)
@pytest.mark.slow
def test_errorbar_with_partial_columns(self):
df = DataFrame(np.abs(np.random.randn(10, 3)))
df_err = DataFrame(np.abs(np.random.randn(10, 2)), columns=[0, 2])
kinds = ["line", "bar"]
for kind in kinds:
ax = _check_plot_works(df.plot, yerr=df_err, kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ix = date_range("1/1/2000", periods=10, freq="M")
df.set_index(ix, inplace=True)
df_err.set_index(ix, inplace=True)
ax = _check_plot_works(df.plot, yerr=df_err, kind="line")
self._check_has_errorbars(ax, xerr=0, yerr=2)
d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
df = DataFrame(d)
d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4}
df_err = DataFrame(d_err)
for err in [d_err, df_err]:
ax = _check_plot_works(df.plot, yerr=err)
self._check_has_errorbars(ax, xerr=0, yerr=1)
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
def test_errorbar_timeseries(self, kind):
d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}
# check time-series plots
ix = date_range("1/1/2000", "1/1/2001", freq="M")
tdf = DataFrame(d, index=ix)
tdf_err = DataFrame(d_err, index=ix)
ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=2)
ax = _check_plot_works(tdf.plot, y="y", yerr=tdf_err["x"], kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=1)
ax = _check_plot_works(tdf.plot, y="y", yerr="x", kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=1)
ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind)
self._check_has_errorbars(ax, xerr=0, yerr=2)
axes = _check_plot_works(
tdf.plot,
default_axes=True,
kind=kind,
yerr=tdf_err,
subplots=True,
)
self._check_has_errorbars(axes, xerr=0, yerr=1)
def test_errorbar_asymmetrical(self):
np.random.seed(0)
err = np.random.rand(3, 2, 5)
# each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]...
df = DataFrame(np.arange(15).reshape(3, 5)).T
ax = df.plot(yerr=err, xerr=err / 2)
yerr_0_0 = ax.collections[1].get_paths()[0].vertices[:, 1]
expected_0_0 = err[0, :, 0] * np.array([-1, 1])
tm.assert_almost_equal(yerr_0_0, expected_0_0)
msg = re.escape(
"Asymmetrical error bars should be provided with the shape (3, 2, 5)"
)
with pytest.raises(ValueError, match=msg):
df.plot(yerr=err.T)
tm.close()
def test_table(self):
df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))
_check_plot_works(df.plot, table=True)
_check_plot_works(df.plot, table=df)
# GH 35945 UserWarning
with tm.assert_produces_warning(None):
ax = df.plot()
assert len(ax.tables) == 0
plotting.table(ax, df.T)
assert len(ax.tables) == 1
def test_errorbar_scatter(self):
df = DataFrame(
np.abs(np.random.randn(5, 2)), index=range(5), columns=["x", "y"]
)
df_err = DataFrame(
np.abs(np.random.randn(5, 2)) / 5, index=range(5), columns=["x", "y"]
)
ax = _check_plot_works(df.plot.scatter, x="x", y="y")
self._check_has_errorbars(ax, xerr=0, yerr=0)
ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err)
self._check_has_errorbars(ax, xerr=1, yerr=0)
ax = _check_plot_works(df.plot.scatter, x="x", y="y", yerr=df_err)
self._check_has_errorbars(ax, xerr=0, yerr=1)
ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err, yerr=df_err)
self._check_has_errorbars(ax, xerr=1, yerr=1)
def _check_errorbar_color(containers, expected, has_err="has_xerr"):
lines = []
errs = [c.lines for c in ax.containers if getattr(c, has_err, False)][0]
for el in errs:
if is_list_like(el):
lines.extend(el)
else:
lines.append(el)
err_lines = [x for x in lines if x in ax.collections]
self._check_colors(
err_lines, linecolors=np.array([expected] * len(err_lines))
)
# GH 8081
df = DataFrame(
np.abs(np.random.randn(10, 5)), columns=["a", "b", "c", "d", "e"]
)
ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red")
self._check_has_errorbars(ax, xerr=1, yerr=1)
_check_errorbar_color(ax.containers, "red", has_err="has_xerr")
_check_errorbar_color(ax.containers, "red", has_err="has_yerr")
ax = df.plot.scatter(x="a", y="b", yerr="e", color="green")
self._check_has_errorbars(ax, xerr=0, yerr=1)
_check_errorbar_color(ax.containers, "green", has_err="has_yerr")
def test_scatter_unknown_colormap(self):
# GH#48726
df = DataFrame({"a": [1, 2, 3], "b": 4})
with pytest.raises((ValueError, KeyError), match="'unknown' is not a"):
df.plot(x="a", y="b", colormap="unknown", kind="scatter")
def test_sharex_and_ax(self):
# https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
# the axis in fig.get_axis() are sorted differently than pandas
# expected them, so make sure that only the right ones are removed
import matplotlib.pyplot as plt
plt.close("all")
gs, axes = _generate_4_axes_via_gridspec()
df = DataFrame(
{
"a": [1, 2, 3, 4, 5, 6],
"b": [1, 2, 3, 4, 5, 6],
"c": [1, 2, 3, 4, 5, 6],
"d": [1, 2, 3, 4, 5, 6],
}
)
def _check(axes):
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
for ax in [axes[0], axes[2]]:
self._check_visible(ax.get_xticklabels(), visible=False)
self._check_visible(ax.get_xticklabels(minor=True), visible=False)
for ax in [axes[1], axes[3]]:
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
for ax in axes:
df.plot(x="a", y="b", title="title", ax=ax, sharex=True)
gs.tight_layout(plt.gcf())
_check(axes)
tm.close()
gs, axes = _generate_4_axes_via_gridspec()
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=axes, sharex=True)
_check(axes)
tm.close()
gs, axes = _generate_4_axes_via_gridspec()
# without sharex, no labels should be touched!
for ax in axes:
df.plot(x="a", y="b", title="title", ax=ax)
gs.tight_layout(plt.gcf())
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
def test_sharey_and_ax(self):
# https://github.com/pandas-dev/pandas/issues/9737 using gridspec,
# the axis in fig.get_axis() are sorted differently than pandas
# expected them, so make sure that only the right ones are removed
import matplotlib.pyplot as plt
gs, axes = _generate_4_axes_via_gridspec()
df = DataFrame(
{
"a": [1, 2, 3, 4, 5, 6],
"b": [1, 2, 3, 4, 5, 6],
"c": [1, 2, 3, 4, 5, 6],
"d": [1, 2, 3, 4, 5, 6],
}
)
def _check(axes):
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
for ax in [axes[0], axes[1]]:
self._check_visible(ax.get_yticklabels(), visible=True)
for ax in [axes[2], axes[3]]:
self._check_visible(ax.get_yticklabels(), visible=False)
for ax in axes:
df.plot(x="a", y="b", title="title", ax=ax, sharey=True)
gs.tight_layout(plt.gcf())
_check(axes)
tm.close()
gs, axes = _generate_4_axes_via_gridspec()
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=axes, sharey=True)
gs.tight_layout(plt.gcf())
_check(axes)
tm.close()
gs, axes = _generate_4_axes_via_gridspec()
# without sharex, no labels should be touched!
for ax in axes:
df.plot(x="a", y="b", title="title", ax=ax)
gs.tight_layout(plt.gcf())
for ax in axes:
assert len(ax.lines) == 1
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
@td.skip_if_no_scipy
def test_memory_leak(self):
"""Check that every plot type gets properly collected."""
import gc
import weakref
results = {}
for kind in plotting.PlotAccessor._all_kinds:
args = {}
if kind in ["hexbin", "scatter", "pie"]:
df = DataFrame(
{
"A": np.random.uniform(size=20),
"B": np.random.uniform(size=20),
"C": np.arange(20) + np.random.uniform(size=20),
}
)
args = {"x": "A", "y": "B"}
elif kind == "area":
df = tm.makeTimeDataFrame().abs()
else:
df = tm.makeTimeDataFrame()
# Use a weakref so we can see if the object gets collected without
# also preventing it from being collected
results[kind] = weakref.proxy(df.plot(kind=kind, **args))
# have matplotlib delete all the figures
tm.close()
# force a garbage collection
gc.collect()
msg = "weakly-referenced object no longer exists"
for key in results:
# check that every plot was collected
with pytest.raises(ReferenceError, match=msg):
# need to actually access something to get an error
results[key].lines
def test_df_gridspec_patterns(self):
# GH 10819
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10))
df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB"))
def _get_vertical_grid():
gs = gridspec.GridSpec(3, 1)
fig = plt.figure()
ax1 = fig.add_subplot(gs[:2, :])
ax2 = fig.add_subplot(gs[2, :])
return ax1, ax2
def _get_horizontal_grid():
gs = gridspec.GridSpec(1, 3)
fig = plt.figure()
ax1 = fig.add_subplot(gs[:, :2])
ax2 = fig.add_subplot(gs[:, 2])
return ax1, ax2
for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]:
ax1 = ts.plot(ax=ax1)
assert len(ax1.lines) == 1
ax2 = df.plot(ax=ax2)
assert len(ax2.lines) == 2
for ax in [ax1, ax2]:
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
# subplots=True
for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]:
axes = df.plot(subplots=True, ax=[ax1, ax2])
assert len(ax1.lines) == 1
assert len(ax2.lines) == 1
for ax in axes:
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
# vertical / subplots / sharex=True / sharey=True
ax1, ax2 = _get_vertical_grid()
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True)
assert len(axes[0].lines) == 1
assert len(axes[1].lines) == 1
for ax in [ax1, ax2]:
# yaxis are visible because there is only one column
self._check_visible(ax.get_yticklabels(), visible=True)
# xaxis of axes0 (top) are hidden
self._check_visible(axes[0].get_xticklabels(), visible=False)
self._check_visible(axes[0].get_xticklabels(minor=True), visible=False)
self._check_visible(axes[1].get_xticklabels(), visible=True)
self._check_visible(axes[1].get_xticklabels(minor=True), visible=True)
tm.close()
# horizontal / subplots / sharex=True / sharey=True
ax1, ax2 = _get_horizontal_grid()
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True)
assert len(axes[0].lines) == 1
assert len(axes[1].lines) == 1
self._check_visible(axes[0].get_yticklabels(), visible=True)
# yaxis of axes1 (right) are hidden
self._check_visible(axes[1].get_yticklabels(), visible=False)
for ax in [ax1, ax2]:
# xaxis are visible because there is only one column
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
# boxed
def _get_boxed_grid():
gs = gridspec.GridSpec(3, 3)
fig = plt.figure()
ax1 = fig.add_subplot(gs[:2, :2])
ax2 = fig.add_subplot(gs[:2, 2])
ax3 = fig.add_subplot(gs[2, :2])
ax4 = fig.add_subplot(gs[2, 2])
return ax1, ax2, ax3, ax4
axes = _get_boxed_grid()
df = DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD"))
axes = df.plot(subplots=True, ax=axes)
for ax in axes:
assert len(ax.lines) == 1
# axis are visible because these are not shared
self._check_visible(ax.get_yticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
# subplots / sharex=True / sharey=True
axes = _get_boxed_grid()
with tm.assert_produces_warning(UserWarning):
axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True)
for ax in axes:
assert len(ax.lines) == 1
for ax in [axes[0], axes[2]]: # left column
self._check_visible(ax.get_yticklabels(), visible=True)
for ax in [axes[1], axes[3]]: # right column
self._check_visible(ax.get_yticklabels(), visible=False)
for ax in [axes[0], axes[1]]: # top row
self._check_visible(ax.get_xticklabels(), visible=False)
self._check_visible(ax.get_xticklabels(minor=True), visible=False)
for ax in [axes[2], axes[3]]: # bottom row
self._check_visible(ax.get_xticklabels(), visible=True)
self._check_visible(ax.get_xticklabels(minor=True), visible=True)
tm.close()
def test_df_grid_settings(self):
# Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792
self._check_grid_settings(
DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}),
plotting.PlotAccessor._dataframe_kinds,
kws={"x": "a", "y": "b"},
)
def test_plain_axes(self):
# supplied ax itself is a SubplotAxes, but figure contains also
# a plain Axes object (GH11556)
fig, ax = self.plt.subplots()
fig.add_axes([0.2, 0.2, 0.2, 0.2])
Series(np.random.rand(10)).plot(ax=ax)
# supplied ax itself is a plain Axes, but because the cmap keyword
# a new ax is created for the colorbar -> also multiples axes (GH11520)
df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)})
fig = self.plt.figure()
ax = fig.add_axes((0, 0, 1, 1))
df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv")
# other examples
fig, ax = self.plt.subplots()
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
Series(np.random.rand(10)).plot(ax=ax)
Series(np.random.rand(10)).plot(ax=cax)
fig, ax = self.plt.subplots()
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
iax = inset_axes(ax, width="30%", height=1.0, loc=3)
Series(np.random.rand(10)).plot(ax=ax)
Series(np.random.rand(10)).plot(ax=iax)
@pytest.mark.parametrize("method", ["line", "barh", "bar"])
def test_secondary_axis_font_size(self, method):
# GH: 12565
df = (
DataFrame(np.random.randn(15, 2), columns=list("AB"))
.assign(C=lambda df: df.B.cumsum())
.assign(D=lambda df: df.C * 1.1)
)
fontsize = 20
sy = ["C", "D"]
kwargs = {"secondary_y": sy, "fontsize": fontsize, "mark_right": True}
ax = getattr(df.plot, method)(**kwargs)
self._check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize)
def test_x_string_values_ticks(self):
# Test if string plot index have a fixed xtick position
# GH: 7612, GH: 22334
df = DataFrame(
{
"sales": [3, 2, 3],
"visits": [20, 42, 28],
"day": ["Monday", "Tuesday", "Wednesday"],
}
)
ax = df.plot.area(x="day")
ax.set_xlim(-1, 3)
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
labels_position = dict(zip(xticklabels, ax.get_xticks()))
# Testing if the label stayed at the right position
assert labels_position["Monday"] == 0.0
assert labels_position["Tuesday"] == 1.0
assert labels_position["Wednesday"] == 2.0
def test_x_multiindex_values_ticks(self):
# Test if multiindex plot index have a fixed xtick position
# GH: 15912
index = MultiIndex.from_product([[2012, 2013], [1, 2]])
df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index)
ax = df.plot()
ax.set_xlim(-1, 4)
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
labels_position = dict(zip(xticklabels, ax.get_xticks()))
# Testing if the label stayed at the right position
assert labels_position["(2012, 1)"] == 0.0
assert labels_position["(2012, 2)"] == 1.0
assert labels_position["(2013, 1)"] == 2.0
assert labels_position["(2013, 2)"] == 3.0
@pytest.mark.parametrize("kind", ["line", "area"])
def test_xlim_plot_line(self, kind):
# test if xlim is set correctly in plot.line and plot.area
# GH 27686
df = DataFrame([2, 4], index=[1, 2])
ax = df.plot(kind=kind)
xlims = ax.get_xlim()
assert xlims[0] < 1
assert xlims[1] > 2
def test_xlim_plot_line_correctly_in_mixed_plot_type(self):
# test if xlim is set correctly when ax contains multiple different kinds
# of plots, GH 27686
fig, ax = self.plt.subplots()
indexes = ["k1", "k2", "k3", "k4"]
df = DataFrame(
{
"s1": [1000, 2000, 1500, 2000],
"s2": [900, 1400, 2000, 3000],
"s3": [1500, 1500, 1600, 1200],
"secondary_y": [1, 3, 4, 3],
},
index=indexes,
)
df[["s1", "s2", "s3"]].plot.bar(ax=ax, stacked=False)
df[["secondary_y"]].plot(ax=ax, secondary_y=True)
xlims = ax.get_xlim()
assert xlims[0] < 0
assert xlims[1] > 3
# make sure axis labels are plotted correctly as well
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
assert xticklabels == indexes
def test_plot_no_rows(self):
# GH 27758
df = DataFrame(columns=["foo"], dtype=int)
assert df.empty
ax = df.plot()
assert len(ax.get_lines()) == 1
line = ax.get_lines()[0]
assert len(line.get_xdata()) == 0
assert len(line.get_ydata()) == 0
def test_plot_no_numeric_data(self):
df = DataFrame(["a", "b", "c"])
with pytest.raises(TypeError, match="no numeric data to plot"):
df.plot()
@td.skip_if_no_scipy
@pytest.mark.parametrize(
"kind", ("line", "bar", "barh", "hist", "kde", "density", "area", "pie")
)
def test_group_subplot(self, kind):
d = {
"a": np.arange(10),
"b": np.arange(10) + 1,
"c": np.arange(10) + 1,
"d": np.arange(10),
"e": np.arange(10),
}
df = DataFrame(d)
axes = df.plot(subplots=[("b", "e"), ("c", "d")], kind=kind)
assert len(axes) == 3 # 2 groups + single column a
expected_labels = (["b", "e"], ["c", "d"], ["a"])
for ax, labels in zip(axes, expected_labels):
if kind != "pie":
self._check_legend_labels(ax, labels=labels)
if kind == "line":
assert len(ax.lines) == len(labels)
def test_group_subplot_series_notimplemented(self):
ser = Series(range(1))
msg = "An iterable subplots for a Series"
with pytest.raises(NotImplementedError, match=msg):
ser.plot(subplots=[("a",)])
def test_group_subplot_multiindex_notimplemented(self):
df = DataFrame(np.eye(2), columns=MultiIndex.from_tuples([(0, 1), (1, 2)]))
msg = "An iterable subplots for a DataFrame with a MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
df.plot(subplots=[(0, 1)])
def test_group_subplot_nonunique_cols_notimplemented(self):
df = DataFrame(np.eye(2), columns=["a", "a"])
msg = "An iterable subplots for a DataFrame with non-unique"
with pytest.raises(NotImplementedError, match=msg):
df.plot(subplots=[("a",)])
@pytest.mark.parametrize(
"subplots, expected_msg",
[
(123, "subplots should be a bool or an iterable"),
("a", "each entry should be a list/tuple"), # iterable of non-iterable
((1,), "each entry should be a list/tuple"), # iterable of non-iterable
(("a",), "each entry should be a list/tuple"), # iterable of strings
],
)
def test_group_subplot_bad_input(self, subplots, expected_msg):
# Make sure error is raised when subplots is not a properly
# formatted iterable. Only iterables of iterables are permitted, and
# entries should not be strings.
d = {"a": np.arange(10), "b": np.arange(10)}
df = DataFrame(d)
with pytest.raises(ValueError, match=expected_msg):
df.plot(subplots=subplots)
def test_group_subplot_invalid_column_name(self):
d = {"a": np.arange(10), "b": np.arange(10)}
df = DataFrame(d)
with pytest.raises(ValueError, match=r"Column label\(s\) \['bad_name'\]"):
df.plot(subplots=[("a", "bad_name")])
def test_group_subplot_duplicated_column(self):
d = {"a": np.arange(10), "b": np.arange(10), "c": np.arange(10)}
df = DataFrame(d)
with pytest.raises(ValueError, match="should be in only one subplot"):
df.plot(subplots=[("a", "b"), ("a", "c")])
@pytest.mark.parametrize("kind", ("box", "scatter", "hexbin"))
def test_group_subplot_invalid_kind(self, kind):
d = {"a": np.arange(10), "b": np.arange(10)}
df = DataFrame(d)
with pytest.raises(
ValueError, match="When subplots is an iterable, kind must be one of"
):
df.plot(subplots=[("a", "b")], kind=kind)
@pytest.mark.parametrize(
"index_name, old_label, new_label",
[
(None, "", "new"),
("old", "old", "new"),
(None, "", ""),
(None, "", 1),
(None, "", [1, 2]),
],
)
@pytest.mark.parametrize("kind", ["line", "area", "bar"])
def test_xlabel_ylabel_dataframe_single_plot(
self, kind, index_name, old_label, new_label
):
# GH 9093
df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"])
df.index.name = index_name
# default is the ylabel is not shown and xlabel is index name
ax = df.plot(kind=kind)
assert ax.get_xlabel() == old_label
assert ax.get_ylabel() == ""
# old xlabel will be overridden and assigned ylabel will be used as ylabel
ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label)
assert ax.get_ylabel() == str(new_label)
assert ax.get_xlabel() == str(new_label)
@pytest.mark.parametrize(
"xlabel, ylabel",
[
(None, None),
("X Label", None),
(None, "Y Label"),
("X Label", "Y Label"),
],
)
@pytest.mark.parametrize("kind", ["scatter", "hexbin"])
def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel):
# GH 37001
xcol = "Type A"
ycol = "Type B"
df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol])
# default is the labels are column names
ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel)
assert ax.get_xlabel() == (xcol if xlabel is None else xlabel)
assert ax.get_ylabel() == (ycol if ylabel is None else ylabel)
@pytest.mark.parametrize("secondary_y", (False, True))
def test_secondary_y(self, secondary_y):
ax_df = DataFrame([0]).plot(
secondary_y=secondary_y, ylabel="Y", ylim=(0, 100), yticks=[99]
)
for ax in ax_df.figure.axes:
if ax.yaxis.get_visible():
assert ax.get_ylabel() == "Y"
assert ax.get_ylim() == (0, 100)
assert ax.get_yticks()[0] == 99
def test_sort_columns_deprecated(self):
# GH 47563
df = DataFrame({"a": [1, 2], "b": [3, 4]})
with tm.assert_produces_warning(FutureWarning):
df.plot.box("a", sort_columns=True)
with tm.assert_produces_warning(FutureWarning):
df.plot.box(sort_columns=False)
with tm.assert_produces_warning(False):
df.plot.box("a")
def _generate_4_axes_via_gridspec():
import matplotlib as mpl
import matplotlib.gridspec
import matplotlib.pyplot as plt
gs = mpl.gridspec.GridSpec(2, 2)
ax_tl = plt.subplot(gs[0, 0])
ax_ll = plt.subplot(gs[1, 0])
ax_tr = plt.subplot(gs[0, 1])
ax_lr = plt.subplot(gs[1, 1])
return gs, [ax_tl, ax_ll, ax_tr, ax_lr]