232 lines
8.2 KiB
Python
232 lines
8.2 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
from pandas import (
|
|
DataFrame,
|
|
Series,
|
|
)
|
|
import pandas._testing as tm
|
|
from pandas.tests.copy_view.util import get_array
|
|
|
|
|
|
def test_copy(using_copy_on_write):
    # Case: a default copy() of a dataframe, followed by a write into the copy
    frame = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    duplicate = frame.copy()

    # column "a" of the copy must live in a different buffer than the source
    copied_values = get_array(duplicate, "a")
    source_values = get_array(frame, "a")
    assert not np.shares_memory(copied_values, source_values)
    if using_copy_on_write:
        # under CoW the copy's manager is expected to hold no refs at all
        assert duplicate._mgr.refs is None

    # writing into the copy leaves the source value in place
    duplicate.iloc[0, 0] = 0
    assert frame.iloc[0, 0] == 1
|
|
|
|
|
|
def test_copy_shallow(using_copy_on_write):
    # Case: copy(deep=False) of a dataframe, followed by a write into the copy
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    shallow = parent.copy(deep=False)

    def shares(col):
        # re-fetch both arrays on every call: a write may swap the buffers
        return np.shares_memory(get_array(shallow, col), get_array(parent, col))

    # right after the shallow copy both frames use the same buffer
    assert shares("a")
    if using_copy_on_write:
        assert shallow._mgr.refs is not None

    # the write itself is the same in both modes; only its effect differs
    shallow.iloc[0, 0] = 0

    if not using_copy_on_write:
        # the write is visible through the parent ...
        assert parent.iloc[0, 0] == 0
        # ... and the buffer is still shared
        assert shares("a")
        return

    # with CoW the parent keeps its value
    assert parent.iloc[0, 0] == 1
    # the written column no longer shares memory
    assert not shares("a")
    # while an untouched column still does
    assert shares("c")
|
|
|
|
|
|
# -----------------------------------------------------------------------------
# DataFrame methods returning new DataFrame using shallow copy
|
|
|
|
|
|
def test_reset_index(using_copy_on_write):
    # Case: reset_index() result is mutated afterwards; the parent dataframe
    # has to come out unchanged
    parent = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12]
    )
    parent_before = parent.copy()
    result = parent.reset_index()
    result._mgr._verify_integrity()

    def shares(col):
        # re-fetch both arrays on every call: a write may swap the buffers
        return np.shares_memory(get_array(result, col), get_array(parent, col))

    if using_copy_on_write:
        # the result starts out sharing its data columns with the parent
        assert shares("b")
        assert shares("c")

    # write into the result; afterwards "b" must not be shared in either mode
    result.iloc[0, 2] = 0
    assert not shares("b")
    if using_copy_on_write:
        # a column that was not written to is still shared
        assert shares("c")
    tm.assert_frame_equal(parent, parent_before)
|
|
|
|
|
|
def test_rename_columns(using_copy_on_write):
    # Case: rename(columns=...) gives a new dataframe, which is then modified
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    parent_before = parent.copy()
    renamed = parent.rename(columns=str.upper)

    def shares(new_label, old_label):
        # re-fetch both arrays on every call: a write may swap the buffers
        return np.shares_memory(
            get_array(renamed, new_label), get_array(parent, old_label)
        )

    if using_copy_on_write:
        assert shares("A", "a")

    # write into the renamed frame; the written column must not be shared
    renamed.iloc[0, 0] = 0
    assert not shares("A", "a")
    if using_copy_on_write:
        # a column that was not written to is still shared
        assert shares("C", "c")

    renamed_expected = DataFrame(
        {"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]}
    )
    tm.assert_frame_equal(renamed, renamed_expected)
    tm.assert_frame_equal(parent, parent_before)
|
|
|
|
|
|
def test_rename_columns_modify_parent(using_copy_on_write):
    # Case: rename(columns=...) gives a new dataframe, and afterwards the
    # original (parent) dataframe is the one being modified
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    renamed = parent.rename(columns=str.upper)
    renamed_before = renamed.copy()

    def shares(new_label, old_label):
        # re-fetch both arrays on every call: a write may swap the buffers
        return np.shares_memory(
            get_array(renamed, new_label), get_array(parent, old_label)
        )

    if not using_copy_on_write:
        assert not shares("A", "a")
    else:
        assert shares("A", "a")

    # write into the parent; the written column must not be shared
    parent.iloc[0, 0] = 0
    assert not shares("A", "a")
    if using_copy_on_write:
        # a column that was not written to is still shared
        assert shares("C", "c")

    parent_expected = DataFrame(
        {"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}
    )
    tm.assert_frame_equal(parent, parent_expected)
    tm.assert_frame_equal(renamed, renamed_before)
|
|
|
|
|
|
def test_reindex_columns(using_copy_on_write):
    # Case: reindex(columns=...) gives a new dataframe, which is then modified
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    parent_before = parent.copy()
    reindexed = parent.reindex(columns=["a", "c"])

    def shares(col):
        # re-fetch both arrays on every call: a write may swap the buffers
        return np.shares_memory(get_array(reindexed, col), get_array(parent, col))

    if not using_copy_on_write:
        assert not shares("a")
    else:
        # the result starts out sharing column "a" with the parent
        assert shares("a")

    # write into the result; the written column must not be shared
    reindexed.iloc[0, 0] = 0
    assert not shares("a")
    if using_copy_on_write:
        # a column that was not written to is still shared
        assert shares("c")
    tm.assert_frame_equal(parent, parent_before)
|
|
|
|
|
|
def test_select_dtypes(using_copy_on_write):
    # Case: select_dtypes() picks columns into a new dataframe, which is
    # then modified
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    parent_before = parent.copy()
    subset = parent.select_dtypes("int64")
    subset._mgr._verify_integrity()

    def shares_a():
        # re-fetch both arrays on every call: a write may swap the buffers
        return np.shares_memory(get_array(subset, "a"), get_array(parent, "a"))

    if not using_copy_on_write:
        assert not shares_a()
    else:
        assert shares_a()

    # write into the selection; with CoW the written column stops being shared
    subset.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares_a()
    tm.assert_frame_equal(parent, parent_before)
|
|
|
|
|
|
def test_to_frame(using_copy_on_write):
    # Case: a sliced Series is turned into a DataFrame with to_frame(), then
    # each side is modified in turn
    series = Series([1, 2, 3])
    series_before = series.copy()

    frame = series[:].to_frame()

    # in both modes the new frame starts out on the series' buffer
    assert np.shares_memory(series.values, get_array(frame, 0))

    frame.iloc[0, 0] = 0

    if not using_copy_on_write:
        # without CoW the to_frame() result acts as a view -> the write into
        # the frame shows up in the parent series
        series_expected = series_before.copy()
        series_expected.iloc[0] = 0
        tm.assert_series_equal(series, series_expected)
    else:
        # with CoW the write detaches the frame and the series is unchanged
        assert not np.shares_memory(series.values, get_array(frame, 0))
        tm.assert_series_equal(series, series_before)

    # other direction: derive a fresh frame, then write into the series
    frame = series[:].to_frame()
    series.iloc[0] = 0

    if not using_copy_on_write:
        frame_expected = series_before.copy().to_frame()
        frame_expected.iloc[0, 0] = 0
        tm.assert_frame_equal(frame, frame_expected)
    else:
        tm.assert_frame_equal(frame, series_before.to_frame())
|
|
|
|
|
|
@pytest.mark.parametrize(
    "method, idx",
    [
        pytest.param(
            lambda df: df.copy(deep=False).copy(deep=False), 0, id="shallow-copy"
        ),
        pytest.param(lambda df: df.reset_index().reset_index(), 2, id="reset_index"),
        pytest.param(
            lambda df: df.rename(columns=str.upper).rename(columns=str.lower),
            0,
            id="rename",
        ),
        pytest.param(
            lambda df: df.copy(deep=False).select_dtypes(include="number"),
            0,
            id="select_dtypes",
        ),
    ],
)
def test_chained_methods(request, method, idx, using_copy_on_write):
    # Case: two methods chained on a dataframe; writes on either end of the
    # chain must not reach the other end. `idx` is the position in the result
    # where the parent's columns start.
    parent = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
    parent_before = parent.copy()

    # the only combination that yields a view is the shallow-copy variant
    # without CoW; the equality checks apply to every other combination
    expect_isolated = (
        using_copy_on_write or request.node.callspec.id != "shallow-copy"
    )

    # write into the chained result -> parent stays as it was
    chained = method(parent)
    chained.iloc[0, idx] = 0
    if expect_isolated:
        tm.assert_frame_equal(parent, parent_before)

    # write into the parent -> a freshly chained result stays as it was
    chained = method(parent)
    parent.iloc[0, 0] = 0
    if expect_isolated:
        tm.assert_frame_equal(chained.iloc[:, idx:], parent_before)
|
|
|
|
|
|
def test_putmask(using_copy_on_write):
    # Case: boolean-mask assignment on a dataframe while a slice of it
    # (taken beforehand) is still around
    frame = DataFrame({"a": [1, 2], "b": 1, "c": 2})
    sliced = frame[:]
    frame_before = frame.copy()
    frame[frame == frame] = 5

    # both modes run the same memory check and only differ in the expectation
    still_shared = np.shares_memory(get_array(sliced, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert not still_shared
        tm.assert_frame_equal(sliced, frame_before)
    else:
        # without CoW the assignment is visible through the slice as well
        assert still_shared
        assert sliced.iloc[0, 0] == 5
|