ai-content-maker/.venv/Lib/site-packages/pandas/tests/libs/test_hashtable.py

from contextlib import contextmanager
import struct
import tracemalloc

import numpy as np
import pytest

from pandas._libs import hashtable as ht

import pandas as pd
import pandas._testing as tm
from pandas.core.algorithms import isin


@contextmanager
def activated_tracemalloc():
    tracemalloc.start()
    try:
        yield
    finally:
        tracemalloc.stop()


def get_allocated_khash_memory():
    snapshot = tracemalloc.take_snapshot()
    snapshot = snapshot.filter_traces(
        (tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()),)
    )
    return sum(map(lambda x: x.size, snapshot.traces))


@pytest.mark.parametrize(
    "table_type, dtype",
    [
        (ht.PyObjectHashTable, np.object_),
        (ht.Complex128HashTable, np.complex128),
        (ht.Int64HashTable, np.int64),
        (ht.UInt64HashTable, np.uint64),
        (ht.Float64HashTable, np.float64),
        (ht.Complex64HashTable, np.complex64),
        (ht.Int32HashTable, np.int32),
        (ht.UInt32HashTable, np.uint32),
        (ht.Float32HashTable, np.float32),
        (ht.Int16HashTable, np.int16),
        (ht.UInt16HashTable, np.uint16),
        (ht.Int8HashTable, np.int8),
        (ht.UInt8HashTable, np.uint8),
        (ht.IntpHashTable, np.intp),
    ],
)
class TestHashTable:
    def test_get_set_contains_len(self, table_type, dtype):
        index = 5
        table = table_type(55)
        assert len(table) == 0
        assert index not in table

        table.set_item(index, 42)
        assert len(table) == 1
        assert index in table
        assert table.get_item(index) == 42

        table.set_item(index + 1, 41)
        assert index in table
        assert index + 1 in table
        assert len(table) == 2
        assert table.get_item(index) == 42
        assert table.get_item(index + 1) == 41

        table.set_item(index, 21)
        assert index in table
        assert index + 1 in table
        assert len(table) == 2
        assert table.get_item(index) == 21
        assert table.get_item(index + 1) == 41
        assert index + 2 not in table

        with pytest.raises(KeyError, match=str(index + 2)):
            table.get_item(index + 2)

    def test_map_keys_to_values(self, table_type, dtype, writable):
        # only Int64HashTable has this method
        if table_type == ht.Int64HashTable:
            N = 77
            table = table_type()
            keys = np.arange(N).astype(dtype)
            vals = np.arange(N).astype(np.int64) + N
            keys.flags.writeable = writable
            vals.flags.writeable = writable
            table.map_keys_to_values(keys, vals)
            for i in range(N):
                assert table.get_item(keys[i]) == i + N

    def test_map_locations(self, table_type, dtype, writable):
        N = 8
        table = table_type()
        keys = (np.arange(N) + N).astype(dtype)
        keys.flags.writeable = writable
        table.map_locations(keys)
        for i in range(N):
            assert table.get_item(keys[i]) == i

    def test_lookup(self, table_type, dtype, writable):
        N = 3
        table = table_type()
        keys = (np.arange(N) + N).astype(dtype)
        keys.flags.writeable = writable
        table.map_locations(keys)
        result = table.lookup(keys)
        expected = np.arange(N)
        tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))

    def test_lookup_wrong(self, table_type, dtype):
        if dtype in (np.int8, np.uint8):
            N = 100
        else:
            N = 512
        table = table_type()
        keys = (np.arange(N) + N).astype(dtype)
        table.map_locations(keys)
        wrong_keys = np.arange(N).astype(dtype)
        result = table.lookup(wrong_keys)
        assert np.all(result == -1)

    def test_unique(self, table_type, dtype, writable):
        if dtype in (np.int8, np.uint8):
            N = 88
        else:
            N = 1000
        table = table_type()
        expected = (np.arange(N) + N).astype(dtype)
        keys = np.repeat(expected, 5)
        keys.flags.writeable = writable
        unique = table.unique(keys)
        tm.assert_numpy_array_equal(unique, expected)

    def test_tracemalloc_works(self, table_type, dtype):
        if dtype in (np.int8, np.uint8):
            N = 256
        else:
            N = 30000
        keys = np.arange(N).astype(dtype)
        with activated_tracemalloc():
            table = table_type()
            table.map_locations(keys)
            used = get_allocated_khash_memory()
            my_size = table.sizeof()
            assert used == my_size
            del table
            assert get_allocated_khash_memory() == 0

    def test_tracemalloc_for_empty(self, table_type, dtype):
        with activated_tracemalloc():
            table = table_type()
            used = get_allocated_khash_memory()
            my_size = table.sizeof()
            assert used == my_size
            del table
            assert get_allocated_khash_memory() == 0

    def test_get_state(self, table_type, dtype):
        table = table_type(1000)
        state = table.get_state()
        assert state["size"] == 0
        assert state["n_occupied"] == 0
        assert "n_buckets" in state
        assert "upper_bound" in state

    @pytest.mark.parametrize("N", range(1, 110))
    def test_no_reallocation(self, table_type, dtype, N):
        keys = np.arange(N).astype(dtype)
        preallocated_table = table_type(N)
        n_buckets_start = preallocated_table.get_state()["n_buckets"]
        preallocated_table.map_locations(keys)
        n_buckets_end = preallocated_table.get_state()["n_buckets"]
        # original number of buckets was enough:
        assert n_buckets_start == n_buckets_end
        # check with clean table (not too much preallocated)
        clean_table = table_type()
        clean_table.map_locations(keys)
        assert n_buckets_start == clean_table.get_state()["n_buckets"]


class TestHashTableUnsorted:
    # TODO: moved from test_algos; may be redundancies with other tests
    def test_string_hashtable_set_item_signature(self):
        # GH#30419 fix typing in StringHashTable.set_item to prevent segfault
        tbl = ht.StringHashTable()

        tbl.set_item("key", 1)
        assert tbl.get_item("key") == 1

        with pytest.raises(TypeError, match="'key' has incorrect type"):
            # key arg typed as string, not object
            tbl.set_item(4, 6)
        with pytest.raises(TypeError, match="'val' has incorrect type"):
            tbl.get_item(4)

    def test_lookup_nan(self, writable):
        # GH#21688 ensure we can deal with readonly memory views
        xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
        xs.setflags(write=writable)
        m = ht.Float64HashTable()
        m.map_locations(xs)
        tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))

    def test_add_signed_zeros(self):
        # GH#21866 inconsistent hash-function for float64
        # default hash-function would lead to different hash-buckets
        # for 0.0 and -0.0 if there are more than 2^30 hash-buckets
        # but this would mean 16GB
        N = 4  # 12 * 10**8 would trigger the error, if you have enough memory
        m = ht.Float64HashTable(N)
        m.set_item(0.0, 0)
        m.set_item(-0.0, 0)
        assert len(m) == 1  # 0.0 and -0.0 are equivalent

    def test_add_different_nans(self):
        # GH#21866 inconsistent hash-function for float64
        # create different nans from bit-patterns:
        NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]
        NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]
        assert NAN1 != NAN1
        assert NAN2 != NAN2
        # default hash function would lead to different hash-buckets
        # for NAN1 and NAN2 even if there are only 4 buckets:
        m = ht.Float64HashTable()
        m.set_item(NAN1, 0)
        m.set_item(NAN2, 0)
        assert len(m) == 1  # NAN1 and NAN2 are equivalent

    def test_lookup_overflow(self, writable):
        xs = np.array([1, 2, 2**63], dtype=np.uint64)
        # GH 21688 ensure we can deal with readonly memory views
        xs.setflags(write=writable)
        m = ht.UInt64HashTable()
        m.map_locations(xs)
        tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))

    @pytest.mark.parametrize("nvals", [0, 10])  # resizing to 0 is special case
    @pytest.mark.parametrize(
        "htable, uniques, dtype, safely_resizes",
        [
            (ht.PyObjectHashTable, ht.ObjectVector, "object", False),
            (ht.StringHashTable, ht.ObjectVector, "object", True),
            (ht.Float64HashTable, ht.Float64Vector, "float64", False),
            (ht.Int64HashTable, ht.Int64Vector, "int64", False),
            (ht.Int32HashTable, ht.Int32Vector, "int32", False),
            (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False),
        ],
    )
    def test_vector_resize(
        self, writable, htable, uniques, dtype, safely_resizes, nvals
    ):
        # Test for memory errors after internal vector
        # reallocations (GH 7157)
        # Changed from using np.random.rand to range
        # which could cause flaky CI failures when safely_resizes=False
        vals = np.array(range(1000), dtype=dtype)

        # GH 21688 ensures we can deal with read-only memory views
        vals.setflags(write=writable)

        # initialise instances; cannot initialise in parametrization,
        # as otherwise external views would be held on the array (which is
        # one of the things this test is checking)
        htable = htable()
        uniques = uniques()

        # get_labels may append to uniques
        htable.get_labels(vals[:nvals], uniques, 0, -1)
        # to_array() sets an external_view_exists flag on uniques.
        tmp = uniques.to_array()
        oldshape = tmp.shape

        # subsequent get_labels() calls can no longer append to it
        # (except for StringHashTables + ObjectVector)
        if safely_resizes:
            htable.get_labels(vals, uniques, 0, -1)
        else:
            with pytest.raises(ValueError, match="external reference.*"):
                htable.get_labels(vals, uniques, 0, -1)

        uniques.to_array()  # should not raise here
        assert tmp.shape == oldshape

    @pytest.mark.parametrize(
        "hashtable",
        [
            ht.PyObjectHashTable,
            ht.StringHashTable,
            ht.Float64HashTable,
            ht.Int64HashTable,
            ht.Int32HashTable,
            ht.UInt64HashTable,
        ],
    )
    def test_hashtable_large_sizehint(self, hashtable):
        # GH#22729 smoketest for not raising when passing a large size_hint
        size_hint = np.iinfo(np.uint32).max + 1
        hashtable(size_hint=size_hint)


class TestPyObjectHashTableWithNans:
    def test_nan_float(self):
        nan1 = float("nan")
        nan2 = float("nan")
        assert nan1 is not nan2
        table = ht.PyObjectHashTable()
        table.set_item(nan1, 42)
        assert table.get_item(nan2) == 42

    def test_nan_complex_both(self):
        nan1 = complex(float("nan"), float("nan"))
        nan2 = complex(float("nan"), float("nan"))
        assert nan1 is not nan2
        table = ht.PyObjectHashTable()
        table.set_item(nan1, 42)
        assert table.get_item(nan2) == 42

    def test_nan_complex_real(self):
        nan1 = complex(float("nan"), 1)
        nan2 = complex(float("nan"), 1)
        other = complex(float("nan"), 2)
        assert nan1 is not nan2
        table = ht.PyObjectHashTable()
        table.set_item(nan1, 42)
        assert table.get_item(nan2) == 42
        with pytest.raises(KeyError, match=None) as error:
            table.get_item(other)
        assert str(error.value) == str(other)

    def test_nan_complex_imag(self):
        nan1 = complex(1, float("nan"))
        nan2 = complex(1, float("nan"))
        other = complex(2, float("nan"))
        assert nan1 is not nan2
        table = ht.PyObjectHashTable()
        table.set_item(nan1, 42)
        assert table.get_item(nan2) == 42
        with pytest.raises(KeyError, match=None) as error:
            table.get_item(other)
        assert str(error.value) == str(other)

    def test_nan_in_tuple(self):
        nan1 = (float("nan"),)
        nan2 = (float("nan"),)
        assert nan1[0] is not nan2[0]
        table = ht.PyObjectHashTable()
        table.set_item(nan1, 42)
        assert table.get_item(nan2) == 42

    def test_nan_in_nested_tuple(self):
        nan1 = (1, (2, (float("nan"),)))
        nan2 = (1, (2, (float("nan"),)))
        other = (1, 2)
        table = ht.PyObjectHashTable()
        table.set_item(nan1, 42)
        assert table.get_item(nan2) == 42
        with pytest.raises(KeyError, match=None) as error:
            table.get_item(other)
        assert str(error.value) == str(other)


def test_hash_equal_tuple_with_nans():
    a = (float("nan"), (float("nan"), float("nan")))
    b = (float("nan"), (float("nan"), float("nan")))
    assert ht.object_hash(a) == ht.object_hash(b)
    assert ht.objects_are_equal(a, b)


def test_get_labels_groupby_for_Int64(writable):
    table = ht.Int64HashTable()
    vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)
    vals.flags.writeable = writable
    arr, unique = table.get_labels_groupby(vals)
    expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.intp)
    expected_unique = np.array([1, 2], dtype=np.int64)
    tm.assert_numpy_array_equal(arr, expected_arr)
    tm.assert_numpy_array_equal(unique, expected_unique)


def test_tracemalloc_works_for_StringHashTable():
    N = 1000
    keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        del table
        assert get_allocated_khash_memory() == 0


def test_tracemalloc_for_empty_StringHashTable():
    with activated_tracemalloc():
        table = ht.StringHashTable()
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        del table
        assert get_allocated_khash_memory() == 0


@pytest.mark.parametrize("N", range(1, 110))
def test_no_reallocation_StringHashTable(N):
    keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)
    preallocated_table = ht.StringHashTable(N)
    n_buckets_start = preallocated_table.get_state()["n_buckets"]
    preallocated_table.map_locations(keys)
    n_buckets_end = preallocated_table.get_state()["n_buckets"]
    # original number of buckets was enough:
    assert n_buckets_start == n_buckets_end
    # check with clean table (not too much preallocated)
    clean_table = ht.StringHashTable()
    clean_table.map_locations(keys)
    assert n_buckets_start == clean_table.get_state()["n_buckets"]


@pytest.mark.parametrize(
    "table_type, dtype",
    [
        (ht.Float64HashTable, np.float64),
        (ht.Float32HashTable, np.float32),
        (ht.Complex128HashTable, np.complex128),
        (ht.Complex64HashTable, np.complex64),
    ],
)
class TestHashTableWithNans:
    def test_get_set_contains_len(self, table_type, dtype):
        index = float("nan")
        table = table_type()
        assert index not in table

        table.set_item(index, 42)
        assert len(table) == 1
        assert index in table
        assert table.get_item(index) == 42

        table.set_item(index, 41)
        assert len(table) == 1
        assert index in table
        assert table.get_item(index) == 41

    def test_map_locations(self, table_type, dtype):
        N = 10
        table = table_type()
        keys = np.full(N, np.nan, dtype=dtype)
        table.map_locations(keys)
        assert len(table) == 1
        assert table.get_item(np.nan) == N - 1

    def test_unique(self, table_type, dtype):
        N = 1020
        table = table_type()
        keys = np.full(N, np.nan, dtype=dtype)
        unique = table.unique(keys)
        assert np.all(np.isnan(unique)) and len(unique) == 1


def test_unique_for_nan_objects_floats():
    table = ht.PyObjectHashTable()
    keys = np.array([float("nan") for i in range(50)], dtype=np.object_)
    unique = table.unique(keys)
    assert len(unique) == 1


def test_unique_for_nan_objects_complex():
    table = ht.PyObjectHashTable()
    keys = np.array([complex(float("nan"), 1.0) for i in range(50)], dtype=np.object_)
    unique = table.unique(keys)
    assert len(unique) == 1


def test_unique_for_nan_objects_tuple():
    table = ht.PyObjectHashTable()
    keys = np.array(
        [1] + [(1.0, (float("nan"), 1.0)) for i in range(50)], dtype=np.object_
    )
    unique = table.unique(keys)
    assert len(unique) == 2


@pytest.mark.parametrize(
    "dtype",
    [
        np.object_,
        np.complex128,
        np.int64,
        np.uint64,
        np.float64,
        np.complex64,
        np.int32,
        np.uint32,
        np.float32,
        np.int16,
        np.uint16,
        np.int8,
        np.uint8,
        np.intp,
    ],
)
class TestHelpFunctions:
    def test_value_count(self, dtype, writable):
        N = 43
        expected = (np.arange(N) + N).astype(dtype)
        values = np.repeat(expected, 5)
        values.flags.writeable = writable
        keys, counts = ht.value_count(values, False)
        tm.assert_numpy_array_equal(np.sort(keys), expected)
        assert np.all(counts == 5)

    def test_value_count_stable(self, dtype, writable):
        # GH12679
        values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
        values.flags.writeable = writable
        keys, counts = ht.value_count(values, False)
        tm.assert_numpy_array_equal(keys, values)
        assert np.all(counts == 1)

    def test_duplicated_first(self, dtype, writable):
        N = 100
        values = np.repeat(np.arange(N).astype(dtype), 5)
        values.flags.writeable = writable
        result = ht.duplicated(values)
        expected = np.ones_like(values, dtype=np.bool_)
        expected[::5] = False
        tm.assert_numpy_array_equal(result, expected)

    def test_ismember_yes(self, dtype, writable):
        N = 127
        arr = np.arange(N).astype(dtype)
        values = np.arange(N).astype(dtype)
        arr.flags.writeable = writable
        values.flags.writeable = writable
        result = ht.ismember(arr, values)
        expected = np.ones_like(values, dtype=np.bool_)
        tm.assert_numpy_array_equal(result, expected)

    def test_ismember_no(self, dtype):
        N = 17
        arr = np.arange(N).astype(dtype)
        values = (np.arange(N) + N).astype(dtype)
        result = ht.ismember(arr, values)
        expected = np.zeros_like(values, dtype=np.bool_)
        tm.assert_numpy_array_equal(result, expected)

    def test_mode(self, dtype, writable):
        if dtype in (np.int8, np.uint8):
            N = 53
        else:
            N = 11111
        values = np.repeat(np.arange(N).astype(dtype), 5)
        values[0] = 42
        values.flags.writeable = writable
        result = ht.mode(values, False)
        assert result == 42

    def test_mode_stable(self, dtype, writable):
        values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)
        values.flags.writeable = writable
        keys = ht.mode(values, False)
        tm.assert_numpy_array_equal(keys, values)


def test_modes_with_nans():
    # GH42688, nans aren't mangled
    nulls = [pd.NA, np.nan, pd.NaT, None]
    values = np.array([True] + nulls * 2, dtype=np.object_)
    modes = ht.mode(values, False)
    assert modes.size == len(nulls)


def test_unique_label_indices_intp(writable):
    keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp)
    keys.flags.writeable = writable
    result = ht.unique_label_indices(keys)
    expected = np.array([0, 1, 5], dtype=np.intp)
    tm.assert_numpy_array_equal(result, expected)


def test_unique_label_indices():

    a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp)

    left = ht.unique_label_indices(a)
    right = np.unique(a, return_index=True)[1]

    tm.assert_numpy_array_equal(left, right, check_dtype=False)

    a[np.random.choice(len(a), 10)] = -1
    left = ht.unique_label_indices(a)
    right = np.unique(a, return_index=True)[1][1:]
    tm.assert_numpy_array_equal(left, right, check_dtype=False)


@pytest.mark.parametrize(
    "dtype",
    [
        np.float64,
        np.float32,
        np.complex128,
        np.complex64,
    ],
)
class TestHelpFunctionsWithNans:
    def test_value_count(self, dtype):
        values = np.array([np.nan, np.nan, np.nan], dtype=dtype)
        keys, counts = ht.value_count(values, True)
        assert len(keys) == 0
        keys, counts = ht.value_count(values, False)
        assert len(keys) == 1 and np.all(np.isnan(keys))
        assert counts[0] == 3

    def test_duplicated_first(self, dtype):
        values = np.array([np.nan, np.nan, np.nan], dtype=dtype)
        result = ht.duplicated(values)
        expected = np.array([False, True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_ismember_yes(self, dtype):
        arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)
        values = np.array([np.nan, np.nan], dtype=dtype)
        result = ht.ismember(arr, values)
        expected = np.array([True, True, True], dtype=np.bool_)
        tm.assert_numpy_array_equal(result, expected)

    def test_ismember_no(self, dtype):
        arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)
        values = np.array([1], dtype=dtype)
        result = ht.ismember(arr, values)
        expected = np.array([False, False, False], dtype=np.bool_)
        tm.assert_numpy_array_equal(result, expected)

    def test_mode(self, dtype):
        values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)
        assert ht.mode(values, True) == 42
        assert np.isnan(ht.mode(values, False))


def test_ismember_tuple_with_nans():
    # GH-41836
    values = [("a", float("nan")), ("b", 1)]
    comps = [("a", float("nan"))]
    result = isin(values, comps)
    expected = np.array([True, False], dtype=np.bool_)
    tm.assert_numpy_array_equal(result, expected)


def test_float_complex_int_are_equal_as_objects():
    values = ["a", 5, 5.0, 5.0 + 0j]
    comps = list(range(129))
    result = isin(values, comps)
    expected = np.array([False, True, True, True], dtype=np.bool_)
    tm.assert_numpy_array_equal(result, expected)
first commit 2024-05-03 04:18:51 +03:00			`from contextlib import contextmanager`
			`import struct`
			`import tracemalloc`

			`import numpy as np`
			`import pytest`

			`from pandas._libs import hashtable as ht`

			`import pandas as pd`
			`import pandas._testing as tm`
			`from pandas.core.algorithms import isin`


			`@contextmanager`
			`def activated_tracemalloc():`
			`tracemalloc.start()`
			`try:`
			`yield`
			`finally:`
			`tracemalloc.stop()`


			`def get_allocated_khash_memory():`
			`snapshot = tracemalloc.take_snapshot()`
			`snapshot = snapshot.filter_traces(`
			`(tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()),)`
			`)`
			`return sum(map(lambda x: x.size, snapshot.traces))`


			`@pytest.mark.parametrize(`
			`"table_type, dtype",`
			`[`
			`(ht.PyObjectHashTable, np.object_),`
			`(ht.Complex128HashTable, np.complex128),`
			`(ht.Int64HashTable, np.int64),`
			`(ht.UInt64HashTable, np.uint64),`
			`(ht.Float64HashTable, np.float64),`
			`(ht.Complex64HashTable, np.complex64),`
			`(ht.Int32HashTable, np.int32),`
			`(ht.UInt32HashTable, np.uint32),`
			`(ht.Float32HashTable, np.float32),`
			`(ht.Int16HashTable, np.int16),`
			`(ht.UInt16HashTable, np.uint16),`
			`(ht.Int8HashTable, np.int8),`
			`(ht.UInt8HashTable, np.uint8),`
			`(ht.IntpHashTable, np.intp),`
			`],`
			`)`
			`class TestHashTable:`
			`def test_get_set_contains_len(self, table_type, dtype):`
			`index = 5`
			`table = table_type(55)`
			`assert len(table) == 0`
			`assert index not in table`

			`table.set_item(index, 42)`
			`assert len(table) == 1`
			`assert index in table`
			`assert table.get_item(index) == 42`

			`table.set_item(index + 1, 41)`
			`assert index in table`
			`assert index + 1 in table`
			`assert len(table) == 2`
			`assert table.get_item(index) == 42`
			`assert table.get_item(index + 1) == 41`

			`table.set_item(index, 21)`
			`assert index in table`
			`assert index + 1 in table`
			`assert len(table) == 2`
			`assert table.get_item(index) == 21`
			`assert table.get_item(index + 1) == 41`
			`assert index + 2 not in table`

			`with pytest.raises(KeyError, match=str(index + 2)):`
			`table.get_item(index + 2)`

			`def test_map_keys_to_values(self, table_type, dtype, writable):`
			`# only Int64HashTable has this method`
			`if table_type == ht.Int64HashTable:`
			`N = 77`
			`table = table_type()`
			`keys = np.arange(N).astype(dtype)`
			`vals = np.arange(N).astype(np.int64) + N`
			`keys.flags.writeable = writable`
			`vals.flags.writeable = writable`
			`table.map_keys_to_values(keys, vals)`
			`for i in range(N):`
			`assert table.get_item(keys[i]) == i + N`

			`def test_map_locations(self, table_type, dtype, writable):`
			`N = 8`
			`table = table_type()`
			`keys = (np.arange(N) + N).astype(dtype)`
			`keys.flags.writeable = writable`
			`table.map_locations(keys)`
			`for i in range(N):`
			`assert table.get_item(keys[i]) == i`

			`def test_lookup(self, table_type, dtype, writable):`
			`N = 3`
			`table = table_type()`
			`keys = (np.arange(N) + N).astype(dtype)`
			`keys.flags.writeable = writable`
			`table.map_locations(keys)`
			`result = table.lookup(keys)`
			`expected = np.arange(N)`
			`tm.assert_numpy_array_equal(result.astype(np.int64), expected.astype(np.int64))`

			`def test_lookup_wrong(self, table_type, dtype):`
			`if dtype in (np.int8, np.uint8):`
			`N = 100`
			`else:`
			`N = 512`
			`table = table_type()`
			`keys = (np.arange(N) + N).astype(dtype)`
			`table.map_locations(keys)`
			`wrong_keys = np.arange(N).astype(dtype)`
			`result = table.lookup(wrong_keys)`
			`assert np.all(result == -1)`

			`def test_unique(self, table_type, dtype, writable):`
			`if dtype in (np.int8, np.uint8):`
			`N = 88`
			`else:`
			`N = 1000`
			`table = table_type()`
			`expected = (np.arange(N) + N).astype(dtype)`
			`keys = np.repeat(expected, 5)`
			`keys.flags.writeable = writable`
			`unique = table.unique(keys)`
			`tm.assert_numpy_array_equal(unique, expected)`

			`def test_tracemalloc_works(self, table_type, dtype):`
			`if dtype in (np.int8, np.uint8):`
			`N = 256`
			`else:`
			`N = 30000`
			`keys = np.arange(N).astype(dtype)`
			`with activated_tracemalloc():`
			`table = table_type()`
			`table.map_locations(keys)`
			`used = get_allocated_khash_memory()`
			`my_size = table.sizeof()`
			`assert used == my_size`
			`del table`
			`assert get_allocated_khash_memory() == 0`

			`def test_tracemalloc_for_empty(self, table_type, dtype):`
			`with activated_tracemalloc():`
			`table = table_type()`
			`used = get_allocated_khash_memory()`
			`my_size = table.sizeof()`
			`assert used == my_size`
			`del table`
			`assert get_allocated_khash_memory() == 0`

			`def test_get_state(self, table_type, dtype):`
			`table = table_type(1000)`
			`state = table.get_state()`
			`assert state["size"] == 0`
			`assert state["n_occupied"] == 0`
			`assert "n_buckets" in state`
			`assert "upper_bound" in state`

			`@pytest.mark.parametrize("N", range(1, 110))`
			`def test_no_reallocation(self, table_type, dtype, N):`
			`keys = np.arange(N).astype(dtype)`
			`preallocated_table = table_type(N)`
			`n_buckets_start = preallocated_table.get_state()["n_buckets"]`
			`preallocated_table.map_locations(keys)`
			`n_buckets_end = preallocated_table.get_state()["n_buckets"]`
			`# original number of buckets was enough:`
			`assert n_buckets_start == n_buckets_end`
			`# check with clean table (not too much preallocated)`
			`clean_table = table_type()`
			`clean_table.map_locations(keys)`
			`assert n_buckets_start == clean_table.get_state()["n_buckets"]`


			`class TestHashTableUnsorted:`
			`# TODO: moved from test_algos; may be redundancies with other tests`
			`def test_string_hashtable_set_item_signature(self):`
			`# GH#30419 fix typing in StringHashTable.set_item to prevent segfault`
			`tbl = ht.StringHashTable()`

			`tbl.set_item("key", 1)`
			`assert tbl.get_item("key") == 1`

			`with pytest.raises(TypeError, match="'key' has incorrect type"):`
			`# key arg typed as string, not object`
			`tbl.set_item(4, 6)`
			`with pytest.raises(TypeError, match="'val' has incorrect type"):`
			`tbl.get_item(4)`

			`def test_lookup_nan(self, writable):`
			`# GH#21688 ensure we can deal with readonly memory views`
			`xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])`
			`xs.setflags(write=writable)`
			`m = ht.Float64HashTable()`
			`m.map_locations(xs)`
			`tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))`

			`def test_add_signed_zeros(self):`
			`# GH#21866 inconsistent hash-function for float64`
			`# default hash-function would lead to different hash-buckets`
			`# for 0.0 and -0.0 if there are more than 2^30 hash-buckets`
			`# but this would mean 16GB`
			`N = 4 # 12 * 10**8 would trigger the error, if you have enough memory`
			`m = ht.Float64HashTable(N)`
			`m.set_item(0.0, 0)`
			`m.set_item(-0.0, 0)`
			`assert len(m) == 1 # 0.0 and -0.0 are equivalent`

			`def test_add_different_nans(self):`
			`# GH#21866 inconsistent hash-function for float64`
			`# create different nans from bit-patterns:`
			`NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0]`
			`NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0]`
			`assert NAN1 != NAN1`
			`assert NAN2 != NAN2`
			`# default hash function would lead to different hash-buckets`
			`# for NAN1 and NAN2 even if there are only 4 buckets:`
			`m = ht.Float64HashTable()`
			`m.set_item(NAN1, 0)`
			`m.set_item(NAN2, 0)`
			`assert len(m) == 1 # NAN1 and NAN2 are equivalent`

			`def test_lookup_overflow(self, writable):`
			`xs = np.array([1, 2, 2**63], dtype=np.uint64)`
			`# GH 21688 ensure we can deal with readonly memory views`
			`xs.setflags(write=writable)`
			`m = ht.UInt64HashTable()`
			`m.map_locations(xs)`
			`tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.intp))`

			`@pytest.mark.parametrize("nvals", [0, 10]) # resizing to 0 is special case`
			`@pytest.mark.parametrize(`
			`"htable, uniques, dtype, safely_resizes",`
			`[`
			`(ht.PyObjectHashTable, ht.ObjectVector, "object", False),`
			`(ht.StringHashTable, ht.ObjectVector, "object", True),`
			`(ht.Float64HashTable, ht.Float64Vector, "float64", False),`
			`(ht.Int64HashTable, ht.Int64Vector, "int64", False),`
			`(ht.Int32HashTable, ht.Int32Vector, "int32", False),`
			`(ht.UInt64HashTable, ht.UInt64Vector, "uint64", False),`
			`],`
			`)`
			`def test_vector_resize(`
			`self, writable, htable, uniques, dtype, safely_resizes, nvals`
			`):`
			`# Test for memory errors after internal vector`
			`# reallocations (GH 7157)`
			`# Changed from using np.random.rand to range`
			`# which could cause flaky CI failures when safely_resizes=False`
			`vals = np.array(range(1000), dtype=dtype)`

			`# GH 21688 ensures we can deal with read-only memory views`
			`vals.setflags(write=writable)`

			`# initialise instances; cannot initialise in parametrization,`
			`# as otherwise external views would be held on the array (which is`
			`# one of the things this test is checking)`
			`htable = htable()`
			`uniques = uniques()`

			`# get_labels may append to uniques`
			`htable.get_labels(vals[:nvals], uniques, 0, -1)`
			`# to_array() sets an external_view_exists flag on uniques.`
			`tmp = uniques.to_array()`
			`oldshape = tmp.shape`

			`# subsequent get_labels() calls can no longer append to it`
			`# (except for StringHashTables + ObjectVector)`
			`if safely_resizes:`
			`htable.get_labels(vals, uniques, 0, -1)`
			`else:`
			`with pytest.raises(ValueError, match="external reference.*"):`
			`htable.get_labels(vals, uniques, 0, -1)`

			`uniques.to_array() # should not raise here`
			`assert tmp.shape == oldshape`

			`@pytest.mark.parametrize(`
			`"hashtable",`
			`[`
			`ht.PyObjectHashTable,`
			`ht.StringHashTable,`
			`ht.Float64HashTable,`
			`ht.Int64HashTable,`
			`ht.Int32HashTable,`
			`ht.UInt64HashTable,`
			`],`
			`)`
			`def test_hashtable_large_sizehint(self, hashtable):`
			`# GH#22729 smoketest for not raising when passing a large size_hint`
			`size_hint = np.iinfo(np.uint32).max + 1`
			`hashtable(size_hint=size_hint)`


			`class TestPyObjectHashTableWithNans:`
			`def test_nan_float(self):`
			`nan1 = float("nan")`
			`nan2 = float("nan")`
			`assert nan1 is not nan2`
			`table = ht.PyObjectHashTable()`
			`table.set_item(nan1, 42)`
			`assert table.get_item(nan2) == 42`

			`def test_nan_complex_both(self):`
			`nan1 = complex(float("nan"), float("nan"))`
			`nan2 = complex(float("nan"), float("nan"))`
			`assert nan1 is not nan2`
			`table = ht.PyObjectHashTable()`
			`table.set_item(nan1, 42)`
			`assert table.get_item(nan2) == 42`

			`def test_nan_complex_real(self):`
			`nan1 = complex(float("nan"), 1)`
			`nan2 = complex(float("nan"), 1)`
			`other = complex(float("nan"), 2)`
			`assert nan1 is not nan2`
			`table = ht.PyObjectHashTable()`
			`table.set_item(nan1, 42)`
			`assert table.get_item(nan2) == 42`
			`with pytest.raises(KeyError, match=None) as error:`
			`table.get_item(other)`
			`assert str(error.value) == str(other)`

			`def test_nan_complex_imag(self):`
			`nan1 = complex(1, float("nan"))`
			`nan2 = complex(1, float("nan"))`
			`other = complex(2, float("nan"))`
			`assert nan1 is not nan2`
			`table = ht.PyObjectHashTable()`
			`table.set_item(nan1, 42)`
			`assert table.get_item(nan2) == 42`
			`with pytest.raises(KeyError, match=None) as error:`
			`table.get_item(other)`
			`assert str(error.value) == str(other)`

			`def test_nan_in_tuple(self):`
			`nan1 = (float("nan"),)`
			`nan2 = (float("nan"),)`
			`assert nan1[0] is not nan2[0]`
			`table = ht.PyObjectHashTable()`
			`table.set_item(nan1, 42)`
			`assert table.get_item(nan2) == 42`

			`def test_nan_in_nested_tuple(self):`
			`nan1 = (1, (2, (float("nan"),)))`
			`nan2 = (1, (2, (float("nan"),)))`
			`other = (1, 2)`
			`table = ht.PyObjectHashTable()`
			`table.set_item(nan1, 42)`
			`assert table.get_item(nan2) == 42`
			`with pytest.raises(KeyError, match=None) as error:`
			`table.get_item(other)`
			`assert str(error.value) == str(other)`


			`def test_hash_equal_tuple_with_nans():`
			`a = (float("nan"), (float("nan"), float("nan")))`
			`b = (float("nan"), (float("nan"), float("nan")))`
			`assert ht.object_hash(a) == ht.object_hash(b)`
			`assert ht.objects_are_equal(a, b)`


			`def test_get_labels_groupby_for_Int64(writable):`
			`table = ht.Int64HashTable()`
			`vals = np.array([1, 2, -1, 2, 1, -1], dtype=np.int64)`
			`vals.flags.writeable = writable`
			`arr, unique = table.get_labels_groupby(vals)`
			`expected_arr = np.array([0, 1, -1, 1, 0, -1], dtype=np.intp)`
			`expected_unique = np.array([1, 2], dtype=np.int64)`
			`tm.assert_numpy_array_equal(arr, expected_arr)`
			`tm.assert_numpy_array_equal(unique, expected_unique)`


			`def test_tracemalloc_works_for_StringHashTable():`
			`N = 1000`
			`keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)`
			`with activated_tracemalloc():`
			`table = ht.StringHashTable()`
			`table.map_locations(keys)`
			`used = get_allocated_khash_memory()`
			`my_size = table.sizeof()`
			`assert used == my_size`
			`del table`
			`assert get_allocated_khash_memory() == 0`


			`def test_tracemalloc_for_empty_StringHashTable():`
			`with activated_tracemalloc():`
			`table = ht.StringHashTable()`
			`used = get_allocated_khash_memory()`
			`my_size = table.sizeof()`
			`assert used == my_size`
			`del table`
			`assert get_allocated_khash_memory() == 0`


			`@pytest.mark.parametrize("N", range(1, 110))`
			`def test_no_reallocation_StringHashTable(N):`
			`keys = np.arange(N).astype(np.compat.unicode).astype(np.object_)`
			`preallocated_table = ht.StringHashTable(N)`
			`n_buckets_start = preallocated_table.get_state()["n_buckets"]`
			`preallocated_table.map_locations(keys)`
			`n_buckets_end = preallocated_table.get_state()["n_buckets"]`
			`# original number of buckets was enough:`
			`assert n_buckets_start == n_buckets_end`
			`# check with clean table (not too much preallocated)`
			`clean_table = ht.StringHashTable()`
			`clean_table.map_locations(keys)`
			`assert n_buckets_start == clean_table.get_state()["n_buckets"]`


			`@pytest.mark.parametrize(`
			`"table_type, dtype",`
			`[`
			`(ht.Float64HashTable, np.float64),`
			`(ht.Float32HashTable, np.float32),`
			`(ht.Complex128HashTable, np.complex128),`
			`(ht.Complex64HashTable, np.complex64),`
			`],`
			`)`
			`class TestHashTableWithNans:`
			`def test_get_set_contains_len(self, table_type, dtype):`
			`index = float("nan")`
			`table = table_type()`
			`assert index not in table`

			`table.set_item(index, 42)`
			`assert len(table) == 1`
			`assert index in table`
			`assert table.get_item(index) == 42`

			`table.set_item(index, 41)`
			`assert len(table) == 1`
			`assert index in table`
			`assert table.get_item(index) == 41`

			`def test_map_locations(self, table_type, dtype):`
			`N = 10`
			`table = table_type()`
			`keys = np.full(N, np.nan, dtype=dtype)`
			`table.map_locations(keys)`
			`assert len(table) == 1`
			`assert table.get_item(np.nan) == N - 1`

			`def test_unique(self, table_type, dtype):`
			`N = 1020`
			`table = table_type()`
			`keys = np.full(N, np.nan, dtype=dtype)`
			`unique = table.unique(keys)`
			`assert np.all(np.isnan(unique)) and len(unique) == 1`


			`def test_unique_for_nan_objects_floats():`
			`table = ht.PyObjectHashTable()`
			`keys = np.array([float("nan") for i in range(50)], dtype=np.object_)`
			`unique = table.unique(keys)`
			`assert len(unique) == 1`


			`def test_unique_for_nan_objects_complex():`
			`table = ht.PyObjectHashTable()`
			`keys = np.array([complex(float("nan"), 1.0) for i in range(50)], dtype=np.object_)`
			`unique = table.unique(keys)`
			`assert len(unique) == 1`


			`def test_unique_for_nan_objects_tuple():`
			`table = ht.PyObjectHashTable()`
			`keys = np.array(`
			`[1] + [(1.0, (float("nan"), 1.0)) for i in range(50)], dtype=np.object_`
			`)`
			`unique = table.unique(keys)`
			`assert len(unique) == 2`


			`@pytest.mark.parametrize(`
			`"dtype",`
			`[`
			`np.object_,`
			`np.complex128,`
			`np.int64,`
			`np.uint64,`
			`np.float64,`
			`np.complex64,`
			`np.int32,`
			`np.uint32,`
			`np.float32,`
			`np.int16,`
			`np.uint16,`
			`np.int8,`
			`np.uint8,`
			`np.intp,`
			`],`
			`)`
			`class TestHelpFunctions:`
			`def test_value_count(self, dtype, writable):`
			`N = 43`
			`expected = (np.arange(N) + N).astype(dtype)`
			`values = np.repeat(expected, 5)`
			`values.flags.writeable = writable`
			`keys, counts = ht.value_count(values, False)`
			`tm.assert_numpy_array_equal(np.sort(keys), expected)`
			`assert np.all(counts == 5)`

			`def test_value_count_stable(self, dtype, writable):`
			`# GH12679`
			`values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)`
			`values.flags.writeable = writable`
			`keys, counts = ht.value_count(values, False)`
			`tm.assert_numpy_array_equal(keys, values)`
			`assert np.all(counts == 1)`

			`def test_duplicated_first(self, dtype, writable):`
			`N = 100`
			`values = np.repeat(np.arange(N).astype(dtype), 5)`
			`values.flags.writeable = writable`
			`result = ht.duplicated(values)`
			`expected = np.ones_like(values, dtype=np.bool_)`
			`expected[::5] = False`
			`tm.assert_numpy_array_equal(result, expected)`

			`def test_ismember_yes(self, dtype, writable):`
			`N = 127`
			`arr = np.arange(N).astype(dtype)`
			`values = np.arange(N).astype(dtype)`
			`arr.flags.writeable = writable`
			`values.flags.writeable = writable`
			`result = ht.ismember(arr, values)`
			`expected = np.ones_like(values, dtype=np.bool_)`
			`tm.assert_numpy_array_equal(result, expected)`

			`def test_ismember_no(self, dtype):`
			`N = 17`
			`arr = np.arange(N).astype(dtype)`
			`values = (np.arange(N) + N).astype(dtype)`
			`result = ht.ismember(arr, values)`
			`expected = np.zeros_like(values, dtype=np.bool_)`
			`tm.assert_numpy_array_equal(result, expected)`

			`def test_mode(self, dtype, writable):`
			`if dtype in (np.int8, np.uint8):`
			`N = 53`
			`else:`
			`N = 11111`
			`values = np.repeat(np.arange(N).astype(dtype), 5)`
			`values[0] = 42`
			`values.flags.writeable = writable`
			`result = ht.mode(values, False)`
			`assert result == 42`

			`def test_mode_stable(self, dtype, writable):`
			`values = np.array([2, 1, 5, 22, 3, -1, 8]).astype(dtype)`
			`values.flags.writeable = writable`
			`keys = ht.mode(values, False)`
			`tm.assert_numpy_array_equal(keys, values)`


			`def test_modes_with_nans():`
			`# GH42688, nans aren't mangled`
			`nulls = [pd.NA, np.nan, pd.NaT, None]`
			`values = np.array([True] + nulls * 2, dtype=np.object_)`
			`modes = ht.mode(values, False)`
			`assert modes.size == len(nulls)`


			`def test_unique_label_indices_intp(writable):`
			`keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp)`
			`keys.flags.writeable = writable`
			`result = ht.unique_label_indices(keys)`
			`expected = np.array([0, 1, 5], dtype=np.intp)`
			`tm.assert_numpy_array_equal(result, expected)`


			`def test_unique_label_indices():`

			`a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp)`

			`left = ht.unique_label_indices(a)`
			`right = np.unique(a, return_index=True)[1]`

			`tm.assert_numpy_array_equal(left, right, check_dtype=False)`

			`a[np.random.choice(len(a), 10)] = -1`
			`left = ht.unique_label_indices(a)`
			`right = np.unique(a, return_index=True)[1][1:]`
			`tm.assert_numpy_array_equal(left, right, check_dtype=False)`


			`@pytest.mark.parametrize(`
			`"dtype",`
			`[`
			`np.float64,`
			`np.float32,`
			`np.complex128,`
			`np.complex64,`
			`],`
			`)`
			`class TestHelpFunctionsWithNans:`
			`def test_value_count(self, dtype):`
			`values = np.array([np.nan, np.nan, np.nan], dtype=dtype)`
			`keys, counts = ht.value_count(values, True)`
			`assert len(keys) == 0`
			`keys, counts = ht.value_count(values, False)`
			`assert len(keys) == 1 and np.all(np.isnan(keys))`
			`assert counts[0] == 3`

			`def test_duplicated_first(self, dtype):`
			`values = np.array([np.nan, np.nan, np.nan], dtype=dtype)`
			`result = ht.duplicated(values)`
			`expected = np.array([False, True, True])`
			`tm.assert_numpy_array_equal(result, expected)`

			`def test_ismember_yes(self, dtype):`
			`arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)`
			`values = np.array([np.nan, np.nan], dtype=dtype)`
			`result = ht.ismember(arr, values)`
			`expected = np.array([True, True, True], dtype=np.bool_)`
			`tm.assert_numpy_array_equal(result, expected)`

			`def test_ismember_no(self, dtype):`
			`arr = np.array([np.nan, np.nan, np.nan], dtype=dtype)`
			`values = np.array([1], dtype=dtype)`
			`result = ht.ismember(arr, values)`
			`expected = np.array([False, False, False], dtype=np.bool_)`
			`tm.assert_numpy_array_equal(result, expected)`

			`def test_mode(self, dtype):`
			`values = np.array([42, np.nan, np.nan, np.nan], dtype=dtype)`
			`assert ht.mode(values, True) == 42`
			`assert np.isnan(ht.mode(values, False))`


			`def test_ismember_tuple_with_nans():`
			`# GH-41836`
			`values = [("a", float("nan")), ("b", 1)]`
			`comps = [("a", float("nan"))]`
			`result = isin(values, comps)`
			`expected = np.array([True, False], dtype=np.bool_)`
			`tm.assert_numpy_array_equal(result, expected)`


			`def test_float_complex_int_are_equal_as_objects():`
			`values = ["a", 5, 5.0, 5.0 + 0j]`
			`comps = list(range(129))`
			`result = isin(values, comps)`
			`expected = np.array([False, True, True, True], dtype=np.bool_)`
			`tm.assert_numpy_array_equal(result, expected)`