ai-content-maker/.venv/Lib/site-packages/pandas/tests/extension/json/array.py

"""
Test extension array for storing nested data in a pandas container.

The JSONArray stores lists of dictionaries. The storage mechanism is a list,
not an ndarray.

Note
----
We currently store lists of UserDicts. Pandas has a few places
internally that specifically check for dicts, and does non-scalar things
in that case. We *want* the dictionaries to be treated as scalars, so we
hack around pandas by using UserDicts.
"""
from __future__ import annotations

from collections import (
    UserDict,
    abc,
)
import itertools
import numbers
import random
import string
import sys
from typing import (
    Any,
    Mapping,
)

import numpy as np

from pandas._typing import type_t

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
    is_bool_dtype,
    is_list_like,
    pandas_dtype,
)

import pandas as pd
from pandas.api.extensions import (
    ExtensionArray,
    ExtensionDtype,
)
from pandas.core.indexers import unpack_tuple_and_ellipses


class JSONDtype(ExtensionDtype):
    type = abc.Mapping
    name = "json"
    na_value: Mapping[str, Any] = UserDict()

    @classmethod
    def construct_array_type(cls) -> type_t[JSONArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return JSONArray


class JSONArray(ExtensionArray):
    dtype = JSONDtype()
    __array_priority__ = 1000

    def __init__(self, values, dtype=None, copy=False) -> None:
        for val in values:
            if not isinstance(val, self.dtype.type):
                raise TypeError("All values must be of type " + str(self.dtype.type))
        self.data = values

        # Some aliases for common attribute names to ensure pandas supports
        # these
        self._items = self._data = self.data
        # those aliases are currently not working due to assumptions
        # in internal code (GH-20735)
        # self._values = self.values = self.data

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        return cls(scalars)

    @classmethod
    def _from_factorized(cls, values, original):
        return cls([UserDict(x) for x in values if x != ()])

    def __getitem__(self, item):
        if isinstance(item, tuple):
            item = unpack_tuple_and_ellipses(item)

        if isinstance(item, numbers.Integral):
            return self.data[item]
        elif isinstance(item, slice) and item == slice(None):
            # Make sure we get a view
            return type(self)(self.data)
        elif isinstance(item, slice):
            # slice
            return type(self)(self.data[item])
        elif not is_list_like(item):
            # e.g. "foo" or 2.5
            # exception message copied from numpy
            raise IndexError(
                r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
                r"(`None`) and integer or boolean arrays are valid indices"
            )
        else:
            item = pd.api.indexers.check_array_indexer(self, item)
            if is_bool_dtype(item.dtype):
                return self._from_sequence([x for x, m in zip(self, item) if m])
            # integer
            return type(self)([self.data[i] for i in item])

    def __setitem__(self, key, value):
        if isinstance(key, numbers.Integral):
            self.data[key] = value
        else:
            if not isinstance(value, (type(self), abc.Sequence)):
                # broadcast value
                value = itertools.cycle([value])

            if isinstance(key, np.ndarray) and key.dtype == "bool":
                # masking
                for i, (k, v) in enumerate(zip(key, value)):
                    if k:
                        assert isinstance(v, self.dtype.type)
                        self.data[i] = v
            else:
                for k, v in zip(key, value):
                    assert isinstance(v, self.dtype.type)
                    self.data[k] = v

    def __len__(self) -> int:
        return len(self.data)

    def __eq__(self, other):
        return NotImplemented

    def __ne__(self, other):
        return NotImplemented

    def __array__(self, dtype=None):
        if dtype is None:
            dtype = object
        if dtype == object:
            # on py38 builds it looks like numpy is inferring to a non-1D array
            return construct_1d_object_array_from_listlike(list(self))
        return np.asarray(self.data, dtype=dtype)

    @property
    def nbytes(self) -> int:
        return sys.getsizeof(self.data)

    def isna(self):
        return np.array([x == self.dtype.na_value for x in self.data], dtype=bool)

    def take(self, indexer, allow_fill=False, fill_value=None):
        # re-implement here, since NumPy has trouble setting
        # sized objects like UserDicts into scalar slots of
        # an ndarary.
        indexer = np.asarray(indexer)
        msg = (
            "Index is out of bounds or cannot do a "
            "non-empty take from an empty array."
        )

        if allow_fill:
            if fill_value is None:
                fill_value = self.dtype.na_value
            # bounds check
            if (indexer < -1).any():
                raise ValueError
            try:
                output = [
                    self.data[loc] if loc != -1 else fill_value for loc in indexer
                ]
            except IndexError as err:
                raise IndexError(msg) from err
        else:
            try:
                output = [self.data[loc] for loc in indexer]
            except IndexError as err:
                raise IndexError(msg) from err

        return self._from_sequence(output)

    def copy(self):
        return type(self)(self.data[:])

    def astype(self, dtype, copy=True):
        # NumPy has issues when all the dicts are the same length.
        # np.array([UserDict(...), UserDict(...)]) fails,
        # but np.array([{...}, {...}]) works, so cast.
        from pandas.core.arrays.string_ import StringDtype

        dtype = pandas_dtype(dtype)
        # needed to add this check for the Series constructor
        if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:
            if copy:
                return self.copy()
            return self
        elif isinstance(dtype, StringDtype):
            value = self.astype(str)  # numpy doesn'y like nested dicts
            return dtype.construct_array_type()._from_sequence(value, copy=False)

        return np.array([dict(x) for x in self], dtype=dtype, copy=copy)

    def unique(self):
        # Parent method doesn't work since np.array will try to infer
        # a 2-dim object.
        return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}])

    @classmethod
    def _concat_same_type(cls, to_concat):
        data = list(itertools.chain.from_iterable(x.data for x in to_concat))
        return cls(data)

    def _values_for_factorize(self):
        frozen = self._values_for_argsort()
        if len(frozen) == 0:
            # factorize_array expects 1-d array, this is a len-0 2-d array.
            frozen = frozen.ravel()
        return frozen, ()

    def _values_for_argsort(self):
        # Bypass NumPy's shape inference to get a (N,) array of tuples.
        frozen = [tuple(x.items()) for x in self]
        return construct_1d_object_array_from_listlike(frozen)


def make_data():
    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
    return [
        UserDict(
            [
                (random.choice(string.ascii_letters), random.randint(0, 100))
                for _ in range(random.randint(0, 10))
            ]
        )
        for _ in range(100)
    ]
first commit 2024-05-03 04:18:51 +03:00			`"""`
			`Test extension array for storing nested data in a pandas container.`

			`The JSONArray stores lists of dictionaries. The storage mechanism is a list,`
			`not an ndarray.`

			`Note`
			`----`
			`We currently store lists of UserDicts. Pandas has a few places`
			`internally that specifically check for dicts, and does non-scalar things`
			`in that case. We want the dictionaries to be treated as scalars, so we`
			`hack around pandas by using UserDicts.`
			`"""`
			`from __future__ import annotations`

			`from collections import (`
			`UserDict,`
			`abc,`
			`)`
			`import itertools`
			`import numbers`
			`import random`
			`import string`
			`import sys`
			`from typing import (`
			`Any,`
			`Mapping,`
			`)`

			`import numpy as np`

			`from pandas._typing import type_t`

			`from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike`
			`from pandas.core.dtypes.common import (`
			`is_bool_dtype,`
			`is_list_like,`
			`pandas_dtype,`
			`)`

			`import pandas as pd`
			`from pandas.api.extensions import (`
			`ExtensionArray,`
			`ExtensionDtype,`
			`)`
			`from pandas.core.indexers import unpack_tuple_and_ellipses`


			`class JSONDtype(ExtensionDtype):`
			`type = abc.Mapping`
			`name = "json"`
			`na_value: Mapping[str, Any] = UserDict()`

			`@classmethod`
			`def construct_array_type(cls) -> type_t[JSONArray]:`
			`"""`
			`Return the array type associated with this dtype.`

			`Returns`
			`-------`
			`type`
			`"""`
			`return JSONArray`


			`class JSONArray(ExtensionArray):`
			`dtype = JSONDtype()`
			`__array_priority__ = 1000`

			`def __init__(self, values, dtype=None, copy=False) -> None:`
			`for val in values:`
			`if not isinstance(val, self.dtype.type):`
			`raise TypeError("All values must be of type " + str(self.dtype.type))`
			`self.data = values`

			`# Some aliases for common attribute names to ensure pandas supports`
			`# these`
			`self._items = self._data = self.data`
			`# those aliases are currently not working due to assumptions`
			`# in internal code (GH-20735)`
			`# self._values = self.values = self.data`

			`@classmethod`
			`def _from_sequence(cls, scalars, dtype=None, copy=False):`
			`return cls(scalars)`

			`@classmethod`
			`def _from_factorized(cls, values, original):`
			`return cls([UserDict(x) for x in values if x != ()])`

			`def __getitem__(self, item):`
			`if isinstance(item, tuple):`
			`item = unpack_tuple_and_ellipses(item)`

			`if isinstance(item, numbers.Integral):`
			`return self.data[item]`
			`elif isinstance(item, slice) and item == slice(None):`
			`# Make sure we get a view`
			`return type(self)(self.data)`
			`elif isinstance(item, slice):`
			`# slice`
			`return type(self)(self.data[item])`
			`elif not is_list_like(item):`
			`# e.g. "foo" or 2.5`
			`# exception message copied from numpy`
			`raise IndexError(`
			r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
			r"(`None`) and integer or boolean arrays are valid indices"
			`)`
			`else:`
			`item = pd.api.indexers.check_array_indexer(self, item)`
			`if is_bool_dtype(item.dtype):`
			`return self._from_sequence([x for x, m in zip(self, item) if m])`
			`# integer`
			`return type(self)([self.data[i] for i in item])`

			`def __setitem__(self, key, value):`
			`if isinstance(key, numbers.Integral):`
			`self.data[key] = value`
			`else:`
			`if not isinstance(value, (type(self), abc.Sequence)):`
			`# broadcast value`
			`value = itertools.cycle([value])`

			`if isinstance(key, np.ndarray) and key.dtype == "bool":`
			`# masking`
			`for i, (k, v) in enumerate(zip(key, value)):`
			`if k:`
			`assert isinstance(v, self.dtype.type)`
			`self.data[i] = v`
			`else:`
			`for k, v in zip(key, value):`
			`assert isinstance(v, self.dtype.type)`
			`self.data[k] = v`

			`def __len__(self) -> int:`
			`return len(self.data)`

			`def __eq__(self, other):`
			`return NotImplemented`

			`def __ne__(self, other):`
			`return NotImplemented`

			`def __array__(self, dtype=None):`
			`if dtype is None:`
			`dtype = object`
			`if dtype == object:`
			`# on py38 builds it looks like numpy is inferring to a non-1D array`
			`return construct_1d_object_array_from_listlike(list(self))`
			`return np.asarray(self.data, dtype=dtype)`

			`@property`
			`def nbytes(self) -> int:`
			`return sys.getsizeof(self.data)`

			`def isna(self):`
			`return np.array([x == self.dtype.na_value for x in self.data], dtype=bool)`

			`def take(self, indexer, allow_fill=False, fill_value=None):`
			`# re-implement here, since NumPy has trouble setting`
			`# sized objects like UserDicts into scalar slots of`
			`# an ndarary.`
			`indexer = np.asarray(indexer)`
			`msg = (`
			`"Index is out of bounds or cannot do a "`
			`"non-empty take from an empty array."`
			`)`

			`if allow_fill:`
			`if fill_value is None:`
			`fill_value = self.dtype.na_value`
			`# bounds check`
			`if (indexer < -1).any():`
			`raise ValueError`
			`try:`
			`output = [`
			`self.data[loc] if loc != -1 else fill_value for loc in indexer`
			`]`
			`except IndexError as err:`
			`raise IndexError(msg) from err`
			`else:`
			`try:`
			`output = [self.data[loc] for loc in indexer]`
			`except IndexError as err:`
			`raise IndexError(msg) from err`

			`return self._from_sequence(output)`

			`def copy(self):`
			`return type(self)(self.data[:])`

			`def astype(self, dtype, copy=True):`
			`# NumPy has issues when all the dicts are the same length.`
			`# np.array([UserDict(...), UserDict(...)]) fails,`
			`# but np.array([{...}, {...}]) works, so cast.`
			`from pandas.core.arrays.string_ import StringDtype`

			`dtype = pandas_dtype(dtype)`
			`# needed to add this check for the Series constructor`
			`if isinstance(dtype, type(self.dtype)) and dtype == self.dtype:`
			`if copy:`
			`return self.copy()`
			`return self`
			`elif isinstance(dtype, StringDtype):`
			`value = self.astype(str) # numpy doesn'y like nested dicts`
			`return dtype.construct_array_type()._from_sequence(value, copy=False)`

			`return np.array([dict(x) for x in self], dtype=dtype, copy=copy)`

			`def unique(self):`
			`# Parent method doesn't work since np.array will try to infer`
			`# a 2-dim object.`
			`return type(self)([dict(x) for x in {tuple(d.items()) for d in self.data}])`

			`@classmethod`
			`def _concat_same_type(cls, to_concat):`
			`data = list(itertools.chain.from_iterable(x.data for x in to_concat))`
			`return cls(data)`

			`def _values_for_factorize(self):`
			`frozen = self._values_for_argsort()`
			`if len(frozen) == 0:`
			`# factorize_array expects 1-d array, this is a len-0 2-d array.`
			`frozen = frozen.ravel()`
			`return frozen, ()`

			`def _values_for_argsort(self):`
			`# Bypass NumPy's shape inference to get a (N,) array of tuples.`
			`frozen = [tuple(x.items()) for x in self]`
			`return construct_1d_object_array_from_listlike(frozen)`


			`def make_data():`
			`# TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer`
			`return [`
			`UserDict(`
			`[`
			`(random.choice(string.ascii_letters), random.randint(0, 100))`
			`for _ in range(random.randint(0, 10))`
			`]`
			`)`
			`for _ in range(100)`
			`]`