ai-content-maker/.venv/Lib/site-packages/numba/cpython/charseq.py

1021 lines
34 KiB
Python
Raw Permalink Normal View History

2024-05-03 04:18:51 +03:00
"""Implements operations on bytes and str (unicode) array items."""
import operator
import numpy as np
from llvmlite import ir
from numba.core import types, cgutils
from numba.core.extending import (overload, intrinsic, overload_method,
lower_cast, register_jitable)
from numba.core.cgutils import is_nonelike
from numba.cpython import unicode
# bytes and str arrays items are of type CharSeq and UnicodeCharSeq,
# respectively. See numpy/types/npytypes.py for CharSeq,
# UnicodeCharSeq definitions. The corresponding data models are
# defined in numpy/datamodel/models.py. Boxing/unboxing of item types
# are defined in numpy/targets/boxing.py, see box_unicodecharseq,
# unbox_unicodecharseq, box_charseq, unbox_charseq.
s1_dtype = np.dtype('S1')
assert s1_dtype.itemsize == 1
bytes_type = types.Bytes(types.uint8, 1, "C", readonly=True)
# Currently, NumPy supports only UTF-32 arrays but this may change in
# future and the approach used here for supporting str arrays may need
# a revision depending on how NumPy will support UTF-8 and UTF-16
# arrays.
u1_dtype = np.dtype('U1')
unicode_byte_width = u1_dtype.itemsize
unicode_uint = {1: np.uint8, 2: np.uint16, 4: np.uint32}[unicode_byte_width]
unicode_kind = {1: unicode.PY_UNICODE_1BYTE_KIND,
2: unicode.PY_UNICODE_2BYTE_KIND,
4: unicode.PY_UNICODE_4BYTE_KIND}[unicode_byte_width]
# this is modified version of numba.unicode.make_deref_codegen
def make_deref_codegen(bitsize):
def codegen(context, builder, signature, args):
data, idx = args
rawptr = cgutils.alloca_once_value(builder, value=data)
ptr = builder.bitcast(rawptr, ir.IntType(bitsize).as_pointer())
ch = builder.load(builder.gep(ptr, [idx]))
return builder.zext(ch, ir.IntType(32))
return codegen
@intrinsic
def deref_uint8(typingctx, data, offset):
sig = types.uint32(data, types.intp)
return sig, make_deref_codegen(8)
@intrinsic
def deref_uint16(typingctx, data, offset):
sig = types.uint32(data, types.intp)
return sig, make_deref_codegen(16)
@intrinsic
def deref_uint32(typingctx, data, offset):
sig = types.uint32(data, types.intp)
return sig, make_deref_codegen(32)
@register_jitable(_nrt=False)
def charseq_get_code(a, i):
"""Access i-th item of CharSeq object via code value
"""
return deref_uint8(a, i)
@register_jitable
def charseq_get_value(a, i):
"""Access i-th item of CharSeq object via code value.
null code is interpreted as IndexError
"""
code = charseq_get_code(a, i)
if code == 0:
raise IndexError('index out of range')
return code
@register_jitable(_nrt=False)
def unicode_charseq_get_code(a, i):
"""Access i-th item of UnicodeCharSeq object via code value
"""
if unicode_byte_width == 4:
return deref_uint32(a, i)
elif unicode_byte_width == 2:
return deref_uint16(a, i)
elif unicode_byte_width == 1:
return deref_uint8(a, i)
else:
raise NotImplementedError(
'unicode_charseq_get_code: unicode_byte_width not in [1, 2, 4]')
@register_jitable
def unicode_get_code(a, i):
"""Access i-th item of UnicodeType object.
"""
return unicode._get_code_point(a, i)
@register_jitable
def bytes_get_code(a, i):
"""Access i-th item of Bytes object.
"""
return a[i]
def _get_code_impl(a):
if isinstance(a, types.CharSeq):
return charseq_get_code
elif isinstance(a, types.Bytes):
return bytes_get_code
elif isinstance(a, types.UnicodeCharSeq):
return unicode_charseq_get_code
elif isinstance(a, types.UnicodeType):
return unicode_get_code
def _same_kind(a, b):
for t in [(types.CharSeq, types.Bytes),
(types.UnicodeCharSeq, types.UnicodeType)]:
if isinstance(a, t) and isinstance(b, t):
return True
return False
def _is_bytes(a):
return isinstance(a, (types.CharSeq, types.Bytes))
def is_default(x, default):
return x == default or isinstance(x, types.Omitted)
@register_jitable
def unicode_charseq_get_value(a, i):
"""Access i-th item of UnicodeCharSeq object via unicode value
null code is interpreted as IndexError
"""
code = unicode_charseq_get_code(a, i)
if code == 0:
raise IndexError('index out of range')
# Return numpy equivalent of `chr(code)`
return np.array(code, unicode_uint).view(u1_dtype)[()]
#
# CAST
#
# Currently, the following casting operations are supported:
# Bytes -> CharSeq (ex: a=np.array(b'abc'); a[()] = b'123')
# UnicodeType -> UnicodeCharSeq (ex: a=np.array('abc'); a[()] = '123')
# CharSeq -> Bytes (ex: a=np.array(b'abc'); b = bytes(a[()]))
# UnicodeType -> Bytes (ex: str('123')._to_bytes())
#
# The following casting operations can be implemented when required:
# Bytes -> UnicodeCharSeq (ex: a=np.array('abc'); a[()] = b'123')
# UnicodeType -> CharSeq (ex: a=np.array(b'abc'); a[()] = '123')
# UnicodeType -> Bytes (ex: bytes('123', 'utf8'))
#
@lower_cast(types.Bytes, types.CharSeq)
def bytes_to_charseq(context, builder, fromty, toty, val):
barr = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
src = builder.bitcast(barr.data, ir.IntType(8).as_pointer())
src_length = barr.nitems
lty = context.get_value_type(toty)
dstint_t = ir.IntType(8)
dst_ptr = cgutils.alloca_once(builder, lty)
dst = builder.bitcast(dst_ptr, dstint_t.as_pointer())
dst_length = ir.Constant(src_length.type, toty.count)
is_shorter_value = builder.icmp_unsigned('<', src_length, dst_length)
count = builder.select(is_shorter_value, src_length, dst_length)
with builder.if_then(is_shorter_value):
cgutils.memset(builder,
dst,
ir.Constant(src_length.type,
toty.count), 0)
with cgutils.for_range(builder, count) as loop:
in_ptr = builder.gep(src, [loop.index])
in_val = builder.zext(builder.load(in_ptr), dstint_t)
builder.store(in_val, builder.gep(dst, [loop.index]))
return builder.load(dst_ptr)
def _make_constant_bytes(context, builder, nbytes):
bstr_ctor = cgutils.create_struct_proxy(bytes_type)
bstr = bstr_ctor(context, builder)
if isinstance(nbytes, int):
nbytes = ir.Constant(bstr.nitems.type, nbytes)
bstr.meminfo = context.nrt.meminfo_alloc(builder, nbytes)
bstr.nitems = nbytes
bstr.itemsize = ir.Constant(bstr.itemsize.type, 1)
bstr.data = context.nrt.meminfo_data(builder, bstr.meminfo)
bstr.parent = cgutils.get_null_value(bstr.parent.type)
# bstr.shape and bstr.strides are not used
bstr.shape = cgutils.get_null_value(bstr.shape.type)
bstr.strides = cgutils.get_null_value(bstr.strides.type)
return bstr
@lower_cast(types.CharSeq, types.Bytes)
def charseq_to_bytes(context, builder, fromty, toty, val):
bstr = _make_constant_bytes(context, builder, val.type.count)
rawptr = cgutils.alloca_once_value(builder, value=val)
ptr = builder.bitcast(rawptr, bstr.data.type)
cgutils.memcpy(builder, bstr.data, ptr, bstr.nitems)
return bstr
@lower_cast(types.UnicodeType, types.Bytes)
def unicode_to_bytes_cast(context, builder, fromty, toty, val):
uni_str = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
src1 = builder.bitcast(uni_str.data, ir.IntType(8).as_pointer())
notkind1 = builder.icmp_unsigned('!=', uni_str.kind,
ir.Constant(uni_str.kind.type, 1))
src_length = uni_str.length
with builder.if_then(notkind1):
context.call_conv.return_user_exc(
builder, ValueError,
("cannot cast higher than 8-bit unicode_type to bytes",))
bstr = _make_constant_bytes(context, builder, src_length)
cgutils.memcpy(builder, bstr.data, src1, bstr.nitems)
return bstr
@intrinsic
def _unicode_to_bytes(typingctx, s):
# used in _to_bytes method
assert s == types.unicode_type
sig = bytes_type(s)
def codegen(context, builder, signature, args):
return unicode_to_bytes_cast(
context, builder, s, bytes_type, args[0])._getvalue()
return sig, codegen
@lower_cast(types.UnicodeType, types.UnicodeCharSeq)
def unicode_to_unicode_charseq(context, builder, fromty, toty, val):
uni_str = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
src1 = builder.bitcast(uni_str.data, ir.IntType(8).as_pointer())
src2 = builder.bitcast(uni_str.data, ir.IntType(16).as_pointer())
src4 = builder.bitcast(uni_str.data, ir.IntType(32).as_pointer())
kind1 = builder.icmp_unsigned('==', uni_str.kind,
ir.Constant(uni_str.kind.type, 1))
kind2 = builder.icmp_unsigned('==', uni_str.kind,
ir.Constant(uni_str.kind.type, 2))
kind4 = builder.icmp_unsigned('==', uni_str.kind,
ir.Constant(uni_str.kind.type, 4))
src_length = uni_str.length
lty = context.get_value_type(toty)
dstint_t = ir.IntType(8 * unicode_byte_width)
dst_ptr = cgutils.alloca_once(builder, lty)
dst = builder.bitcast(dst_ptr, dstint_t.as_pointer())
dst_length = ir.Constant(src_length.type, toty.count)
is_shorter_value = builder.icmp_unsigned('<', src_length, dst_length)
count = builder.select(is_shorter_value, src_length, dst_length)
with builder.if_then(is_shorter_value):
cgutils.memset(builder,
dst,
ir.Constant(src_length.type,
toty.count * unicode_byte_width), 0)
with builder.if_then(kind1):
with cgutils.for_range(builder, count) as loop:
in_ptr = builder.gep(src1, [loop.index])
in_val = builder.zext(builder.load(in_ptr), dstint_t)
builder.store(in_val, builder.gep(dst, [loop.index]))
with builder.if_then(kind2):
if unicode_byte_width >= 2:
with cgutils.for_range(builder, count) as loop:
in_ptr = builder.gep(src2, [loop.index])
in_val = builder.zext(builder.load(in_ptr), dstint_t)
builder.store(in_val, builder.gep(dst, [loop.index]))
else:
context.call_conv.return_user_exc(
builder, ValueError,
("cannot cast 16-bit unicode_type to %s-bit %s"
% (unicode_byte_width * 8, toty)))
with builder.if_then(kind4):
if unicode_byte_width >= 4:
with cgutils.for_range(builder, count) as loop:
in_ptr = builder.gep(src4, [loop.index])
in_val = builder.zext(builder.load(in_ptr), dstint_t)
builder.store(in_val, builder.gep(dst, [loop.index]))
else:
context.call_conv.return_user_exc(
builder, ValueError,
("cannot cast 32-bit unicode_type to %s-bit %s"
% (unicode_byte_width * 8, toty)))
return builder.load(dst_ptr)
#
# Operations on bytes/str array items
#
# Implementation note: while some operations need
# CharSeq/UnicodeCharSeq specific implementations (getitem, len, str,
# etc), many operations can be supported by casting
# CharSeq/UnicodeCharSeq objects to Bytes/UnicodeType objects and
# re-use existing operations.
#
# However, in numba more operations are implemented for UnicodeType
# than for Bytes objects, hence the support for operations with bytes
# array items will be less complete than for str arrays. Although, in
# some cases (hash, contains, etc) the UnicodeType implementations can
# be reused for Bytes objects via using `_to_str` method.
#
@overload(operator.getitem)
def charseq_getitem(s, i):
get_value = None
if isinstance(i, types.Integer):
if isinstance(s, types.CharSeq):
get_value = charseq_get_value
if isinstance(s, types.UnicodeCharSeq):
get_value = unicode_charseq_get_value
if get_value is not None:
max_i = s.count
msg = 'index out of range [0, %s]' % (max_i - 1)
def getitem_impl(s, i):
if i < max_i and i >= 0:
return get_value(s, i)
raise IndexError(msg)
return getitem_impl
@overload(len)
def charseq_len(s):
if isinstance(s, (types.CharSeq, types.UnicodeCharSeq)):
get_code = _get_code_impl(s)
n = s.count
if n == 0:
def len_impl(s):
return 0
return len_impl
else:
def len_impl(s):
# return the index of the last non-null value (numpy
# behavior)
i = n
code = 0
while code == 0:
i = i - 1
if i < 0:
break
code = get_code(s, i)
return i + 1
return len_impl
@overload(operator.add)
@overload(operator.iadd)
def charseq_concat(a, b):
if not _same_kind(a, b):
return
if (isinstance(a, types.UnicodeCharSeq) and
isinstance(b, types.UnicodeType)):
def impl(a, b):
return str(a) + b
return impl
if (isinstance(b, types.UnicodeCharSeq) and
isinstance(a, types.UnicodeType)):
def impl(a, b):
return a + str(b)
return impl
if (isinstance(a, types.UnicodeCharSeq) and
isinstance(b, types.UnicodeCharSeq)):
def impl(a, b):
return str(a) + str(b)
return impl
if (isinstance(a, (types.CharSeq, types.Bytes)) and
isinstance(b, (types.CharSeq, types.Bytes))):
def impl(a, b):
return (a._to_str() + b._to_str())._to_bytes()
return impl
@overload(operator.mul)
def charseq_repeat(a, b):
if isinstance(a, types.UnicodeCharSeq):
def wrap(a, b):
return str(a) * b
return wrap
if isinstance(b, types.UnicodeCharSeq):
def wrap(a, b):
return a * str(b)
return wrap
if isinstance(a, (types.CharSeq, types.Bytes)):
def wrap(a, b):
return (a._to_str() * b)._to_bytes()
return wrap
if isinstance(b, (types.CharSeq, types.Bytes)):
def wrap(a, b):
return (a * b._to_str())._to_bytes()
return wrap
@overload(operator.not_)
def charseq_not(a):
if isinstance(a, (types.UnicodeCharSeq, types.CharSeq, types.Bytes)):
def impl(a):
return len(a) == 0
return impl
@overload(operator.eq)
def charseq_eq(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
def eq_impl(a, b):
n = len(a)
if n != len(b):
return False
for i in range(n):
if left_code(a, i) != right_code(b, i):
return False
return True
return eq_impl
@overload(operator.ne)
def charseq_ne(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
def ne_impl(a, b):
return not (a == b)
return ne_impl
@overload(operator.lt)
def charseq_lt(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
def lt_impl(a, b):
na = len(a)
nb = len(b)
n = min(na, nb)
for i in range(n):
ca, cb = left_code(a, i), right_code(b, i)
if ca != cb:
return ca < cb
return na < nb
return lt_impl
@overload(operator.gt)
def charseq_gt(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
def gt_impl(a, b):
return b < a
return gt_impl
@overload(operator.le)
def charseq_le(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
def le_impl(a, b):
return not (a > b)
return le_impl
@overload(operator.ge)
def charseq_ge(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
def ge_impl(a, b):
return not (a < b)
return ge_impl
@overload(operator.contains)
def charseq_contains(a, b):
if not _same_kind(a, b):
return
left_code = _get_code_impl(a)
right_code = _get_code_impl(b)
if left_code is not None and right_code is not None:
if _is_bytes(a):
def contains_impl(a, b):
# Ideally, `return bytes(b) in bytes(a)` would be used
# here, but numba Bytes does not implement
# contains. So, using `unicode_type` implementation
# here:
return b._to_str() in a._to_str()
else:
def contains_impl(a, b):
return str(b) in str(a)
return contains_impl
@overload_method(types.UnicodeCharSeq, 'isascii')
@overload_method(types.CharSeq, 'isascii')
@overload_method(types.Bytes, 'isascii')
def charseq_isascii(s):
get_code = _get_code_impl(s)
def impl(s):
for i in range(len(s)):
if get_code(s, i) > 127:
return False
return True
return impl
@overload_method(types.UnicodeCharSeq, '_get_kind')
@overload_method(types.CharSeq, '_get_kind')
def charseq_get_kind(s):
get_code = _get_code_impl(s)
def impl(s):
max_code = 0
for i in range(len(s)):
code = get_code(s, i)
if code > max_code:
max_code = code
if max_code > 0xffff:
return unicode.PY_UNICODE_4BYTE_KIND
if max_code > 0xff:
return unicode.PY_UNICODE_2BYTE_KIND
return unicode.PY_UNICODE_1BYTE_KIND
return impl
@overload_method(types.UnicodeType, '_to_bytes')
def unicode_to_bytes_mth(s):
"""Convert unicode_type object to Bytes object.
Note: The usage of _to_bytes method can be eliminated once all
Python bytes operations are implemented for numba Bytes objects.
"""
def impl(s):
return _unicode_to_bytes(s)
return impl
@overload_method(types.CharSeq, '_to_str')
@overload_method(types.Bytes, '_to_str')
def charseq_to_str_mth(s):
"""Convert bytes array item or bytes instance to UTF-8 str.
Note: The usage of _to_str method can be eliminated once all
Python bytes operations are implemented for numba Bytes objects.
"""
get_code = _get_code_impl(s)
def tostr_impl(s):
n = len(s)
is_ascii = s.isascii()
result = unicode._empty_string(
unicode.PY_UNICODE_1BYTE_KIND, n, is_ascii)
for i in range(n):
code = get_code(s, i)
unicode._set_code_point(result, i, code)
return result
return tostr_impl
@overload_method(types.UnicodeCharSeq, "__str__")
def charseq_str(s):
get_code = _get_code_impl(s)
def str_impl(s):
n = len(s)
kind = s._get_kind()
is_ascii = kind == 1 and s.isascii()
result = unicode._empty_string(kind, n, is_ascii)
for i in range(n):
code = get_code(s, i)
unicode._set_code_point(result, i, code)
return result
return str_impl
@overload(bytes)
def charseq_bytes(s):
if isinstance(s, types.CharSeq):
return lambda s: s
@overload_method(types.UnicodeCharSeq, '__hash__')
def unicode_charseq_hash(s):
def impl(s):
return hash(str(s))
return impl
@overload_method(types.CharSeq, '__hash__')
def charseq_hash(s):
def impl(s):
# Ideally, `return hash(bytes(s))` would be used here but
# numba Bytes does not implement hash (yet). However, for a
# UTF-8 string `s`, we have hash(bytes(s)) == hash(s), hence,
# we can convert CharSeq object to unicode_type and reuse its
# hash implementation:
return hash(s._to_str())
return impl
@overload_method(types.UnicodeCharSeq, 'isupper')
def unicode_charseq_isupper(s):
def impl(s):
# workaround unicode_type.isupper bug: it returns int value
return not not str(s).isupper()
return impl
@overload_method(types.CharSeq, 'isupper')
def charseq_isupper(s):
def impl(s):
# return bytes(s).isupper() # TODO: implement isupper for Bytes
return not not s._to_str().isupper()
return impl
@overload_method(types.UnicodeCharSeq, 'upper')
def unicode_charseq_upper(s):
def impl(s):
return str(s).upper()
return impl
@overload_method(types.CharSeq, 'upper')
def charseq_upper(s):
def impl(s):
# return bytes(s).upper() # TODO: implement upper for Bytes
return s._to_str().upper()._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'find')
@overload_method(types.CharSeq, 'find')
@overload_method(types.Bytes, 'find')
def unicode_charseq_find(a, b):
if isinstance(a, types.UnicodeCharSeq):
if isinstance(b, types.UnicodeCharSeq):
def impl(a, b):
return str(a).find(str(b))
return impl
if isinstance(b, types.UnicodeType):
def impl(a, b):
return str(a).find(b)
return impl
if isinstance(a, types.CharSeq):
if isinstance(b, (types.CharSeq, types.Bytes)):
def impl(a, b):
return a._to_str().find(b._to_str())
return impl
if isinstance(a, types.UnicodeType):
if isinstance(b, types.UnicodeCharSeq):
def impl(a, b):
return a.find(str(b))
return impl
if isinstance(a, types.Bytes):
if isinstance(b, types.CharSeq):
def impl(a, b):
return a._to_str().find(b._to_str())
return impl
@overload_method(types.UnicodeCharSeq, 'rfind')
@overload_method(types.CharSeq, 'rfind')
@overload_method(types.Bytes, 'rfind')
def unicode_charseq_rfind(a, b):
if isinstance(a, types.UnicodeCharSeq):
if isinstance(b, types.UnicodeCharSeq):
def impl(a, b):
return str(a).rfind(str(b))
return impl
if isinstance(b, types.UnicodeType):
def impl(a, b):
return str(a).rfind(b)
return impl
if isinstance(a, types.CharSeq):
if isinstance(b, (types.CharSeq, types.Bytes)):
def impl(a, b):
return a._to_str().rfind(b._to_str())
return impl
if isinstance(a, types.UnicodeType):
if isinstance(b, types.UnicodeCharSeq):
def impl(a, b):
return a.rfind(str(b))
return impl
if isinstance(a, types.Bytes):
if isinstance(b, types.CharSeq):
def impl(a, b):
return a._to_str().rfind(b._to_str())
return impl
@overload_method(types.UnicodeCharSeq, 'startswith')
@overload_method(types.CharSeq, 'startswith')
@overload_method(types.Bytes, 'startswith')
def unicode_charseq_startswith(a, b):
if isinstance(a, types.UnicodeCharSeq):
if isinstance(b, types.UnicodeCharSeq):
def impl(a, b):
return str(a).startswith(str(b))
return impl
if isinstance(b, types.UnicodeType):
def impl(a, b):
return str(a).startswith(b)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if isinstance(b, (types.CharSeq, types.Bytes)):
def impl(a, b):
return a._to_str().startswith(b._to_str())
return impl
@overload_method(types.UnicodeCharSeq, 'endswith')
@overload_method(types.CharSeq, 'endswith')
@overload_method(types.Bytes, 'endswith')
def unicode_charseq_endswith(a, b):
if isinstance(a, types.UnicodeCharSeq):
if isinstance(b, types.UnicodeCharSeq):
def impl(a, b):
return str(a).endswith(str(b))
return impl
if isinstance(b, types.UnicodeType):
def impl(a, b):
return str(a).endswith(b)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if isinstance(b, (types.CharSeq, types.Bytes)):
def impl(a, b):
return a._to_str().endswith(b._to_str())
return impl
@register_jitable
def _map_bytes(seq):
return [s._to_bytes() for s in seq]
@overload_method(types.UnicodeCharSeq, 'split')
@overload_method(types.CharSeq, 'split')
@overload_method(types.Bytes, 'split')
def unicode_charseq_split(a, sep=None, maxsplit=-1):
if not (maxsplit == -1 or
isinstance(maxsplit, (types.Omitted, types.Integer,
types.IntegerLiteral))):
return None
if isinstance(a, types.UnicodeCharSeq):
if isinstance(sep, types.UnicodeCharSeq):
def impl(a, sep=None, maxsplit=-1):
return str(a).split(sep=str(sep), maxsplit=maxsplit)
return impl
if isinstance(sep, types.UnicodeType):
def impl(a, sep=None, maxsplit=-1):
return str(a).split(sep=sep, maxsplit=maxsplit)
return impl
if is_nonelike(sep):
if is_default(maxsplit, -1):
def impl(a, sep=None, maxsplit=-1):
return str(a).split()
else:
def impl(a, sep=None, maxsplit=-1):
return str(a).split(maxsplit=maxsplit)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if isinstance(sep, (types.CharSeq, types.Bytes)):
def impl(a, sep=None, maxsplit=-1):
return _map_bytes(a._to_str().split(sep._to_str(),
maxsplit=maxsplit))
return impl
if is_nonelike(sep):
if is_default(maxsplit, -1):
def impl(a, sep=None, maxsplit=-1):
return _map_bytes(a._to_str().split())
else:
def impl(a, sep=None, maxsplit=-1):
return _map_bytes(a._to_str().split(maxsplit=maxsplit))
return impl
# NOT IMPLEMENTED: rsplit
@overload_method(types.UnicodeCharSeq, 'ljust')
@overload_method(types.CharSeq, 'ljust')
@overload_method(types.Bytes, 'ljust')
def unicode_charseq_ljust(a, width, fillchar=' '):
if isinstance(a, types.UnicodeCharSeq):
if is_default(fillchar, ' '):
def impl(a, width, fillchar=' '):
return str(a).ljust(width)
return impl
elif isinstance(fillchar, types.UnicodeCharSeq):
def impl(a, width, fillchar=' '):
return str(a).ljust(width, str(fillchar))
return impl
elif isinstance(fillchar, types.UnicodeType):
def impl(a, width, fillchar=' '):
return str(a).ljust(width, fillchar)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if is_default(fillchar, ' ') or is_default(fillchar, b' '):
def impl(a, width, fillchar=' '):
return a._to_str().ljust(width)._to_bytes()
return impl
elif isinstance(fillchar, (types.CharSeq, types.Bytes)):
def impl(a, width, fillchar=' '):
return a._to_str().ljust(width, fillchar._to_str())._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'rjust')
@overload_method(types.CharSeq, 'rjust')
@overload_method(types.Bytes, 'rjust')
def unicode_charseq_rjust(a, width, fillchar=' '):
if isinstance(a, types.UnicodeCharSeq):
if is_default(fillchar, ' '):
def impl(a, width, fillchar=' '):
return str(a).rjust(width)
return impl
elif isinstance(fillchar, types.UnicodeCharSeq):
def impl(a, width, fillchar=' '):
return str(a).rjust(width, str(fillchar))
return impl
elif isinstance(fillchar, types.UnicodeType):
def impl(a, width, fillchar=' '):
return str(a).rjust(width, fillchar)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if is_default(fillchar, ' ') or is_default(fillchar, b' '):
def impl(a, width, fillchar=' '):
return a._to_str().rjust(width)._to_bytes()
return impl
elif isinstance(fillchar, (types.CharSeq, types.Bytes)):
def impl(a, width, fillchar=' '):
return a._to_str().rjust(width, fillchar._to_str())._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'center')
@overload_method(types.CharSeq, 'center')
@overload_method(types.Bytes, 'center')
def unicode_charseq_center(a, width, fillchar=' '):
if isinstance(a, types.UnicodeCharSeq):
if is_default(fillchar, ' '):
def impl(a, width, fillchar=' '):
return str(a).center(width)
return impl
elif isinstance(fillchar, types.UnicodeCharSeq):
def impl(a, width, fillchar=' '):
return str(a).center(width, str(fillchar))
return impl
elif isinstance(fillchar, types.UnicodeType):
def impl(a, width, fillchar=' '):
return str(a).center(width, fillchar)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if is_default(fillchar, ' ') or is_default(fillchar, b' '):
def impl(a, width, fillchar=' '):
return a._to_str().center(width)._to_bytes()
return impl
elif isinstance(fillchar, (types.CharSeq, types.Bytes)):
def impl(a, width, fillchar=' '):
return a._to_str().center(width, fillchar._to_str())._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'zfill')
@overload_method(types.CharSeq, 'zfill')
@overload_method(types.Bytes, 'zfill')
def unicode_charseq_zfill(a, width):
if isinstance(a, types.UnicodeCharSeq):
def impl(a, width):
return str(a).zfill(width)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
def impl(a, width):
return a._to_str().zfill(width)._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'lstrip')
@overload_method(types.CharSeq, 'lstrip')
@overload_method(types.Bytes, 'lstrip')
def unicode_charseq_lstrip(a, chars=None):
if isinstance(a, types.UnicodeCharSeq):
if is_nonelike(chars):
def impl(a, chars=None):
return str(a).lstrip()
return impl
elif isinstance(chars, types.UnicodeCharSeq):
def impl(a, chars=None):
return str(a).lstrip(str(chars))
return impl
elif isinstance(chars, types.UnicodeType):
def impl(a, chars=None):
return str(a).lstrip(chars)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if is_nonelike(chars):
def impl(a, chars=None):
return a._to_str().lstrip()._to_bytes()
return impl
elif isinstance(chars, (types.CharSeq, types.Bytes)):
def impl(a, chars=None):
return a._to_str().lstrip(chars._to_str())._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'rstrip')
@overload_method(types.CharSeq, 'rstrip')
@overload_method(types.Bytes, 'rstrip')
def unicode_charseq_rstrip(a, chars=None):
if isinstance(a, types.UnicodeCharSeq):
if is_nonelike(chars):
def impl(a, chars=None):
return str(a).rstrip()
return impl
elif isinstance(chars, types.UnicodeCharSeq):
def impl(a, chars=None):
return str(a).rstrip(str(chars))
return impl
elif isinstance(chars, types.UnicodeType):
def impl(a, chars=None):
return str(a).rstrip(chars)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if is_nonelike(chars):
def impl(a, chars=None):
return a._to_str().rstrip()._to_bytes()
return impl
elif isinstance(chars, (types.CharSeq, types.Bytes)):
def impl(a, chars=None):
return a._to_str().rstrip(chars._to_str())._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'strip')
@overload_method(types.CharSeq, 'strip')
@overload_method(types.Bytes, 'strip')
def unicode_charseq_strip(a, chars=None):
if isinstance(a, types.UnicodeCharSeq):
if is_nonelike(chars):
def impl(a, chars=None):
return str(a).strip()
return impl
elif isinstance(chars, types.UnicodeCharSeq):
def impl(a, chars=None):
return str(a).strip(str(chars))
return impl
elif isinstance(chars, types.UnicodeType):
def impl(a, chars=None):
return str(a).strip(chars)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
if is_nonelike(chars):
def impl(a, chars=None):
return a._to_str().strip()._to_bytes()
return impl
elif isinstance(chars, (types.CharSeq, types.Bytes)):
def impl(a, chars=None):
return a._to_str().strip(chars._to_str())._to_bytes()
return impl
@overload_method(types.UnicodeCharSeq, 'join')
@overload_method(types.CharSeq, 'join')
@overload_method(types.Bytes, 'join')
def unicode_charseq_join(a, parts):
if isinstance(a, types.UnicodeCharSeq):
# assuming parts contains UnicodeCharSeq or UnicodeType objects
def impl(a, parts):
_parts = [str(p) for p in parts]
return str(a).join(_parts)
return impl
if isinstance(a, (types.CharSeq, types.Bytes)):
# assuming parts contains CharSeq or Bytes objects
def impl(a, parts):
_parts = [p._to_str() for p in parts]
return a._to_str().join(_parts)._to_bytes()
return impl