ai-content-maker/.venv/Lib/site-packages/numba/cpython/charseq.py

1021 lines
34 KiB
Python

"""Implements operations on bytes and str (unicode) array items."""
import operator
import numpy as np
from llvmlite import ir
from numba.core import types, cgutils
from numba.core.extending import (overload, intrinsic, overload_method,
lower_cast, register_jitable)
from numba.core.cgutils import is_nonelike
from numba.cpython import unicode
# Items of bytes and str arrays are of type CharSeq and UnicodeCharSeq,
# respectively. See numpy/types/npytypes.py for the CharSeq and
# UnicodeCharSeq definitions. The corresponding data models are
# defined in numpy/datamodel/models.py. Boxing/unboxing of the item
# types is defined in numpy/targets/boxing.py; see box_unicodecharseq,
# unbox_unicodecharseq, box_charseq, unbox_charseq.
# NumPy dtype of a one-character bytes item; the assertion checks that
# such an item occupies exactly one byte.
s1_dtype = np.dtype('S1')
assert s1_dtype.itemsize == 1
# Bytes type used by _make_constant_bytes and _unicode_to_bytes below.
bytes_type = types.Bytes(types.uint8, 1, "C", readonly=True)
# Currently, NumPy supports only UTF-32 arrays but this may change in
# the future, and the approach used here for supporting str arrays may
# need a revision depending on how NumPy will support UTF-8 and UTF-16
# arrays.
u1_dtype = np.dtype('U1')
# Number of bytes NumPy uses to store one character of a str item.
unicode_byte_width = u1_dtype.itemsize
# Unsigned NumPy integer type whose width matches one stored character.
unicode_uint = {1: np.uint8, 2: np.uint16, 4: np.uint32}[unicode_byte_width]
# Kind constant from numba.cpython.unicode selected by the same width.
unicode_kind = {1: unicode.PY_UNICODE_1BYTE_KIND,
                2: unicode.PY_UNICODE_2BYTE_KIND,
                4: unicode.PY_UNICODE_4BYTE_KIND}[unicode_byte_width]
# This is a modified version of numba.unicode.make_deref_codegen.
def make_deref_codegen(bitsize):
    """Build a lowering function that reads one ``bitsize``-bit item.

    The returned ``codegen`` stores its ``data`` argument in a stack
    slot, reinterprets that slot as a pointer to ``bitsize``-bit
    integers, loads the element at the given index and zero-extends it
    to a 32-bit integer.
    """
    def codegen(context, builder, signature, args):
        value, index = args
        # Spill the value so that it can be addressed through a pointer.
        slot = cgutils.alloca_once_value(builder, value=value)
        item_ptr_type = ir.IntType(bitsize).as_pointer()
        items = builder.bitcast(slot, item_ptr_type)
        item = builder.load(builder.gep(items, [index]))
        return builder.zext(item, ir.IntType(32))

    return codegen
@intrinsic
def deref_uint8(typingctx, data, offset):
    """Intrinsic returning, as uint32, the 8-bit item of ``data`` at ``offset``."""
    return types.uint32(data, types.intp), make_deref_codegen(8)
@intrinsic
def deref_uint16(typingctx, data, offset):
    """Intrinsic returning, as uint32, the 16-bit item of ``data`` at ``offset``."""
    return types.uint32(data, types.intp), make_deref_codegen(16)
@intrinsic
def deref_uint32(typingctx, data, offset):
    """Intrinsic returning, as uint32, the 32-bit item of ``data`` at ``offset``."""
    return types.uint32(data, types.intp), make_deref_codegen(32)
@register_jitable(_nrt=False)
def charseq_get_code(a, i):
    """Return the code stored at position ``i`` of the CharSeq object ``a``.

    The item is read as an 8-bit value through ``deref_uint8``; no
    bounds or null check is done here.
    """
    code = deref_uint8(a, i)
    return code
@register_jitable
def charseq_get_value(a, i):
    """Return the code of the ``i``-th item of the CharSeq object ``a``.

    A null code is treated as being past the end of the content and
    raises IndexError.
    """
    value = charseq_get_code(a, i)
    if value != 0:
        return value
    raise IndexError('index out of range')
@register_jitable(_nrt=False)
def unicode_charseq_get_code(a, i):
    """Return the code stored at position ``i`` of the UnicodeCharSeq ``a``.

    The reader is chosen from the module-level ``unicode_byte_width``;
    any width other than 1, 2 or 4 raises NotImplementedError.
    """
    if unicode_byte_width == 4:
        return deref_uint32(a, i)
    if unicode_byte_width == 2:
        return deref_uint16(a, i)
    if unicode_byte_width == 1:
        return deref_uint8(a, i)
    raise NotImplementedError(
        'unicode_charseq_get_code: unicode_byte_width not in [1, 2, 4]')
@register_jitable
def unicode_get_code(a, i):
    """Return the code point at position ``i`` of the UnicodeType object ``a``."""
    code_point = unicode._get_code_point(a, i)
    return code_point
@register_jitable
def bytes_get_code(a, i):
    """Return the item at position ``i`` of the Bytes object ``a``."""
    item = a[i]
    return item
def _get_code_impl(a):
    """Pick the item-code reader matching the numba type ``a``.

    Returns one of the ``*_get_code`` functions of this module, or None
    when ``a`` is not a CharSeq, Bytes, UnicodeCharSeq or UnicodeType.
    """
    # Checked in this order; the first matching type wins.
    readers = (
        (types.CharSeq, charseq_get_code),
        (types.Bytes, bytes_get_code),
        (types.UnicodeCharSeq, unicode_charseq_get_code),
        (types.UnicodeType, unicode_get_code),
    )
    for type_cls, reader in readers:
        if isinstance(a, type_cls):
            return reader
    return None
def _same_kind(a, b):
    """Tell whether ``a`` and ``b`` are both bytes-like or both str-like.

    Bytes-like means CharSeq or Bytes; str-like means UnicodeCharSeq or
    UnicodeType.
    """
    families = ((types.CharSeq, types.Bytes),
                (types.UnicodeCharSeq, types.UnicodeType))
    return any(isinstance(a, family) and isinstance(b, family)
               for family in families)
def _is_bytes(a):
    """Tell whether the numba type ``a`` is a CharSeq or a Bytes type."""
    bytes_like = (types.CharSeq, types.Bytes)
    return isinstance(a, bytes_like)
def is_default(x, default):
    """Tell whether ``x`` stands for the default value of an argument.

    That is the case when ``x`` compares equal to ``default`` or when
    ``x`` is a ``types.Omitted`` instance.
    """
    matches_default = x == default
    # Keep the short-circuit: the type check only runs when the equality
    # comparison is falsy.
    return matches_default or isinstance(x, types.Omitted)
@register_jitable
def unicode_charseq_get_value(a, i):
    """Return the ``i``-th item of the UnicodeCharSeq ``a`` as a unicode value.

    A null code is treated as being past the end of the content and
    raises IndexError.
    """
    code = unicode_charseq_get_code(a, i)
    if code != 0:
        # NumPy equivalent of `chr(code)`: reinterpret the integer as a
        # one-character str item and extract the scalar.
        as_uint = np.array(code, unicode_uint)
        return as_uint.view(u1_dtype)[()]
    raise IndexError('index out of range')
#
# CAST
#
# Currently, the following casting operations are supported:
# Bytes -> CharSeq (ex: a=np.array(b'abc'); a[()] = b'123')
# UnicodeType -> UnicodeCharSeq (ex: a=np.array('abc'); a[()] = '123')
# CharSeq -> Bytes (ex: a=np.array(b'abc'); b = bytes(a[()]))
# UnicodeType -> Bytes (ex: str('123')._to_bytes())
#
# The following casting operations can be implemented when required:
# Bytes -> UnicodeCharSeq (ex: a=np.array('abc'); a[()] = b'123')
# UnicodeType -> CharSeq (ex: a=np.array(b'abc'); a[()] = '123')
# UnicodeType -> Bytes (ex: bytes('123', 'utf8'))
#
@lower_cast(types.Bytes, types.CharSeq)
def bytes_to_charseq(context, builder, fromty, toty, val):
    """Lower the cast of a Bytes value to a CharSeq item.

    At most ``toty.count`` bytes are copied from the source. When the
    source is shorter than the destination, the destination is zeroed
    first so that the unused tail holds null bytes.
    """
    byte_t = ir.IntType(8)
    source = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
    src_bytes = builder.bitcast(source.data, byte_t.as_pointer())
    n_src = source.nitems

    # Stack slot for the resulting item, addressed byte by byte.
    out_slot = cgutils.alloca_once(builder, context.get_value_type(toty))
    out_bytes = builder.bitcast(out_slot, byte_t.as_pointer())

    capacity = ir.Constant(n_src.type, toty.count)
    src_is_shorter = builder.icmp_unsigned('<', n_src, capacity)
    n_copy = builder.select(src_is_shorter, n_src, capacity)

    with builder.if_then(src_is_shorter):
        cgutils.memset(builder, out_bytes, capacity, 0)

    with cgutils.for_range(builder, n_copy) as loop:
        src_item = builder.load(builder.gep(src_bytes, [loop.index]))
        widened = builder.zext(src_item, byte_t)
        builder.store(widened, builder.gep(out_bytes, [loop.index]))

    return builder.load(out_slot)
def _make_constant_bytes(context, builder, nbytes):
    """Allocate a ``bytes_type`` struct with room for ``nbytes`` items.

    ``nbytes`` is either a Python int, which is turned into an IR
    constant, or an existing IR value. The struct proxy is returned; the
    allocated data is not initialised here.
    """
    result = cgutils.create_struct_proxy(bytes_type)(context, builder)
    if isinstance(nbytes, int):
        size = ir.Constant(result.nitems.type, nbytes)
    else:
        size = nbytes

    result.meminfo = context.nrt.meminfo_alloc(builder, size)
    result.nitems = size
    result.itemsize = ir.Constant(result.itemsize.type, 1)
    result.data = context.nrt.meminfo_data(builder, result.meminfo)
    result.parent = cgutils.get_null_value(result.parent.type)
    # The shape and strides fields are not used, so they are nulled.
    result.shape = cgutils.get_null_value(result.shape.type)
    result.strides = cgutils.get_null_value(result.strides.type)
    return result
@lower_cast(types.CharSeq, types.Bytes)
def charseq_to_bytes(context, builder, fromty, toty, val):
    """Lower the cast of a CharSeq item to a Bytes value.

    A bytes struct of ``val.type.count`` items is allocated and the
    whole item is copied into it.
    """
    out = _make_constant_bytes(context, builder, val.type.count)
    # Spill the item so that it can be used as a memcpy source.
    slot = cgutils.alloca_once_value(builder, value=val)
    source = builder.bitcast(slot, out.data.type)
    cgutils.memcpy(builder, out.data, source, out.nitems)
    return out
@lower_cast(types.UnicodeType, types.Bytes)
def unicode_to_bytes_cast(context, builder, fromty, toty, val):
    """Lower the cast of a unicode_type value to a Bytes value.

    The generated code raises ValueError when the kind of the string is
    not 1; otherwise ``length`` bytes of its data are copied into a
    newly allocated bytes struct.
    """
    text = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
    source = builder.bitcast(text.data, ir.IntType(8).as_pointer())
    wider_than_byte = builder.icmp_unsigned(
        '!=', text.kind, ir.Constant(text.kind.type, 1))
    n_items = text.length

    with builder.if_then(wider_than_byte):
        context.call_conv.return_user_exc(
            builder, ValueError,
            ("cannot cast higher than 8-bit unicode_type to bytes",))

    out = _make_constant_bytes(context, builder, n_items)
    cgutils.memcpy(builder, out.data, source, out.nitems)
    return out
@intrinsic
def _unicode_to_bytes(typingctx, s):
    """Intrinsic converting a unicode_type value to ``bytes_type``.

    Used by the ``_to_bytes`` method; lowering is delegated to
    ``unicode_to_bytes_cast``.
    """
    assert s == types.unicode_type

    def codegen(context, builder, signature, args):
        proxy = unicode_to_bytes_cast(
            context, builder, s, bytes_type, args[0])
        return proxy._getvalue()

    return bytes_type(s), codegen
@lower_cast(types.UnicodeType, types.UnicodeCharSeq)
def unicode_to_unicode_charseq(context, builder, fromty, toty, val):
    """Lower the cast of a unicode_type value to a UnicodeCharSeq item.

    At most ``toty.count`` code units are copied from the source string,
    each zero-extended to ``unicode_byte_width`` bytes. When the source
    is shorter than the destination, the destination is zeroed first so
    that the unused tail holds null codes. If the source kind is wider
    than ``unicode_byte_width``, the generated code raises ValueError.
    """
    uni_str = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
    # Views of the source data for each possible code-unit width.
    src1 = builder.bitcast(uni_str.data, ir.IntType(8).as_pointer())
    src2 = builder.bitcast(uni_str.data, ir.IntType(16).as_pointer())
    src4 = builder.bitcast(uni_str.data, ir.IntType(32).as_pointer())
    kind1 = builder.icmp_unsigned('==', uni_str.kind,
                                  ir.Constant(uni_str.kind.type, 1))
    kind2 = builder.icmp_unsigned('==', uni_str.kind,
                                  ir.Constant(uni_str.kind.type, 2))
    kind4 = builder.icmp_unsigned('==', uni_str.kind,
                                  ir.Constant(uni_str.kind.type, 4))
    src_length = uni_str.length

    lty = context.get_value_type(toty)
    dstint_t = ir.IntType(8 * unicode_byte_width)
    dst_ptr = cgutils.alloca_once(builder, lty)
    dst = builder.bitcast(dst_ptr, dstint_t.as_pointer())

    dst_length = ir.Constant(src_length.type, toty.count)
    is_shorter_value = builder.icmp_unsigned('<', src_length, dst_length)
    count = builder.select(is_shorter_value, src_length, dst_length)
    with builder.if_then(is_shorter_value):
        cgutils.memset(builder,
                       dst,
                       ir.Constant(src_length.type,
                                   toty.count * unicode_byte_width), 0)

    def copy_widened(src):
        # Copy `count` code units from `src` into the destination,
        # zero-extending each one to the destination code-unit width.
        with cgutils.for_range(builder, count) as loop:
            in_ptr = builder.gep(src, [loop.index])
            in_val = builder.zext(builder.load(in_ptr), dstint_t)
            builder.store(in_val, builder.gep(dst, [loop.index]))

    def raise_too_wide(src_bits):
        # exc_args has to be a tuple (note the trailing comma); a bare
        # string is rejected by return_user_exc.
        context.call_conv.return_user_exc(
            builder, ValueError,
            ("cannot cast %s-bit unicode_type to %s-bit %s"
             % (src_bits, unicode_byte_width * 8, toty),))

    with builder.if_then(kind1):
        copy_widened(src1)

    with builder.if_then(kind2):
        if unicode_byte_width >= 2:
            copy_widened(src2)
        else:
            raise_too_wide(16)

    with builder.if_then(kind4):
        if unicode_byte_width >= 4:
            copy_widened(src4)
        else:
            raise_too_wide(32)

    return builder.load(dst_ptr)
#
# Operations on bytes/str array items
#
# Implementation note: while some operations need
# CharSeq/UnicodeCharSeq specific implementations (getitem, len, str,
# etc), many operations can be supported by casting
# CharSeq/UnicodeCharSeq objects to Bytes/UnicodeType objects and
# re-use existing operations.
#
# However, in numba more operations are implemented for UnicodeType
# than for Bytes objects, hence the support for operations on bytes
# array items will be less complete than for str arrays. In some
# cases (hash, contains, etc.), though, the UnicodeType implementations
# can be reused for Bytes objects by using the `_to_str` method.
#
@overload(operator.getitem)
def charseq_getitem(s, i):
    """Overload ``s[i]`` for CharSeq and UnicodeCharSeq with an integer index.

    The implementation raises IndexError for an index outside
    ``[0, s.count - 1]``; the selected value getter additionally raises
    IndexError on a null code.
    """
    if not isinstance(i, types.Integer):
        return None
    if isinstance(s, types.UnicodeCharSeq):
        get_value = unicode_charseq_get_value
    elif isinstance(s, types.CharSeq):
        get_value = charseq_get_value
    else:
        return None

    capacity = s.count
    msg = 'index out of range [0, %s]' % (capacity - 1)

    def getitem_impl(s, i):
        if i < 0 or i >= capacity:
            raise IndexError(msg)
        return get_value(s, i)

    return getitem_impl
@overload(len)
def charseq_len(s):
    """Overload ``len`` for CharSeq and UnicodeCharSeq items.

    The length is the index of the last non-null code plus one (NumPy
    behavior), so trailing null codes are not counted.
    """
    if not isinstance(s, (types.CharSeq, types.UnicodeCharSeq)):
        return None

    get_code = _get_code_impl(s)
    capacity = s.count

    if capacity == 0:
        def len_impl(s):
            return 0
    else:
        def len_impl(s):
            # Walk backwards over the trailing null codes.
            last = capacity - 1
            while last >= 0 and get_code(s, last) == 0:
                last -= 1
            return last + 1

    return len_impl
@overload(operator.add)
@overload(operator.iadd)
def charseq_concat(a, b):
    """Overload ``+`` and ``+=`` for operands of the same kind.

    UnicodeCharSeq operands are converted with ``str`` before being
    concatenated. Bytes-like operands are concatenated as strings via
    ``_to_str`` and the result is converted back with ``_to_bytes``.
    """
    if not _same_kind(a, b):
        return None

    if isinstance(a, types.UnicodeCharSeq):
        if isinstance(b, types.UnicodeType):
            def impl(a, b):
                return str(a) + b
            return impl
        if isinstance(b, types.UnicodeCharSeq):
            def impl(a, b):
                return str(a) + str(b)
            return impl

    if isinstance(a, types.UnicodeType) and isinstance(b, types.UnicodeCharSeq):
        def impl(a, b):
            return a + str(b)
        return impl

    bytes_like = (types.CharSeq, types.Bytes)
    if isinstance(a, bytes_like) and isinstance(b, bytes_like):
        def impl(a, b):
            return (a._to_str() + b._to_str())._to_bytes()
        return impl

    return None
@overload(operator.mul)
def charseq_repeat(a, b):
    """Overload ``*`` when either operand is a char-sequence item or Bytes.

    The sequence operand is converted to a string (``str`` for
    UnicodeCharSeq, ``_to_str`` for bytes-like types) before the
    multiplication; bytes-like results are converted back with
    ``_to_bytes``.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    # The checks are ordered: unicode operands first, then bytes-like,
    # each time testing the left operand before the right one.
    if isinstance(a, types.UnicodeCharSeq):
        def repeat_impl(a, b):
            return str(a) * b
        return repeat_impl

    if isinstance(b, types.UnicodeCharSeq):
        def repeat_impl(a, b):
            return a * str(b)
        return repeat_impl

    if isinstance(a, bytes_like):
        def repeat_impl(a, b):
            return (a._to_str() * b)._to_bytes()
        return repeat_impl

    if isinstance(b, bytes_like):
        def repeat_impl(a, b):
            return (a * b._to_str())._to_bytes()
        return repeat_impl

    return None
@overload(operator.not_)
def charseq_not(a):
    """Overload ``not a``: true exactly when ``len(a)`` is zero."""
    supported = (types.UnicodeCharSeq, types.CharSeq, types.Bytes)
    if not isinstance(a, supported):
        return None

    def impl(a):
        return len(a) == 0

    return impl
@overload(operator.eq)
def charseq_eq(a, b):
    """Overload ``==`` for operands of the same kind.

    Two values are equal when they have the same length and the same
    code at every position.
    """
    if not _same_kind(a, b):
        return None
    left_code = _get_code_impl(a)
    right_code = _get_code_impl(b)
    if left_code is None or right_code is None:
        return None

    def eq_impl(a, b):
        size = len(a)
        if len(b) != size:
            return False
        for pos in range(size):
            if left_code(a, pos) != right_code(b, pos):
                return False
        return True

    return eq_impl
@overload(operator.ne)
def charseq_ne(a, b):
    """Overload ``!=`` for operands of the same kind as the negation of ``==``."""
    if not _same_kind(a, b):
        return None
    if _get_code_impl(a) is None or _get_code_impl(b) is None:
        return None

    def ne_impl(a, b):
        # Must stay expressed through `==`; using `!=` here would call
        # this very overload again.
        return not (a == b)

    return ne_impl
@overload(operator.lt)
def charseq_lt(a, b):
    """Overload ``<`` for operands of the same kind.

    The comparison is lexicographic on item codes; when one value is a
    prefix of the other, the shorter one is the smaller.
    """
    if not _same_kind(a, b):
        return None
    left_code = _get_code_impl(a)
    right_code = _get_code_impl(b)
    if left_code is None or right_code is None:
        return None

    def lt_impl(a, b):
        len_a = len(a)
        len_b = len(b)
        common = min(len_a, len_b)
        for pos in range(common):
            code_a = left_code(a, pos)
            code_b = right_code(b, pos)
            if code_a != code_b:
                return code_a < code_b
        return len_a < len_b

    return lt_impl
@overload(operator.gt)
def charseq_gt(a, b):
    """Overload ``>`` for operands of the same kind as ``b < a``."""
    if not _same_kind(a, b):
        return None
    if _get_code_impl(a) is None or _get_code_impl(b) is None:
        return None

    def gt_impl(a, b):
        return b < a

    return gt_impl
@overload(operator.le)
def charseq_le(a, b):
    """Overload ``<=`` for operands of the same kind as the negation of ``>``."""
    if not _same_kind(a, b):
        return None
    if _get_code_impl(a) is None or _get_code_impl(b) is None:
        return None

    def le_impl(a, b):
        return not (a > b)

    return le_impl
@overload(operator.ge)
def charseq_ge(a, b):
    """Overload ``>=`` for operands of the same kind as the negation of ``<``."""
    if not _same_kind(a, b):
        return None
    if _get_code_impl(a) is None or _get_code_impl(b) is None:
        return None

    def ge_impl(a, b):
        return not (a < b)

    return ge_impl
@overload(operator.contains)
def charseq_contains(a, b):
    """Overload ``b in a`` for operands of the same kind.

    Both operands are converted to strings and the membership test is
    delegated to the string implementation.
    """
    if not _same_kind(a, b):
        return None
    if _get_code_impl(a) is None or _get_code_impl(b) is None:
        return None

    if _is_bytes(a):
        def contains_impl(a, b):
            # Ideally this would be `bytes(b) in bytes(a)`, but numba
            # Bytes does not implement contains, so the `unicode_type`
            # implementation is used instead.
            needle = b._to_str()
            return needle in a._to_str()
    else:
        def contains_impl(a, b):
            needle = str(b)
            return needle in str(a)

    return contains_impl
@overload_method(types.UnicodeCharSeq, 'isascii')
@overload_method(types.CharSeq, 'isascii')
@overload_method(types.Bytes, 'isascii')
def charseq_isascii(s):
    """Implement ``isascii``: true when no item code exceeds 127."""
    get_code = _get_code_impl(s)

    def impl(s):
        size = len(s)
        for pos in range(size):
            code = get_code(s, pos)
            if code > 127:
                return False
        return True

    return impl
@overload_method(types.UnicodeCharSeq, '_get_kind')
@overload_method(types.CharSeq, '_get_kind')
def charseq_get_kind(s):
    """Implement ``_get_kind``: the smallest unicode kind that fits all codes.

    The kind is chosen from the largest item code: 1-byte up to 0xff,
    2-byte up to 0xffff, 4-byte above that.
    """
    get_code = _get_code_impl(s)

    def impl(s):
        largest = 0
        for pos in range(len(s)):
            current = get_code(s, pos)
            if current > largest:
                largest = current
        if largest <= 0xff:
            return unicode.PY_UNICODE_1BYTE_KIND
        if largest <= 0xffff:
            return unicode.PY_UNICODE_2BYTE_KIND
        return unicode.PY_UNICODE_4BYTE_KIND

    return impl
@overload_method(types.UnicodeType, '_to_bytes')
def unicode_to_bytes_mth(s):
    """Implement ``_to_bytes``: convert a unicode_type object to Bytes.

    Note: this helper method becomes unnecessary once all Python bytes
    operations are implemented for numba Bytes objects.
    """
    def impl(s):
        converted = _unicode_to_bytes(s)
        return converted

    return impl
@overload_method(types.CharSeq, '_to_str')
@overload_method(types.Bytes, '_to_str')
def charseq_to_str_mth(s):
    """Implement ``_to_str``: convert a bytes array item or bytes to str.

    A 1-byte-kind string of the same length is created and every item
    code is stored as the code point at the same position.

    Note: this helper method becomes unnecessary once all Python bytes
    operations are implemented for numba Bytes objects.
    """
    get_code = _get_code_impl(s)

    def tostr_impl(s):
        size = len(s)
        ascii_only = s.isascii()
        text = unicode._empty_string(
            unicode.PY_UNICODE_1BYTE_KIND, size, ascii_only)
        for pos in range(size):
            unicode._set_code_point(text, pos, get_code(s, pos))
        return text

    return tostr_impl
@overload_method(types.UnicodeCharSeq, "__str__")
def charseq_str(s):
    """Implement ``str(s)`` for UnicodeCharSeq items.

    The result uses the kind reported by ``_get_kind`` and holds the
    item codes as code points; it is flagged as ASCII only when the kind
    is 1 and ``isascii`` holds.
    """
    get_code = _get_code_impl(s)

    def str_impl(s):
        size = len(s)
        kind = s._get_kind()
        ascii_only = kind == 1 and s.isascii()
        text = unicode._empty_string(kind, size, ascii_only)
        for pos in range(size):
            unicode._set_code_point(text, pos, get_code(s, pos))
        return text

    return str_impl
@overload(bytes)
def charseq_bytes(s):
    """Overload ``bytes(s)`` for CharSeq items; the argument is returned as is."""
    if not isinstance(s, types.CharSeq):
        return None

    def impl(s):
        return s

    return impl
@overload_method(types.UnicodeCharSeq, '__hash__')
def unicode_charseq_hash(s):
    """Implement ``hash`` for UnicodeCharSeq items as the hash of ``str(s)``."""
    def impl(s):
        text = str(s)
        return hash(text)

    return impl
@overload_method(types.CharSeq, '__hash__')
def charseq_hash(s):
    """Implement ``hash`` for CharSeq items as the hash of ``s._to_str()``."""
    def impl(s):
        # `hash(bytes(s))` would be the natural choice, but numba Bytes
        # does not implement hash (yet). For a UTF-8 string `s` we have
        # hash(bytes(s)) == hash(s), so the CharSeq object is converted
        # to unicode_type and its hash implementation is reused.
        text = s._to_str()
        return hash(text)

    return impl
@overload_method(types.UnicodeCharSeq, 'isupper')
def unicode_charseq_isupper(s):
    """Implement ``isupper`` for UnicodeCharSeq items via ``str(s)``."""
    def impl(s):
        text = str(s)
        # Double negation works around a unicode_type.isupper bug: it
        # returns an int value.
        return not (not text.isupper())

    return impl
@overload_method(types.CharSeq, 'isupper')
def charseq_isupper(s):
    """Implement ``isupper`` for CharSeq items via ``s._to_str()``."""
    def impl(s):
        # TODO: implement isupper for Bytes, then use bytes(s).isupper()
        text = s._to_str()
        return not (not text.isupper())

    return impl
@overload_method(types.UnicodeCharSeq, 'upper')
def unicode_charseq_upper(s):
    """Implement ``upper`` for UnicodeCharSeq items via ``str(s)``."""
    def impl(s):
        text = str(s)
        return text.upper()

    return impl
@overload_method(types.CharSeq, 'upper')
def charseq_upper(s):
    """Implement ``upper`` for CharSeq items, returning a bytes result."""
    def impl(s):
        # TODO: implement upper for Bytes, then use bytes(s).upper()
        uppercased = s._to_str().upper()
        return uppercased._to_bytes()

    return impl
@overload_method(types.UnicodeCharSeq, 'find')
@overload_method(types.CharSeq, 'find')
@overload_method(types.Bytes, 'find')
def unicode_charseq_find(a, b):
    """Implement ``find`` by delegating to the string implementation.

    Supported pairs: UnicodeCharSeq with UnicodeCharSeq or UnicodeType,
    UnicodeType with UnicodeCharSeq, CharSeq with CharSeq or Bytes, and
    Bytes with CharSeq. Any other pair yields no implementation.
    """
    a_is_ucs = isinstance(a, types.UnicodeCharSeq)
    b_is_ucs = isinstance(b, types.UnicodeCharSeq)

    if a_is_ucs and b_is_ucs:
        def impl(a, b):
            return str(a).find(str(b))
        return impl

    if a_is_ucs and isinstance(b, types.UnicodeType):
        def impl(a, b):
            return str(a).find(b)
        return impl

    if isinstance(a, types.UnicodeType) and b_is_ucs:
        def impl(a, b):
            return a.find(str(b))
        return impl

    charseq_first = (isinstance(a, types.CharSeq)
                     and isinstance(b, (types.CharSeq, types.Bytes)))
    bytes_first = isinstance(a, types.Bytes) and isinstance(b, types.CharSeq)
    if charseq_first or bytes_first:
        def impl(a, b):
            return a._to_str().find(b._to_str())
        return impl

    return None
@overload_method(types.UnicodeCharSeq, 'rfind')
@overload_method(types.CharSeq, 'rfind')
@overload_method(types.Bytes, 'rfind')
def unicode_charseq_rfind(a, b):
    """Implement ``rfind`` by delegating to the string implementation.

    Supported pairs: UnicodeCharSeq with UnicodeCharSeq or UnicodeType,
    UnicodeType with UnicodeCharSeq, CharSeq with CharSeq or Bytes, and
    Bytes with CharSeq. Any other pair yields no implementation.
    """
    a_is_ucs = isinstance(a, types.UnicodeCharSeq)
    b_is_ucs = isinstance(b, types.UnicodeCharSeq)

    if a_is_ucs and b_is_ucs:
        def impl(a, b):
            return str(a).rfind(str(b))
        return impl

    if a_is_ucs and isinstance(b, types.UnicodeType):
        def impl(a, b):
            return str(a).rfind(b)
        return impl

    if isinstance(a, types.UnicodeType) and b_is_ucs:
        def impl(a, b):
            return a.rfind(str(b))
        return impl

    charseq_first = (isinstance(a, types.CharSeq)
                     and isinstance(b, (types.CharSeq, types.Bytes)))
    bytes_first = isinstance(a, types.Bytes) and isinstance(b, types.CharSeq)
    if charseq_first or bytes_first:
        def impl(a, b):
            return a._to_str().rfind(b._to_str())
        return impl

    return None
@overload_method(types.UnicodeCharSeq, 'startswith')
@overload_method(types.CharSeq, 'startswith')
@overload_method(types.Bytes, 'startswith')
def unicode_charseq_startswith(a, b):
    """Implement ``startswith`` by delegating to the string implementation.

    A UnicodeCharSeq receiver accepts a UnicodeCharSeq or UnicodeType
    prefix; a bytes-like receiver accepts a bytes-like prefix.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if isinstance(b, types.UnicodeCharSeq):
            def impl(a, b):
                return str(a).startswith(str(b))
            return impl
        if isinstance(b, types.UnicodeType):
            def impl(a, b):
                return str(a).startswith(b)
            return impl

    if isinstance(a, bytes_like) and isinstance(b, bytes_like):
        def impl(a, b):
            return a._to_str().startswith(b._to_str())
        return impl

    return None
@overload_method(types.UnicodeCharSeq, 'endswith')
@overload_method(types.CharSeq, 'endswith')
@overload_method(types.Bytes, 'endswith')
def unicode_charseq_endswith(a, b):
    """Implement ``endswith`` by delegating to the string implementation.

    A UnicodeCharSeq receiver accepts a UnicodeCharSeq or UnicodeType
    suffix; a bytes-like receiver accepts a bytes-like suffix.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if isinstance(b, types.UnicodeCharSeq):
            def impl(a, b):
                return str(a).endswith(str(b))
            return impl
        if isinstance(b, types.UnicodeType):
            def impl(a, b):
                return str(a).endswith(b)
            return impl

    if isinstance(a, bytes_like) and isinstance(b, bytes_like):
        def impl(a, b):
            return a._to_str().endswith(b._to_str())
        return impl

    return None
@register_jitable
def _map_bytes(seq):
    """Return a list holding ``_to_bytes()`` of every item of ``seq``."""
    converted = [item._to_bytes() for item in seq]
    return converted
@overload_method(types.UnicodeCharSeq, 'split')
@overload_method(types.CharSeq, 'split')
@overload_method(types.Bytes, 'split')
def unicode_charseq_split(a, sep=None, maxsplit=-1):
    """Implement ``split`` by delegating to the string implementation.

    ``maxsplit`` must be -1, omitted or an integer type. For a
    UnicodeCharSeq receiver ``sep`` may be a UnicodeCharSeq, a
    UnicodeType or None-like; for a bytes-like receiver it may be
    bytes-like or None-like, and the resulting parts are converted back
    to bytes.
    """
    maxsplit_ok = maxsplit == -1 or isinstance(
        maxsplit, (types.Omitted, types.Integer, types.IntegerLiteral))
    if not maxsplit_ok:
        return None

    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if isinstance(sep, types.UnicodeCharSeq):
            def impl(a, sep=None, maxsplit=-1):
                return str(a).split(sep=str(sep), maxsplit=maxsplit)
            return impl
        if isinstance(sep, types.UnicodeType):
            def impl(a, sep=None, maxsplit=-1):
                return str(a).split(sep=sep, maxsplit=maxsplit)
            return impl
        if is_nonelike(sep):
            if is_default(maxsplit, -1):
                def impl(a, sep=None, maxsplit=-1):
                    return str(a).split()
            else:
                def impl(a, sep=None, maxsplit=-1):
                    return str(a).split(maxsplit=maxsplit)
            return impl

    if isinstance(a, bytes_like):
        if isinstance(sep, bytes_like):
            def impl(a, sep=None, maxsplit=-1):
                parts = a._to_str().split(sep._to_str(), maxsplit=maxsplit)
                return _map_bytes(parts)
            return impl
        if is_nonelike(sep):
            if is_default(maxsplit, -1):
                def impl(a, sep=None, maxsplit=-1):
                    return _map_bytes(a._to_str().split())
            else:
                def impl(a, sep=None, maxsplit=-1):
                    return _map_bytes(a._to_str().split(maxsplit=maxsplit))
            return impl

    return None
# NOT IMPLEMENTED: rsplit
@overload_method(types.UnicodeCharSeq, 'ljust')
@overload_method(types.CharSeq, 'ljust')
@overload_method(types.Bytes, 'ljust')
def unicode_charseq_ljust(a, width, fillchar=' '):
    """Implement ``ljust`` by delegating to the string implementation.

    A default ``fillchar`` uses the string default; otherwise it must be
    of the same family as the receiver. Bytes-like receivers get a bytes
    result.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if is_default(fillchar, ' '):
            def impl(a, width, fillchar=' '):
                return str(a).ljust(width)
            return impl
        if isinstance(fillchar, types.UnicodeCharSeq):
            def impl(a, width, fillchar=' '):
                return str(a).ljust(width, str(fillchar))
            return impl
        if isinstance(fillchar, types.UnicodeType):
            def impl(a, width, fillchar=' '):
                return str(a).ljust(width, fillchar)
            return impl

    if isinstance(a, bytes_like):
        if is_default(fillchar, ' ') or is_default(fillchar, b' '):
            def impl(a, width, fillchar=' '):
                return a._to_str().ljust(width)._to_bytes()
            return impl
        if isinstance(fillchar, bytes_like):
            def impl(a, width, fillchar=' '):
                padded = a._to_str().ljust(width, fillchar._to_str())
                return padded._to_bytes()
            return impl

    return None
@overload_method(types.UnicodeCharSeq, 'rjust')
@overload_method(types.CharSeq, 'rjust')
@overload_method(types.Bytes, 'rjust')
def unicode_charseq_rjust(a, width, fillchar=' '):
    """Implement ``rjust`` by delegating to the string implementation.

    A default ``fillchar`` uses the string default; otherwise it must be
    of the same family as the receiver. Bytes-like receivers get a bytes
    result.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if is_default(fillchar, ' '):
            def impl(a, width, fillchar=' '):
                return str(a).rjust(width)
            return impl
        if isinstance(fillchar, types.UnicodeCharSeq):
            def impl(a, width, fillchar=' '):
                return str(a).rjust(width, str(fillchar))
            return impl
        if isinstance(fillchar, types.UnicodeType):
            def impl(a, width, fillchar=' '):
                return str(a).rjust(width, fillchar)
            return impl

    if isinstance(a, bytes_like):
        if is_default(fillchar, ' ') or is_default(fillchar, b' '):
            def impl(a, width, fillchar=' '):
                return a._to_str().rjust(width)._to_bytes()
            return impl
        if isinstance(fillchar, bytes_like):
            def impl(a, width, fillchar=' '):
                padded = a._to_str().rjust(width, fillchar._to_str())
                return padded._to_bytes()
            return impl

    return None
@overload_method(types.UnicodeCharSeq, 'center')
@overload_method(types.CharSeq, 'center')
@overload_method(types.Bytes, 'center')
def unicode_charseq_center(a, width, fillchar=' '):
    """Implement ``center`` by delegating to the string implementation.

    A default ``fillchar`` uses the string default; otherwise it must be
    of the same family as the receiver. Bytes-like receivers get a bytes
    result.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if is_default(fillchar, ' '):
            def impl(a, width, fillchar=' '):
                return str(a).center(width)
            return impl
        if isinstance(fillchar, types.UnicodeCharSeq):
            def impl(a, width, fillchar=' '):
                return str(a).center(width, str(fillchar))
            return impl
        if isinstance(fillchar, types.UnicodeType):
            def impl(a, width, fillchar=' '):
                return str(a).center(width, fillchar)
            return impl

    if isinstance(a, bytes_like):
        if is_default(fillchar, ' ') or is_default(fillchar, b' '):
            def impl(a, width, fillchar=' '):
                return a._to_str().center(width)._to_bytes()
            return impl
        if isinstance(fillchar, bytes_like):
            def impl(a, width, fillchar=' '):
                padded = a._to_str().center(width, fillchar._to_str())
                return padded._to_bytes()
            return impl

    return None
@overload_method(types.UnicodeCharSeq, 'zfill')
@overload_method(types.CharSeq, 'zfill')
@overload_method(types.Bytes, 'zfill')
def unicode_charseq_zfill(a, width):
    """Implement ``zfill`` by delegating to the string implementation.

    Bytes-like receivers are converted with ``_to_str`` and the result
    is converted back with ``_to_bytes``.
    """
    if isinstance(a, types.UnicodeCharSeq):
        def impl(a, width):
            text = str(a)
            return text.zfill(width)
        return impl

    if isinstance(a, (types.CharSeq, types.Bytes)):
        def impl(a, width):
            filled = a._to_str().zfill(width)
            return filled._to_bytes()
        return impl

    return None
@overload_method(types.UnicodeCharSeq, 'lstrip')
@overload_method(types.CharSeq, 'lstrip')
@overload_method(types.Bytes, 'lstrip')
def unicode_charseq_lstrip(a, chars=None):
    """Implement ``lstrip`` by delegating to the string implementation.

    ``chars`` may be None-like or of the same family as the receiver.
    Bytes-like receivers get a bytes result.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if is_nonelike(chars):
            def impl(a, chars=None):
                return str(a).lstrip()
            return impl
        if isinstance(chars, types.UnicodeCharSeq):
            def impl(a, chars=None):
                return str(a).lstrip(str(chars))
            return impl
        if isinstance(chars, types.UnicodeType):
            def impl(a, chars=None):
                return str(a).lstrip(chars)
            return impl

    if isinstance(a, bytes_like):
        if is_nonelike(chars):
            def impl(a, chars=None):
                return a._to_str().lstrip()._to_bytes()
            return impl
        if isinstance(chars, bytes_like):
            def impl(a, chars=None):
                stripped = a._to_str().lstrip(chars._to_str())
                return stripped._to_bytes()
            return impl

    return None
@overload_method(types.UnicodeCharSeq, 'rstrip')
@overload_method(types.CharSeq, 'rstrip')
@overload_method(types.Bytes, 'rstrip')
def unicode_charseq_rstrip(a, chars=None):
    """Implement ``rstrip`` by delegating to the string implementation.

    ``chars`` may be None-like or of the same family as the receiver.
    Bytes-like receivers get a bytes result.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if is_nonelike(chars):
            def impl(a, chars=None):
                return str(a).rstrip()
            return impl
        if isinstance(chars, types.UnicodeCharSeq):
            def impl(a, chars=None):
                return str(a).rstrip(str(chars))
            return impl
        if isinstance(chars, types.UnicodeType):
            def impl(a, chars=None):
                return str(a).rstrip(chars)
            return impl

    if isinstance(a, bytes_like):
        if is_nonelike(chars):
            def impl(a, chars=None):
                return a._to_str().rstrip()._to_bytes()
            return impl
        if isinstance(chars, bytes_like):
            def impl(a, chars=None):
                stripped = a._to_str().rstrip(chars._to_str())
                return stripped._to_bytes()
            return impl

    return None
@overload_method(types.UnicodeCharSeq, 'strip')
@overload_method(types.CharSeq, 'strip')
@overload_method(types.Bytes, 'strip')
def unicode_charseq_strip(a, chars=None):
    """Implement ``strip`` by delegating to the string implementation.

    ``chars`` may be None-like or of the same family as the receiver.
    Bytes-like receivers get a bytes result.
    """
    bytes_like = (types.CharSeq, types.Bytes)

    if isinstance(a, types.UnicodeCharSeq):
        if is_nonelike(chars):
            def impl(a, chars=None):
                return str(a).strip()
            return impl
        if isinstance(chars, types.UnicodeCharSeq):
            def impl(a, chars=None):
                return str(a).strip(str(chars))
            return impl
        if isinstance(chars, types.UnicodeType):
            def impl(a, chars=None):
                return str(a).strip(chars)
            return impl

    if isinstance(a, bytes_like):
        if is_nonelike(chars):
            def impl(a, chars=None):
                return a._to_str().strip()._to_bytes()
            return impl
        if isinstance(chars, bytes_like):
            def impl(a, chars=None):
                stripped = a._to_str().strip(chars._to_str())
                return stripped._to_bytes()
            return impl

    return None
@overload_method(types.UnicodeCharSeq, 'join')
@overload_method(types.CharSeq, 'join')
@overload_method(types.Bytes, 'join')
def unicode_charseq_join(a, parts):
    """Implement ``join`` by delegating to the string implementation.

    Every part is converted to a string first; for a bytes-like receiver
    the joined string is converted back with ``_to_bytes``.
    """
    if isinstance(a, types.UnicodeCharSeq):
        # Assumes that parts holds UnicodeCharSeq or UnicodeType objects.
        def impl(a, parts):
            texts = [str(part) for part in parts]
            return str(a).join(texts)
        return impl

    if isinstance(a, (types.CharSeq, types.Bytes)):
        # Assumes that parts holds CharSeq or Bytes objects.
        def impl(a, parts):
            texts = [part._to_str() for part in parts]
            joined = a._to_str().join(texts)
            return joined._to_bytes()
        return impl

    return None