640 lines
30 KiB
Cython
640 lines
30 KiB
Cython
|
from libc.stddef cimport wchar_t
|
||
|
|
||
|
cdef extern from *:
|
||
|
ctypedef unsigned char Py_UCS1 # uint8_t
|
||
|
ctypedef unsigned short Py_UCS2 # uint16_t
|
||
|
|
||
|
# Return true if the object o is a Unicode object or an instance
|
||
|
# of a Unicode subtype. Changed in version 2.2: Allowed subtypes
|
||
|
# to be accepted.
|
||
|
bint PyUnicode_Check(object o)
|
||
|
|
||
|
# Return true if the object o is a Unicode object, but not an
|
||
|
# instance of a subtype. New in version 2.2.
|
||
|
bint PyUnicode_CheckExact(object o)
|
||
|
|
||
|
# Return the size of the object. o has to be a PyUnicodeObject
|
||
|
# (not checked).
|
||
|
#
|
||
|
# Deprecated since version 3.3, will be removed in version 3.10:
|
||
|
# Part of the old-style Unicode API, please migrate to using
|
||
|
# PyUnicode_GET_LENGTH().
|
||
|
Py_ssize_t PyUnicode_GET_SIZE(object o)
|
||
|
|
||
|
# Return the length of the Unicode string, in code points. o has
|
||
|
# to be a Unicode object in the “canonical” representation (not
|
||
|
# checked).
|
||
|
#
|
||
|
# New in version 3.3.
|
||
|
Py_ssize_t PyUnicode_GET_LENGTH(object o)
|
||
|
|
||
|
# Typed data-pointer accessors: each is declared as returning a pointer of the
# matching code-unit type (Py_UCS1 / Py_UCS2 / Py_UCS4) for the object o.
# NOTE(review): presumably each is only valid when PyUnicode_KIND(o) equals
# the corresponding PyUnicode_*BYTE_KIND and o is not type-checked -- confirm
# against the CPython C-API documentation.
Py_UCS1 *PyUnicode_1BYTE_DATA(object o)
|
||
|
Py_UCS2 *PyUnicode_2BYTE_DATA(object o)
|
||
|
Py_UCS4 *PyUnicode_4BYTE_DATA(object o)
|
||
|
|
||
|
# String "kind" constants, declared as plain C ints.
# NOTE(review): presumably these are the values compared against
# PyUnicode_KIND() and passed as the `kind` argument of PyUnicode_READ() /
# PyUnicode_WRITE() / PyUnicode_FromKindAndData(), with 1/2/4BYTE matching the
# Py_UCS1/Py_UCS2/Py_UCS4 storage widths -- confirm against the CPython docs.
int PyUnicode_WCHAR_KIND # Deprecated since Python 3.10, removed in 3.12.
|
||
|
int PyUnicode_1BYTE_KIND
|
||
|
int PyUnicode_2BYTE_KIND
|
||
|
int PyUnicode_4BYTE_KIND
|
||
|
# Character access addressed by (kind, data, index). WRITE stores the Py_UCS4
# `value`; READ is declared as returning a Py_UCS4. Neither declares an
# `except` clause, so no error is reported through these declarations.
# NOTE(review): `kind` / `data` are presumably the values obtained from
# PyUnicode_KIND() / PyUnicode_DATA() of the same string, and `index` is
# presumably not bounds-checked -- confirm against the CPython docs.
void PyUnicode_WRITE(int kind, void *data, Py_ssize_t index, Py_UCS4 value)
|
||
|
Py_UCS4 PyUnicode_READ(int kind, void *data, Py_ssize_t index)
|
||
|
# Variant addressed by the string object o and an index instead of a
# (kind, data) pair; also declared without an `except` clause.
Py_UCS4 PyUnicode_READ_CHAR(object o, Py_ssize_t index)
|
||
|
|
||
|
# Accessors for the (kind, data) pair of a string object.
# KIND is declared as returning an unsigned int, whereas the PyUnicode_*_KIND
# constants and the `kind` parameters in this file are declared as int.
# DATA returns an untyped void* (presumably the character buffer of o).
# NOTE(review): o is presumably not type-checked by either macro -- confirm.
unsigned int PyUnicode_KIND(object o)
|
||
|
void *PyUnicode_DATA(object o)
|
||
|
|
||
|
# Return the size of the object's internal buffer in bytes. o has
|
||
|
# to be a PyUnicodeObject (not checked).
|
||
|
Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)
|
||
|
|
||
|
# Return a pointer to the internal Py_UNICODE buffer of the
|
||
|
# object. o has to be a PyUnicodeObject (not checked).
|
||
|
Py_UNICODE* PyUnicode_AS_UNICODE(object o)
|
||
|
|
||
|
# Return a pointer to the internal buffer of the object. o has to
|
||
|
# be a PyUnicodeObject (not checked).
|
||
|
char* PyUnicode_AS_DATA(object o)
|
||
|
|
||
|
# Boolean test on the object o, declared without an `except` clause.
# NOTE(review): presumably true when o is a valid identifier according to the
# language definition -- confirm against the CPython C-API documentation.
bint PyUnicode_IsIdentifier(object o)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a whitespace character.
|
||
|
bint Py_UNICODE_ISSPACE(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a lowercase character.
|
||
|
bint Py_UNICODE_ISLOWER(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is an uppercase character.
|
||
|
bint Py_UNICODE_ISUPPER(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a titlecase character.
|
||
|
bint Py_UNICODE_ISTITLE(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a linebreak character.
|
||
|
bint Py_UNICODE_ISLINEBREAK(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a decimal character.
|
||
|
bint Py_UNICODE_ISDECIMAL(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a digit character.
|
||
|
bint Py_UNICODE_ISDIGIT(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a numeric character.
|
||
|
bint Py_UNICODE_ISNUMERIC(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is an alphabetic character.
|
||
|
bint Py_UNICODE_ISALPHA(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is an alphanumeric character.
|
||
|
bint Py_UNICODE_ISALNUM(Py_UCS4 ch)
|
||
|
|
||
|
# Return 1 or 0 depending on whether ch is a printable character
# (description inferred from the name, mirroring the sibling predicates).
bint Py_UNICODE_ISPRINTABLE(Py_UCS4 ch)
|
||
|
|
||
|
# Return the character ch converted to lower case.
|
||
|
# Used to return a Py_UNICODE value before Py3.3.
|
||
|
Py_UCS4 Py_UNICODE_TOLOWER(Py_UCS4 ch)
|
||
|
|
||
|
# Return the character ch converted to upper case.
|
||
|
# Used to return a Py_UNICODE value before Py3.3.
|
||
|
Py_UCS4 Py_UNICODE_TOUPPER(Py_UCS4 ch)
|
||
|
|
||
|
# Return the character ch converted to title case.
|
||
|
# Used to return a Py_UNICODE value before Py3.3.
|
||
|
Py_UCS4 Py_UNICODE_TOTITLE(Py_UCS4 ch)
|
||
|
|
||
|
# Return the character ch converted to a decimal positive
|
||
|
# integer. Return -1 if this is not possible. This macro does not
|
||
|
# raise exceptions.
|
||
|
int Py_UNICODE_TODECIMAL(Py_UCS4 ch)
|
||
|
|
||
|
# Return the character ch converted to a single digit
|
||
|
# integer. Return -1 if this is not possible. This macro does not
|
||
|
# raise exceptions.
|
||
|
int Py_UNICODE_TODIGIT(Py_UCS4 ch)
|
||
|
|
||
|
# Return the character ch converted to a double. Return -1.0 if
|
||
|
# this is not possible. This macro does not raise exceptions.
|
||
|
double Py_UNICODE_TONUMERIC(Py_UCS4 ch)
|
||
|
|
||
|
# To create Unicode objects and access their basic sequence
|
||
|
# properties, use these APIs:
|
||
|
|
||
|
# Create a Unicode Object from the Py_UNICODE buffer u of the
|
||
|
# given size. u may be NULL which causes the contents to be
|
||
|
# undefined. It is the user's responsibility to fill in the needed
|
||
|
# data. The buffer is copied into the new object. If the buffer is
|
||
|
# not NULL, the return value might be a shared object. Therefore,
|
||
|
# modification of the resulting Unicode object is only allowed
|
||
|
# when u is NULL.
|
||
|
unicode PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
|
||
|
|
||
|
# Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded
|
||
|
# bytes
|
||
|
unicode PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
|
||
|
|
||
|
# Similar to PyUnicode_FromUnicode(), but u points to null-terminated
|
||
|
# UTF-8 encoded bytes. The size is determined with strlen().
|
||
|
unicode PyUnicode_FromString(const char *u)
|
||
|
|
||
|
# String construction / element access functions.
# Error signalling as declared here: functions with `except -1` or
# `except NULL` report an error through that return value; functions returning
# `unicode` return a Python object.

# NOTE(review): presumably creates a string of `size` characters able to hold
# code points up to `maxchar` -- confirm against the CPython docs.
unicode PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||
|
# Builds a string from `buffer`, taking a `kind` (presumably one of the
# PyUnicode_*BYTE_KIND constants) and a `size`.
unicode PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
|
||
|
# C varargs function taking a `format` C string.
# NOTE(review): the accepted format codes are CPython-specific -- see its docs.
unicode PyUnicode_FromFormat(const char *format, ...)
|
||
|
# Length of `unicode`; -1 signals an error.
Py_ssize_t PyUnicode_GetLength(object unicode) except -1
|
||
|
# Copies `how_many` characters from `from_` (starting at `from_start`) into
# `to` (starting at `to_start`); -1 signals an error.
# `from_` carries a trailing underscore because `from` is a reserved word.
Py_ssize_t PyUnicode_CopyCharacters(object to, Py_ssize_t to_start, object from_, Py_ssize_t from_start, Py_ssize_t how_many) except -1
|
||
|
# Fills `length` positions of `unicode` from `start` with `fill_char`;
# -1 signals an error.
Py_ssize_t PyUnicode_Fill(object unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char) except -1
|
||
|
# Writes `character` at `index`; -1 signals an error.
int PyUnicode_WriteChar(object unicode, Py_ssize_t index, Py_UCS4 character) except -1
|
||
|
# Reads the character at `index`. The declared error value is -1 even though
# the return type Py_UCS4 is presumably unsigned (NOTE(review): i.e. the error
# value is (Py_UCS4)-1 -- confirm).
Py_UCS4 PyUnicode_ReadChar(object unicode, Py_ssize_t index) except -1
|
||
|
# NOTE(review): presumably returns str[start:end] as a new string -- confirm.
unicode PyUnicode_Substring(object str, Py_ssize_t start, Py_ssize_t end)
|
||
|
# Copies the string `u` into the caller-supplied Py_UCS4 `buffer` of `buflen`
# elements; NULL signals an error.
# NOTE(review): `copy_null` presumably controls whether a terminating null
# character is written -- confirm.
Py_UCS4 *PyUnicode_AsUCS4(object u, Py_UCS4 *buffer, Py_ssize_t buflen, int copy_null) except NULL
|
||
|
# Returns a Py_UCS4 buffer for `u`; NULL signals an error.
# NOTE(review): the buffer is presumably newly allocated and owned by the
# caller (to be released with PyMem_Free()) -- confirm.
Py_UCS4 *PyUnicode_AsUCS4Copy(object u) except NULL
|
||
|
|
||
|
# Create a Unicode Object from the given Unicode code point ordinal.
|
||
|
#
|
||
|
# The ordinal must be in range(0x10000) on narrow Python builds
|
||
|
# (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError
|
||
|
# is raised in case it is not.
|
||
|
unicode PyUnicode_FromOrdinal(int ordinal)
|
||
|
|
||
|
# Return a read-only pointer to the Unicode object's internal
|
||
|
# Py_UNICODE buffer, NULL if unicode is not a Unicode object.
|
||
|
Py_UNICODE* PyUnicode_AsUnicode(object o) except NULL
|
||
|
|
||
|
# Return the length of the Unicode object.
|
||
|
Py_ssize_t PyUnicode_GetSize(object o) except -1
|
||
|
|
||
|
# Coerce an encoded object obj to a Unicode object and return a
|
||
|
# reference with incremented refcount.
|
||
|
# String and other char buffer compatible objects are decoded
|
||
|
# according to the given encoding and using the error handling
|
||
|
# defined by errors. Both can be NULL to have the interface use
|
||
|
# the default values (see the next section for details).
|
||
|
# All other objects, including Unicode objects, cause a TypeError
|
||
|
# to be set.
|
||
|
# Decode the encoded object o into a Unicode object.
# encoding and errors are only read, so they are declared const (as in the
# CPython prototype); this lets callers pass const buffers and C string
# literals without a cast. Either may be NULL to select the defaults.
object PyUnicode_FromEncodedObject(object o, const char *encoding, const char *errors)
|
||
|
|
||
|
# Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict")
|
||
|
# which is used throughout the interpreter whenever coercion to
|
||
|
# Unicode is needed.
|
||
|
object PyUnicode_FromObject(object obj)
|
||
|
|
||
|
# If the platform supports wchar_t and provides a header file
|
||
|
# wchar.h, Python can interface directly to this type using the
|
||
|
# following functions. Support is optimized if Python's own
|
||
|
# Py_UNICODE type is identical to the system's wchar_t.
|
||
|
|
||
|
# Create a Unicode object from the wchar_t buffer w of the given
|
||
|
# size. Return NULL on failure.
|
||
|
# Create a Unicode object from size wchar_t characters of the buffer w.
# w is only read, so it is declared const (as in the CPython prototype);
# const wide-character buffers can then be passed without a cast.
object PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size)
|
||
|
|
||
|
# Copy the Unicode object contents into the wchar_t buffer w.
|
||
|
# At most size wchar_t characters are copied (excluding a possibly
|
||
|
# trailing null termination character). Return the number of wchar_t
|
||
|
# characters copied or -1 in case of an error. Note that the
|
||
|
# resulting wchar_t* string may or may not be null-terminated.
|
||
|
# It is the responsibility of the caller to make sure that the wchar_t*
|
||
|
# string is null-terminated in case this is required by the application.
|
||
|
# Also, note that the wchar_t* string might contain null characters,
|
||
|
# which would cause the string to be truncated when used with most C functions.
|
||
|
Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size) except -1
|
||
|
|
||
|
# Convert the Unicode object to a wide character string. The output
|
||
|
# string always ends with a null character. If size is not NULL,
|
||
|
# write the number of wide characters (excluding the trailing null
|
||
|
# termination character) into *size. Note that the resulting wchar_t
|
||
|
# string might contain null characters, which would cause the string
|
||
|
# to be truncated when used with most C functions. If size is NULL and
|
||
|
# the wchar_t* string contains null characters a ValueError is raised.
|
||
|
|
||
|
# Returns a buffer allocated by PyMem_New (use PyMem_Free() to free it)
|
||
|
# on success. On error, returns NULL and *size is undefined. Raises a
|
||
|
# MemoryError if memory allocation fails.
|
||
|
wchar_t *PyUnicode_AsWideCharString(object o, Py_ssize_t *size) except NULL
|
||
|
|
||
|
# Unicode Methods
|
||
|
|
||
|
# Concat two strings giving a new Unicode string.
|
||
|
# Return value: New reference.
|
||
|
unicode PyUnicode_Concat(object left, object right)
|
||
|
|
||
|
# Split a string giving a list of Unicode strings. If sep is NULL,
|
||
|
# splitting will be done at all whitespace substrings. Otherwise,
|
||
|
# splits occur at the given separator. At most maxsplit splits will
|
||
|
# be done. If negative, no limit is set. Separators are not included
|
||
|
# in the resulting list.
|
||
|
# Return value: New reference.
|
||
|
list PyUnicode_Split(object s, object sep, Py_ssize_t maxsplit)
|
||
|
|
||
|
# Split a Unicode string at line breaks, returning a list of Unicode
|
||
|
# strings. CRLF is considered to be one line break. If keepend is 0,
|
||
|
# the line break characters are not included in the resulting strings.
|
||
|
# Return value: New reference.
|
||
|
list PyUnicode_Splitlines(object s, bint keepend)
|
||
|
|
||
|
# Translate a string by applying a character mapping table to it and
|
||
|
# return the resulting Unicode object.
|
||
|
#
|
||
|
# The mapping table must map Unicode ordinal integers to Unicode ordinal
|
||
|
# integers or None (causing deletion of the character).
|
||
|
#
|
||
|
# Mapping tables need only provide the __getitem__() interface;
|
||
|
# dictionaries and sequences work well. Unmapped character ordinals (ones
|
||
|
# which cause a LookupError) are left untouched and are copied as-is.
|
||
|
#
|
||
|
# errors has the usual meaning for codecs. It may be NULL which indicates
|
||
|
# to use the default error handling.
|
||
|
# Return value: New reference.
|
||
|
unicode PyUnicode_Translate(object str, object table, const char *errors)
|
||
|
|
||
|
# Join a sequence of strings using the given separator and return the
|
||
|
# resulting Unicode string.
|
||
|
# Return value: New reference.
|
||
|
unicode PyUnicode_Join(object separator, object seq)
|
||
|
|
||
|
# Return 1 if substr matches str[start:end] at the given tail end
|
||
|
# (direction == -1 means to do a prefix match, direction == 1 a
|
||
|
# suffix match), 0 otherwise.
|
||
|
# Return -1 if an error occurred.
|
||
|
Py_ssize_t PyUnicode_Tailmatch(object str, object substr,
|
||
|
Py_ssize_t start, Py_ssize_t end, int direction) except -1
|
||
|
|
||
|
# Return the first position of substr in str[start:end] using the given
|
||
|
# direction (direction == 1 means to do a forward search, direction == -1
|
||
|
# a backward search). The return value is the index of the first match;
|
||
|
# a value of -1 indicates that no match was found, and -2 indicates that an
|
||
|
# error occurred and an exception has been set.
|
||
|
Py_ssize_t PyUnicode_Find(object str, object substr, Py_ssize_t start, Py_ssize_t end, int direction) except -2
|
||
|
|
||
|
# Return the first position of the character ch in str[start:end] using
|
||
|
# the given direction (direction == 1 means to do a forward search,
|
||
|
# direction == -1 a backward search). The return value is the index of
|
||
|
# the first match; a value of -1 indicates that no match was found, and
|
||
|
# -2 indicates that an error occurred and an exception has been set.
|
||
|
# New in version 3.3.
|
||
|
Py_ssize_t PyUnicode_FindChar(object str, Py_UCS4 ch, Py_ssize_t start, Py_ssize_t end, int direction) except -2
|
||
|
|
||
|
# Return the number of non-overlapping occurrences of substr in
|
||
|
# str[start:end]. Return -1 if an error occurred.
|
||
|
Py_ssize_t PyUnicode_Count(object str, object substr, Py_ssize_t start, Py_ssize_t end) except -1
|
||
|
|
||
|
# Replace at most maxcount occurrences of substr in str with replstr and
|
||
|
# return the resulting Unicode object. maxcount == -1 means replace all
|
||
|
# occurrences.
|
||
|
# Return value: New reference.
|
||
|
unicode PyUnicode_Replace(object str, object substr, object replstr, Py_ssize_t maxcount)
|
||
|
|
||
|
# Compare two strings and return -1, 0, 1 for less than,
|
||
|
# equal, and greater than, respectively.
|
||
|
int PyUnicode_Compare(object left, object right) except? -1
|
||
|
|
||
|
# Compare a unicode object, uni, with string and return -1, 0, 1 for less than,
|
||
|
# equal, and greater than, respectively. It is best to pass only ASCII-encoded
|
||
|
# strings, but the function interprets the input string as ISO-8859-1 if it
|
||
|
# contains non-ASCII characters.
|
||
|
int PyUnicode_CompareWithASCIIString(object uni, const char *string)
|
||
|
|
||
|
# Rich compare two unicode strings and return one of the following:
|
||
|
#
|
||
|
# NULL in case an exception was raised
|
||
|
# Py_True or Py_False for successful comparisons
|
||
|
# Py_NotImplemented in case the type combination is unknown
|
||
|
#
|
||
|
# Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in case
|
||
|
# the conversion of the arguments to Unicode fails with a UnicodeDecodeError.
|
||
|
#
|
||
|
# Possible values for op are Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, and Py_LE.
|
||
|
object PyUnicode_RichCompare(object left, object right, int op)
|
||
|
|
||
|
# Return a new string object from format and args; this is analogous to
|
||
|
# format % args.
|
||
|
# Return value: New reference.
|
||
|
unicode PyUnicode_Format(object format, object args)
|
||
|
|
||
|
# Check whether element is contained in container and return true or false
|
||
|
# accordingly.
|
||
|
#
|
||
|
# element has to coerce to a one element Unicode string. -1 is returned
|
||
|
# if there was an error.
|
||
|
int PyUnicode_Contains(object container, object element) except -1
|
||
|
|
||
|
# Intern the argument *string in place. The argument must be the address
|
||
|
# of a pointer variable pointing to a Python unicode string object. If
|
||
|
# there is an existing interned string that is the same as *string, it sets
|
||
|
# *string to it (decrementing the reference count of the old string object
|
||
|
# and incrementing the reference count of the interned string object),
|
||
|
# otherwise it leaves *string alone and interns it (incrementing its reference
|
||
|
# count). (Clarification: even though there is a lot of talk about reference
|
||
|
# counts, think of this function as reference-count-neutral; you own the object
|
||
|
# after the call if and only if you owned it before the call.)
|
||
|
#void PyUnicode_InternInPlace(PyObject **string)
|
||
|
|
||
|
# A combination of PyUnicode_FromString() and PyUnicode_InternInPlace(),
|
||
|
# returning either a new unicode string object that has been interned, or
|
||
|
# a new ("owned") reference to an earlier interned string object with the
|
||
|
# same value.
|
||
|
unicode PyUnicode_InternFromString(const char *v)
|
||
|
|
||
|
|
||
|
# Codecs
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the encoded
|
||
|
# string s. encoding and errors have the same meaning as the
|
||
|
# parameters of the same name in the unicode() builtin
|
||
|
# function. The codec to be used is looked up using the Python
|
||
|
# codec registry. Return NULL if an exception was raised by the
|
||
|
# codec.
|
||
|
# Decode size bytes of s using the codec named by encoding, with error
# handling selected by errors.
# All three C strings are only read, so they are declared const (as in the
# CPython prototype); const buffers can then be passed without a cast.
object PyUnicode_Decode(const char *s, Py_ssize_t size, const char *encoding, const char *errors)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size and return a
|
||
|
# Python string object. encoding and errors have the same meaning
|
||
|
# as the parameters of the same name in the Unicode encode()
|
||
|
# method. The codec to be used is looked up using the Python codec
|
||
|
# registry. Return NULL if an exception was raised by the codec.
|
||
|
object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size,
|
||
|
char *encoding, char *errors)
|
||
|
|
||
|
# Encode a Unicode object and return the result as Python string
|
||
|
# object. encoding and errors have the same meaning as the
|
||
|
# parameters of the same name in the Unicode encode() method. The
|
||
|
# codec to be used is looked up using the Python codec
|
||
|
# registry. Return NULL if an exception was raised by the codec.
|
||
|
# Encode the Unicode object with the codec named by encoding, with error
# handling selected by errors.
# Both C strings are only read, so they are declared const (as in the CPython
# prototype); const buffers and C string literals need no cast.
object PyUnicode_AsEncodedString(object unicode, const char *encoding, const char *errors)
|
||
|
|
||
|
# These are the UTF-8 codec APIs:
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the UTF-8
|
||
|
# encoded string s. Return NULL if an exception was raised by the
|
||
|
# codec.
|
||
|
# Decode size bytes of the UTF-8 encoded buffer s into a Unicode object.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
unicode PyUnicode_DecodeUTF8(const char *s, Py_ssize_t size, const char *errors)
|
||
|
|
||
|
# If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
|
||
|
# consumed is not NULL, trailing incomplete UTF-8 byte sequences
|
||
|
# will not be treated as an error. Those bytes will not be decoded
|
||
|
# and the number of bytes that have been decoded will be stored in
|
||
|
# consumed. New in version 2.4.
|
||
|
# Stateful UTF-8 decoder: when consumed is not NULL, the number of bytes
# actually decoded is stored in *consumed.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); consumed stays non-const because it is an output parameter.
unicode PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using UTF-8 and
|
||
|
# return a Python string object. Return NULL if an exception was
|
||
|
# raised by the codec.
|
||
|
bytes PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
|
||
|
|
||
|
# Encode a Unicode object using UTF-8 and return the result as a Python bytes object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
|
||
|
bytes PyUnicode_AsUTF8String(object unicode)
|
||
|
|
||
|
|
||
|
# Return a pointer to the UTF-8 encoding of the Unicode object,
|
||
|
# and store the size of the encoded representation (in bytes) in size.
|
||
|
# The size argument can be NULL; in this case no size will be stored.
|
||
|
# The returned buffer always has an extra null byte appended
|
||
|
# (not included in size), regardless of whether there are any
|
||
|
# other null code points.
|
||
|
|
||
|
# In the case of an error, NULL is returned with an exception set and
|
||
|
# no size is stored.
|
||
|
|
||
|
# This caches the UTF-8 representation of the string in the Unicode
|
||
|
# object, and subsequent calls will return a pointer to the same buffer.
|
||
|
# The caller is not responsible for deallocating the buffer
|
||
|
const char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size) except NULL
|
||
|
|
||
|
|
||
|
# As PyUnicode_AsUTF8AndSize(), but does not store the size.
|
||
|
const char *PyUnicode_AsUTF8(object unicode) except NULL
|
||
|
|
||
|
# These are the UTF-16 codec APIs:
|
||
|
|
||
|
# Decode length bytes from a UTF-16 encoded buffer string and
|
||
|
# return the corresponding Unicode object. errors (if non-NULL)
|
||
|
# defines the error handling. It defaults to ``strict''.
|
||
|
#
|
||
|
# If byteorder is non-NULL, the decoder starts decoding using the
|
||
|
# given byte order:
|
||
|
#
|
||
|
# *byteorder == -1: little endian
|
||
|
# *byteorder == 0: native order
|
||
|
# *byteorder == 1: big endian
|
||
|
#
|
||
|
# and then switches if the first two bytes of the input data are a
|
||
|
# byte order mark (BOM) and the specified byte order is native
|
||
|
# order. This BOM is not copied into the resulting Unicode
|
||
|
# string. After completion, *byteorder is set to the current byte
|
||
|
# order at the end of input data.
|
||
|
#
|
||
|
# If byteorder is NULL, the codec starts in native order mode.
|
||
|
# Decode size bytes of the UTF-16 encoded buffer s into a Unicode object.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); byteorder stays non-const because it is an in/out parameter
# (it may be NULL).
unicode PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors, int *byteorder)
|
||
|
|
||
|
# If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
|
||
|
# consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
|
||
|
# treat trailing incomplete UTF-16 byte sequences (such as an odd
|
||
|
# number of bytes or a split surrogate pair) as an error. Those
|
||
|
# bytes will not be decoded and the number of bytes that have been
|
||
|
# decoded will be stored in consumed. New in version 2.4.
|
||
|
# Stateful UTF-16 decoder: when consumed is not NULL, the number of bytes
# actually decoded is stored in *consumed.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); byteorder and consumed stay non-const because they are written.
unicode PyUnicode_DecodeUTF16Stateful(const char *s, Py_ssize_t size, const char *errors, int *byteorder, Py_ssize_t *consumed)
|
||
|
|
||
|
# Return a Python string object holding the UTF-16 encoded value
|
||
|
# of the Unicode data in s. If byteorder is not 0, output is
|
||
|
# written according to the following byte order:
|
||
|
#
|
||
|
# byteorder == -1: little endian
|
||
|
# byteorder == 0: native byte order (writes a BOM mark)
|
||
|
# byteorder == 1: big endian
|
||
|
#
|
||
|
# If byteorder is 0, the output string will always start with the
|
||
|
# Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark
|
||
|
# is prepended.
|
||
|
#
|
||
|
# If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
|
||
|
# represented as a surrogate pair. If it is not defined, each
|
||
|
# Py_UNICODE value is interpreted as a UCS-2 character.
|
||
|
bytes PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
|
||
|
|
||
|
# Return a Python string using the UTF-16 encoding in native byte
|
||
|
# order. The string always starts with a BOM mark. Error handling
|
||
|
# is ``strict''. Return NULL if an exception was raised by the
|
||
|
# codec.
|
||
|
bytes PyUnicode_AsUTF16String(object unicode)
|
||
|
|
||
|
# These are the ``Unicode Escape'' codec APIs:
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the
|
||
|
# Unicode-Escape encoded string s. Return NULL if an exception was
|
||
|
# raised by the codec.
|
||
|
# Decode size bytes of the Unicode-Escape encoded buffer s.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
object PyUnicode_DecodeUnicodeEscape(const char *s, Py_ssize_t size, const char *errors)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using
|
||
|
# Unicode-Escape and return a Python string object. Return NULL if
|
||
|
# an exception was raised by the codec.
|
||
|
object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)
|
||
|
|
||
|
# Encode a Unicode object using Unicode-Escape and return the
|
||
|
# result as Python string object. Error handling is
|
||
|
# ``strict''. Return NULL if an exception was raised by the codec.
|
||
|
object PyUnicode_AsUnicodeEscapeString(object unicode)
|
||
|
|
||
|
# These are the ``Raw Unicode Escape'' codec APIs:
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the
|
||
|
# Raw-Unicode-Escape encoded string s. Return NULL if an exception
|
||
|
# was raised by the codec.
|
||
|
# Decode size bytes of the Raw-Unicode-Escape encoded buffer s.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
object PyUnicode_DecodeRawUnicodeEscape(const char *s, Py_ssize_t size, const char *errors)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using
|
||
|
# Raw-Unicode-Escape and return a Python string object. Return
|
||
|
# NULL if an exception was raised by the codec.
|
||
|
object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char *errors)
|
||
|
|
||
|
# Encode a Unicode object using Raw-Unicode-Escape and return the
|
||
|
# result as Python string object. Error handling is
|
||
|
# ``strict''. Return NULL if an exception was raised by the codec.
|
||
|
object PyUnicode_AsRawUnicodeEscapeString(object unicode)
|
||
|
|
||
|
# These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode ordinals and only these are accepted by the codecs during encoding.
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the Latin-1
|
||
|
# encoded string s. Return NULL if an exception was raised by the
|
||
|
# codec.
|
||
|
# Decode size bytes of the Latin-1 encoded buffer s into a Unicode object.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
unicode PyUnicode_DecodeLatin1(const char *s, Py_ssize_t size, const char *errors)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using Latin-1 and
|
||
|
# return a Python bytes object. Return NULL if an exception was
|
||
|
# raised by the codec.
|
||
|
bytes PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
|
||
|
|
||
|
# Encode a Unicode object using Latin-1 and return the result as
|
||
|
# Python bytes object. Error handling is ``strict''. Return NULL
|
||
|
# if an exception was raised by the codec.
|
||
|
bytes PyUnicode_AsLatin1String(object unicode)
|
||
|
|
||
|
# These are the ASCII codec APIs. Only 7-bit ASCII data is
|
||
|
# accepted. All other codes generate errors.
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the ASCII
|
||
|
# encoded string s. Return NULL if an exception was raised by the
|
||
|
# codec.
|
||
|
# Decode size bytes of the ASCII encoded buffer s into a Unicode object.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
unicode PyUnicode_DecodeASCII(const char *s, Py_ssize_t size, const char *errors)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using ASCII and
|
||
|
# return a Python bytes object. Return NULL if an exception was
|
||
|
# raised by the codec.
|
||
|
bytes PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
|
||
|
|
||
|
# Encode a Unicode object using ASCII and return the result as
|
||
|
# Python bytes object. Error handling is ``strict''. Return NULL
|
||
|
# if an exception was raised by the codec.
|
||
|
bytes PyUnicode_AsASCIIString(object o)
|
||
|
|
||
|
# These are the mapping codec APIs:
|
||
|
#
|
||
|
# This codec is special in that it can be used to implement many
|
||
|
# different codecs (and this is in fact what was done to obtain most
|
||
|
# of the standard codecs included in the encodings package). The codec
|
||
|
# uses mapping to encode and decode characters.
|
||
|
#
|
||
|
# Decoding mappings must map single string characters to single
|
||
|
# Unicode characters, integers (which are then interpreted as Unicode
|
||
|
# ordinals) or None (meaning "undefined mapping" and causing an
|
||
|
# error).
|
||
|
#
|
||
|
# Encoding mappings must map single Unicode characters to single
|
||
|
# string characters, integers (which are then interpreted as Latin-1
|
||
|
# ordinals) or None (meaning "undefined mapping" and causing an
|
||
|
# error).
|
||
|
#
|
||
|
# The mapping objects provided must only support the __getitem__
|
||
|
# mapping interface.
|
||
|
#
|
||
|
# If a character lookup fails with a LookupError, the character is
|
||
|
# copied as-is meaning that its ordinal value will be interpreted as
|
||
|
# Unicode or Latin-1 ordinal resp. Because of this, mappings only need
|
||
|
# to contain those mappings which map characters to different code
|
||
|
# points.
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the encoded
|
||
|
# string s using the given mapping object. Return NULL if an
|
||
|
# exception was raised by the codec. If mapping is NULL latin-1
|
||
|
# decoding will be done. Else it can be a dictionary keyed by byte values
|
||
|
# or a unicode string, which is treated as a lookup table. Byte
|
||
|
# values greater than the length of the string and U+FFFE
|
||
|
# "characters" are treated as "undefined mapping". Changed in
|
||
|
# version 2.4: Allowed unicode string as mapping argument.
|
||
|
# Decode size bytes of the buffer s using the given mapping object.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
object PyUnicode_DecodeCharmap(const char *s, Py_ssize_t size, object mapping, const char *errors)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using the given
|
||
|
# mapping object and return a Python string object. Return NULL if
|
||
|
# an exception was raised by the codec.
|
||
|
#
|
||
|
# Deprecated since version 3.3, will be removed in version 4.0.
|
||
|
object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
|
||
|
|
||
|
# Encode a Unicode object using the given mapping object and
|
||
|
# return the result as Python string object. Error handling is
|
||
|
# ``strict''. Return NULL if an exception was raised by the codec.
|
||
|
object PyUnicode_AsCharmapString(object o, object mapping)
|
||
|
|
||
|
# The following codec API is special in that it maps Unicode to Unicode.
|
||
|
|
||
|
# Translate a Py_UNICODE buffer of the given length by applying a
|
||
|
# character mapping table to it and return the resulting Unicode
|
||
|
# object. Return NULL when an exception was raised by the codec.
|
||
|
#
|
||
|
# The mapping table must map Unicode ordinal integers to Unicode
|
||
|
# ordinal integers or None (causing deletion of the character).
|
||
|
#
|
||
|
# Mapping tables need only provide the __getitem__() interface;
|
||
|
# dictionaries and sequences work well. Unmapped character
|
||
|
# ordinals (ones which cause a LookupError) are left untouched and
|
||
|
# are copied as-is.
|
||
|
#
|
||
|
# Deprecated since version 3.3, will be removed in version 4.0.
|
||
|
object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
|
||
|
object table, char *errors)
|
||
|
|
||
|
# These are the MBCS codec APIs. They are currently only available on
|
||
|
# Windows and use the Win32 MBCS converters to implement the
|
||
|
# conversions. Note that MBCS (or DBCS) is a class of encodings, not
|
||
|
# just one. The target encoding is defined by the user settings on the
|
||
|
# machine running the codec.
|
||
|
|
||
|
# Create a Unicode object by decoding size bytes of the MBCS
|
||
|
# encoded string s. Return NULL if an exception was raised by the
|
||
|
# codec.
|
||
|
# Decode size bytes of the MBCS encoded buffer s into a Unicode object
# (per the section comment above, the MBCS codecs are Windows-only).
# s and errors are only read, so they are declared const (as in the CPython
# prototype); const buffers can then be passed without a cast.
unicode PyUnicode_DecodeMBCS(const char *s, Py_ssize_t size, const char *errors)
|
||
|
|
||
|
# If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
|
||
|
# consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
|
||
|
# decode trailing lead byte and the number of bytes that have been
|
||
|
# decoded will be stored in consumed. New in version 2.5.
|
||
|
# NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
|
||
|
# Stateful MBCS decoder: when consumed is not NULL, the number of bytes
# actually decoded is stored in *consumed.
# s and errors are only read, so they are declared const (as in the CPython
# prototype); consumed stays non-const because it is an output parameter.
unicode PyUnicode_DecodeMBCSStateful(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
|
||
|
|
||
|
# Encode the Py_UNICODE buffer of the given size using MBCS and
|
||
|
# return a Python string object. Return NULL if an exception was
|
||
|
# raised by the codec.
|
||
|
bytes PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
|
||
|
|
||
|
# Encode a Unicode object using MBCS and return the result as
|
||
|
# Python string object. Error handling is ``strict''. Return NULL
|
||
|
# if an exception was raised by the codec.
|
||
|
bytes PyUnicode_AsMBCSString(object o)
|
||
|
|
||
|
# Encode the Unicode object using the specified code page and return
|
||
|
# a Python bytes object. Return NULL if an exception was raised by the
|
||
|
# codec. Use CP_ACP code page to get the MBCS encoder.
|
||
|
#
|
||
|
# New in version 3.3.
|
||
|
bytes PyUnicode_EncodeCodePage(int code_page, object unicode, const char *errors)
|
||
|
|
||
|
|
||
|
# Py_UCS4 helpers (new in CPython 3.3)
|
||
|
|
||
|
# These utility functions work on strings of Py_UCS4 characters and
|
||
|
# otherwise behave like the C standard library functions with the same name.
|
||
|
|
||
|
# Py_UCS4 counterparts of the C library's strlen, strcpy, strncpy, strcat,
# strcmp, strncmp, strchr and strrchr (matched by the suffix of each name).
# All operate on raw Py_UCS4 pointers, not Python objects, and none declares
# an `except` clause.
# NOTE(review): as with their C namesakes, the strings are presumably expected
# to be null-terminated and destination buffers large enough -- confirm, and
# verify availability in the CPython versions being targeted.
size_t Py_UCS4_strlen(const Py_UCS4 *u)
|
||
|
Py_UCS4* Py_UCS4_strcpy(Py_UCS4 *s1, const Py_UCS4 *s2)
|
||
|
Py_UCS4* Py_UCS4_strncpy(Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
|
||
|
Py_UCS4* Py_UCS4_strcat(Py_UCS4 *s1, const Py_UCS4 *s2)
|
||
|
int Py_UCS4_strcmp(const Py_UCS4 *s1, const Py_UCS4 *s2)
|
||
|
int Py_UCS4_strncmp(const Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
|
||
|
# The search functions take a const string but are declared as returning a
# non-const pointer.
Py_UCS4* Py_UCS4_strchr(const Py_UCS4 *s, Py_UCS4 c)
|
||
|
Py_UCS4* Py_UCS4_strrchr(const Py_UCS4 *s, Py_UCS4 c)
|