ai-content-maker/.venv/Lib/site-packages/numba/np/ufunc/wrappers.py

744 lines
27 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
from collections import namedtuple
import numpy as np
from llvmlite.ir import Constant, IRBuilder
from llvmlite import ir
from numba.core import types, cgutils
from numba.core.compiler_lock import global_compiler_lock
from numba.core.caching import make_library_cache, NullCache
_wrapper_info = namedtuple('_wrapper_info', ['library', 'env', 'name'])
def _build_ufunc_loop_body(load, store, context, func, builder, arrays, out,
offsets, store_offset, signature, pyapi, env):
elems = load()
# Compute
status, retval = context.call_conv.call_function(builder, func,
signature.return_type,
signature.args, elems)
# Store
with builder.if_else(status.is_ok, likely=True) as (if_ok, if_error):
with if_ok:
store(retval)
with if_error:
gil = pyapi.gil_ensure()
context.call_conv.raise_error(builder, pyapi, status)
pyapi.gil_release(gil)
# increment indices
for off, ary in zip(offsets, arrays):
builder.store(builder.add(builder.load(off), ary.step), off)
builder.store(builder.add(builder.load(store_offset), out.step),
store_offset)
return status.code
def _build_ufunc_loop_body_objmode(load, store, context, func, builder,
arrays, out, offsets, store_offset,
signature, env, pyapi):
elems = load()
# Compute
_objargs = [types.pyobject] * len(signature.args)
# We need to push the error indicator to avoid it messing with
# the ufunc's execution. We restore it unless the ufunc raised
# a new error.
with pyapi.err_push(keep_new=True):
status, retval = context.call_conv.call_function(builder, func,
types.pyobject,
_objargs, elems)
# Release owned reference to arguments
for elem in elems:
pyapi.decref(elem)
# NOTE: if an error occurred, it will be caught by the Numpy machinery
# Store
store(retval)
# increment indices
for off, ary in zip(offsets, arrays):
builder.store(builder.add(builder.load(off), ary.step), off)
builder.store(builder.add(builder.load(store_offset), out.step),
store_offset)
return status.code
def build_slow_loop_body(context, func, builder, arrays, out, offsets,
store_offset, signature, pyapi, env):
def load():
elems = [ary.load_direct(builder.load(off))
for off, ary in zip(offsets, arrays)]
return elems
def store(retval):
out.store_direct(retval, builder.load(store_offset))
return _build_ufunc_loop_body(load, store, context, func, builder, arrays,
out, offsets, store_offset, signature, pyapi,
env=env)
def build_obj_loop_body(context, func, builder, arrays, out, offsets,
store_offset, signature, pyapi, envptr, env):
env_body = context.get_env_body(builder, envptr)
env_manager = pyapi.get_env_manager(env, env_body, envptr)
def load():
# Load
elems = [ary.load_direct(builder.load(off))
for off, ary in zip(offsets, arrays)]
# Box
elems = [pyapi.from_native_value(t, v, env_manager)
for v, t in zip(elems, signature.args)]
return elems
def store(retval):
is_ok = cgutils.is_not_null(builder, retval)
# If an error is raised by the object mode ufunc, it will
# simply get caught by the Numpy ufunc machinery.
with builder.if_then(is_ok, likely=True):
# Unbox
native = pyapi.to_native_value(signature.return_type, retval)
assert native.cleanup is None
# Store
out.store_direct(native.value, builder.load(store_offset))
# Release owned reference
pyapi.decref(retval)
return _build_ufunc_loop_body_objmode(load, store, context, func, builder,
arrays, out, offsets, store_offset,
signature, envptr, pyapi)
def build_fast_loop_body(context, func, builder, arrays, out, offsets,
store_offset, signature, ind, pyapi, env):
def load():
elems = [ary.load_aligned(ind)
for ary in arrays]
return elems
def store(retval):
out.store_aligned(retval, ind)
return _build_ufunc_loop_body(load, store, context, func, builder, arrays,
out, offsets, store_offset, signature, pyapi,
env=env)
def build_ufunc_wrapper(library, context, fname, signature, objmode, cres):
"""
Wrap the scalar function with a loop that iterates over the arguments
Returns
-------
(library, env, name)
"""
assert isinstance(fname, str)
byte_t = ir.IntType(8)
byte_ptr_t = ir.PointerType(byte_t)
byte_ptr_ptr_t = ir.PointerType(byte_ptr_t)
intp_t = context.get_value_type(types.intp)
intp_ptr_t = ir.PointerType(intp_t)
fnty = ir.FunctionType(ir.VoidType(), [byte_ptr_ptr_t, intp_ptr_t,
intp_ptr_t, byte_ptr_t])
wrapperlib = context.codegen().create_library('ufunc_wrapper')
wrapper_module = wrapperlib.create_ir_module('')
if objmode:
func_type = context.call_conv.get_function_type(
types.pyobject, [types.pyobject] * len(signature.args))
else:
func_type = context.call_conv.get_function_type(
signature.return_type, signature.args)
func = ir.Function(wrapper_module, func_type, name=fname)
func.attributes.add("alwaysinline")
wrapper = ir.Function(wrapper_module, fnty, "__ufunc__." + func.name)
arg_args, arg_dims, arg_steps, arg_data = wrapper.args
arg_args.name = "args"
arg_dims.name = "dims"
arg_steps.name = "steps"
arg_data.name = "data"
builder = IRBuilder(wrapper.append_basic_block("entry"))
# Prepare Environment
envname = context.get_env_name(cres.fndesc)
env = cres.environment
envptr = builder.load(context.declare_env_global(builder.module, envname))
# Emit loop
loopcount = builder.load(arg_dims, name="loopcount")
# Prepare inputs
arrays = []
for i, typ in enumerate(signature.args):
arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i, typ))
# Prepare output
out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
signature.return_type)
# Setup indices
offsets = []
zero = context.get_constant(types.intp, 0)
for _ in arrays:
p = cgutils.alloca_once(builder, intp_t)
offsets.append(p)
builder.store(zero, p)
store_offset = cgutils.alloca_once(builder, intp_t)
builder.store(zero, store_offset)
unit_strided = cgutils.true_bit
for ary in arrays:
unit_strided = builder.and_(unit_strided, ary.is_unit_strided)
pyapi = context.get_python_api(builder)
if objmode:
# General loop
gil = pyapi.gil_ensure()
with cgutils.for_range(builder, loopcount, intp=intp_t):
build_obj_loop_body(
context, func, builder, arrays, out, offsets,
store_offset, signature, pyapi, envptr, env,
)
pyapi.gil_release(gil)
builder.ret_void()
else:
with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
with is_unit_strided:
with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
build_fast_loop_body(
context, func, builder, arrays, out, offsets,
store_offset, signature, loop.index, pyapi,
env=envptr,
)
with is_strided:
# General loop
with cgutils.for_range(builder, loopcount, intp=intp_t):
build_slow_loop_body(
context, func, builder, arrays, out, offsets,
store_offset, signature, pyapi,
env=envptr,
)
builder.ret_void()
del builder
# Link and finalize
wrapperlib.add_ir_module(wrapper_module)
wrapperlib.add_linking_library(library)
return _wrapper_info(library=wrapperlib, env=env, name=wrapper.name)
class UArrayArg(object):
def __init__(self, context, builder, args, steps, i, fe_type):
self.context = context
self.builder = builder
self.fe_type = fe_type
offset = self.context.get_constant(types.intp, i)
offseted_args = self.builder.load(builder.gep(args, [offset]))
data_type = context.get_data_type(fe_type)
self.dataptr = self.builder.bitcast(offseted_args,
data_type.as_pointer())
sizeof = self.context.get_abi_sizeof(data_type)
self.abisize = self.context.get_constant(types.intp, sizeof)
offseted_step = self.builder.gep(steps, [offset])
self.step = self.builder.load(offseted_step)
self.is_unit_strided = builder.icmp_unsigned('==',
self.abisize, self.step)
self.builder = builder
def load_direct(self, byteoffset):
"""
Generic load from the given *byteoffset*. load_aligned() is
preferred if possible.
"""
ptr = cgutils.pointer_add(self.builder, self.dataptr, byteoffset)
return self.context.unpack_value(self.builder, self.fe_type, ptr)
def load_aligned(self, ind):
# Using gep() instead of explicit pointer addition helps LLVM
# vectorize the loop.
ptr = self.builder.gep(self.dataptr, [ind])
return self.context.unpack_value(self.builder, self.fe_type, ptr)
def store_direct(self, value, byteoffset):
ptr = cgutils.pointer_add(self.builder, self.dataptr, byteoffset)
self.context.pack_value(self.builder, self.fe_type, value, ptr)
def store_aligned(self, value, ind):
ptr = self.builder.gep(self.dataptr, [ind])
self.context.pack_value(self.builder, self.fe_type, value, ptr)
GufWrapperCache = make_library_cache('guf')
class _GufuncWrapper(object):
def __init__(self, py_func, cres, sin, sout, cache, is_parfors):
"""
The *is_parfors* argument is a boolean that indicates if the GUfunc
being built is to be used as a ParFors kernel. If True, it disables
the caching on the wrapper as a separate unit because it will be linked
into the caller function and cached along with it.
"""
self.py_func = py_func
self.cres = cres
self.sin = sin
self.sout = sout
self.is_objectmode = self.signature.return_type == types.pyobject
self.cache = (GufWrapperCache(py_func=self.py_func)
if cache else NullCache())
self.is_parfors = bool(is_parfors)
@property
def library(self):
return self.cres.library
@property
def context(self):
return self.cres.target_context
@property
def call_conv(self):
return self.context.call_conv
@property
def signature(self):
return self.cres.signature
@property
def fndesc(self):
return self.cres.fndesc
@property
def env(self):
return self.cres.environment
def _wrapper_function_type(self):
byte_t = ir.IntType(8)
byte_ptr_t = ir.PointerType(byte_t)
byte_ptr_ptr_t = ir.PointerType(byte_ptr_t)
intp_t = self.context.get_value_type(types.intp)
intp_ptr_t = ir.PointerType(intp_t)
fnty = ir.FunctionType(ir.VoidType(), [byte_ptr_ptr_t, intp_ptr_t,
intp_ptr_t, byte_ptr_t])
return fnty
def _build_wrapper(self, library, name):
"""
The LLVM IRBuilder code to create the gufunc wrapper.
The *library* arg is the CodeLibrary to which the wrapper should
be added. The *name* arg is the name of the wrapper function being
created.
"""
intp_t = self.context.get_value_type(types.intp)
fnty = self._wrapper_function_type()
wrapper_module = library.create_ir_module('_gufunc_wrapper')
func_type = self.call_conv.get_function_type(self.fndesc.restype,
self.fndesc.argtypes)
fname = self.fndesc.llvm_func_name
func = ir.Function(wrapper_module, func_type, name=fname)
func.attributes.add("alwaysinline")
wrapper = ir.Function(wrapper_module, fnty, name)
# The use of weak_odr linkage avoids the function being dropped due
# to the order in which the wrappers and the user function are linked.
wrapper.linkage = 'weak_odr'
arg_args, arg_dims, arg_steps, arg_data = wrapper.args
arg_args.name = "args"
arg_dims.name = "dims"
arg_steps.name = "steps"
arg_data.name = "data"
builder = IRBuilder(wrapper.append_basic_block("entry"))
loopcount = builder.load(arg_dims, name="loopcount")
pyapi = self.context.get_python_api(builder)
# Unpack shapes
unique_syms = set()
for grp in (self.sin, self.sout):
for syms in grp:
unique_syms |= set(syms)
sym_map = {}
for syms in self.sin:
for s in syms:
if s not in sym_map:
sym_map[s] = len(sym_map)
sym_dim = {}
for s, i in sym_map.items():
sym_dim[s] = builder.load(builder.gep(arg_dims,
[self.context.get_constant(
types.intp,
i + 1)]))
# Prepare inputs
arrays = []
step_offset = len(self.sin) + len(self.sout)
for i, (typ, sym) in enumerate(zip(self.signature.args,
self.sin + self.sout)):
ary = GUArrayArg(self.context, builder, arg_args,
arg_steps, i, step_offset, typ, sym, sym_dim)
step_offset += len(sym)
arrays.append(ary)
bbreturn = builder.append_basic_block('.return')
# Prologue
self.gen_prologue(builder, pyapi)
# Loop
with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
args = [a.get_array_at_offset(loop.index) for a in arrays]
innercall, error = self.gen_loop_body(builder, pyapi, func, args)
# If error, escape
cgutils.cbranch_or_continue(builder, error, bbreturn)
builder.branch(bbreturn)
builder.position_at_end(bbreturn)
# Epilogue
self.gen_epilogue(builder, pyapi)
builder.ret_void()
# Link
library.add_ir_module(wrapper_module)
library.add_linking_library(self.library)
def _compile_wrapper(self, wrapper_name):
# Gufunc created by Parfors?
if self.is_parfors:
# No wrapper caching for parfors
wrapperlib = self.context.codegen().create_library(str(self))
# Build wrapper
self._build_wrapper(wrapperlib, wrapper_name)
# Non-parfors?
else:
# Use cache and compiler in a critical section
wrapperlib = self.cache.load_overload(
self.cres.signature, self.cres.target_context,
)
if wrapperlib is None:
# Create library and enable caching
wrapperlib = self.context.codegen().create_library(str(self))
wrapperlib.enable_object_caching()
# Build wrapper
self._build_wrapper(wrapperlib, wrapper_name)
# Cache
self.cache.save_overload(self.cres.signature, wrapperlib)
return wrapperlib
@global_compiler_lock
def build(self):
wrapper_name = "__gufunc__." + self.fndesc.mangled_name
wrapperlib = self._compile_wrapper(wrapper_name)
return _wrapper_info(
library=wrapperlib, env=self.env, name=wrapper_name,
)
def gen_loop_body(self, builder, pyapi, func, args):
status, retval = self.call_conv.call_function(
builder, func, self.signature.return_type, self.signature.args,
args)
with builder.if_then(status.is_error, likely=False):
gil = pyapi.gil_ensure()
self.context.call_conv.raise_error(builder, pyapi, status)
pyapi.gil_release(gil)
return status.code, status.is_error
def gen_prologue(self, builder, pyapi):
pass # Do nothing
def gen_epilogue(self, builder, pyapi):
pass # Do nothing
class _GufuncObjectWrapper(_GufuncWrapper):
def gen_loop_body(self, builder, pyapi, func, args):
innercall, error = _prepare_call_to_object_mode(self.context,
builder, pyapi, func,
self.signature,
args)
return innercall, error
def gen_prologue(self, builder, pyapi):
# Acquire the GIL
self.gil = pyapi.gil_ensure()
def gen_epilogue(self, builder, pyapi):
# Release GIL
pyapi.gil_release(self.gil)
def build_gufunc_wrapper(py_func, cres, sin, sout, cache, is_parfors):
signature = cres.signature
wrapcls = (_GufuncObjectWrapper
if signature.return_type == types.pyobject
else _GufuncWrapper)
return wrapcls(
py_func, cres, sin, sout, cache, is_parfors=is_parfors,
).build()
def _prepare_call_to_object_mode(context, builder, pyapi, func,
signature, args):
mod = builder.module
bb_core_return = builder.append_basic_block('ufunc.core.return')
# Call to
# PyObject* ndarray_new(int nd,
# npy_intp *dims, /* shape */
# npy_intp *strides,
# void* data,
# int type_num,
# int itemsize)
ll_int = context.get_value_type(types.int32)
ll_intp = context.get_value_type(types.intp)
ll_intp_ptr = ir.PointerType(ll_intp)
ll_voidptr = context.get_value_type(types.voidptr)
ll_pyobj = context.get_value_type(types.pyobject)
fnty = ir.FunctionType(ll_pyobj, [ll_int, ll_intp_ptr,
ll_intp_ptr, ll_voidptr,
ll_int, ll_int])
fn_array_new = cgutils.get_or_insert_function(mod, fnty,
"numba_ndarray_new")
# Convert each llarray into pyobject
error_pointer = cgutils.alloca_once(builder, ir.IntType(1), name='error')
builder.store(cgutils.true_bit, error_pointer)
# The PyObject* arguments to the kernel function
object_args = []
object_pointers = []
for i, (arg, argty) in enumerate(zip(args, signature.args)):
# Allocate NULL-initialized slot for this argument
objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
object_pointers.append(objptr)
if isinstance(argty, types.Array):
# Special case arrays: we don't need full-blown NRT reflection
# since the argument will be gone at the end of the kernel
arycls = context.make_array(argty)
array = arycls(context, builder, value=arg)
zero = Constant(ll_int, 0)
# Extract members of the llarray
nd = Constant(ll_int, argty.ndim)
dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
strides = builder.gep(array._get_ptr_by_name('strides'),
[zero, zero])
data = builder.bitcast(array.data, ll_voidptr)
dtype = np.dtype(str(argty.dtype))
# Prepare other info for reconstruction of the PyArray
type_num = Constant(ll_int, dtype.num)
itemsize = Constant(ll_int, dtype.itemsize)
# Call helper to reconstruct PyArray objects
obj = builder.call(fn_array_new, [nd, dims, strides, data,
type_num, itemsize])
else:
# Other argument types => use generic boxing
obj = pyapi.from_native_value(argty, arg)
builder.store(obj, objptr)
object_args.append(obj)
obj_is_null = cgutils.is_null(builder, obj)
builder.store(obj_is_null, error_pointer)
cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)
# Call ufunc core function
object_sig = [types.pyobject] * len(object_args)
status, retval = context.call_conv.call_function(
builder, func, types.pyobject, object_sig,
object_args)
builder.store(status.is_error, error_pointer)
# Release returned object
pyapi.decref(retval)
builder.branch(bb_core_return)
# At return block
builder.position_at_end(bb_core_return)
# Release argument objects
for objptr in object_pointers:
pyapi.decref(builder.load(objptr))
innercall = status.code
return innercall, builder.load(error_pointer)
class GUArrayArg(object):
def __init__(self, context, builder, args, steps, i, step_offset,
typ, syms, sym_dim):
self.context = context
self.builder = builder
offset = context.get_constant(types.intp, i)
data = builder.load(builder.gep(args, [offset], name="data.ptr"),
name="data")
self.data = data
core_step_ptr = builder.gep(steps, [offset], name="core.step.ptr")
core_step = builder.load(core_step_ptr)
if isinstance(typ, types.Array):
as_scalar = not syms
# number of symbol in the shape spec should match the dimension
# of the array type.
if len(syms) != typ.ndim:
if len(syms) == 0 and typ.ndim == 1:
# This is an exception for handling scalar argument.
# The type can be 1D array for scalar.
# In the future, we may deprecate this exception.
pass
else:
raise TypeError("type and shape signature mismatch for arg "
"#{0}".format(i + 1))
ndim = typ.ndim
shape = [sym_dim[s] for s in syms]
strides = []
for j in range(ndim):
stepptr = builder.gep(steps,
[context.get_constant(types.intp,
step_offset + j)],
name="step.ptr")
step = builder.load(stepptr)
strides.append(step)
ldcls = (_ArrayAsScalarArgLoader
if as_scalar
else _ArrayArgLoader)
self._loader = ldcls(dtype=typ.dtype,
ndim=ndim,
core_step=core_step,
as_scalar=as_scalar,
shape=shape,
strides=strides)
else:
# If typ is not an array
if syms:
raise TypeError("scalar type {0} given for non scalar "
"argument #{1}".format(typ, i + 1))
self._loader = _ScalarArgLoader(dtype=typ, stride=core_step)
def get_array_at_offset(self, ind):
return self._loader.load(context=self.context, builder=self.builder,
data=self.data, ind=ind)
class _ScalarArgLoader(object):
"""
Handle GFunc argument loading where a scalar type is used in the core
function.
Note: It still has a stride because the input to the gufunc can be an array
for this argument.
"""
def __init__(self, dtype, stride):
self.dtype = dtype
self.stride = stride
def load(self, context, builder, data, ind):
# Load at base + ind * stride
data = builder.gep(data, [builder.mul(ind, self.stride)])
dptr = builder.bitcast(data,
context.get_data_type(self.dtype).as_pointer())
return builder.load(dptr)
class _ArrayArgLoader(object):
"""
Handle GUFunc argument loading where an array is expected.
"""
def __init__(self, dtype, ndim, core_step, as_scalar, shape, strides):
self.dtype = dtype
self.ndim = ndim
self.core_step = core_step
self.as_scalar = as_scalar
self.shape = shape
self.strides = strides
def load(self, context, builder, data, ind):
arytyp = types.Array(dtype=self.dtype, ndim=self.ndim, layout="A")
arycls = context.make_array(arytyp)
array = arycls(context, builder)
offseted_data = cgutils.pointer_add(builder,
data,
builder.mul(self.core_step,
ind))
shape, strides = self._shape_and_strides(context, builder)
itemsize = context.get_abi_sizeof(context.get_data_type(self.dtype))
context.populate_array(array,
data=builder.bitcast(offseted_data,
array.data.type),
shape=shape,
strides=strides,
itemsize=context.get_constant(types.intp,
itemsize),
meminfo=None)
return array._getvalue()
def _shape_and_strides(self, context, builder):
shape = cgutils.pack_array(builder, self.shape)
strides = cgutils.pack_array(builder, self.strides)
return shape, strides
class _ArrayAsScalarArgLoader(_ArrayArgLoader):
"""
Handle GUFunc argument loading where the shape signature specifies
a scalar "()" but a 1D array is used for the type of the core function.
"""
def _shape_and_strides(self, context, builder):
# Set shape and strides for a 1D size 1 array
one = context.get_constant(types.intp, 1)
zero = context.get_constant(types.intp, 0)
shape = cgutils.pack_array(builder, [one])
strides = cgutils.pack_array(builder, [zero])
return shape, strides