ai-content-maker/.venv/Lib/site-packages/numba/stencils/stencil.py

833 lines
39 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
#
# Copyright (c) 2017 Intel Corporation
# SPDX-License-Identifier: BSD-2-Clause
#
import copy
import numpy as np
from llvmlite import ir as lir
from numba.core import types, typing, utils, ir, config, ir_utils, registry
from numba.core.typing.templates import (CallableTemplate, signature,
infer_global, AbstractTemplate)
from numba.core.imputils import lower_builtin
from numba.core.extending import register_jitable
from numba.core.errors import NumbaValueError
from numba.misc.special import literal_unroll
import numba
import operator
from numba.np import numpy_support
class StencilFuncLowerer(object):
    """Lowering callback bound to one particular StencilFunc.

    Calling an instance compiles the wrapped stencil for the signature being
    lowered and emits an internal call to the compiled function.
    """

    def __init__(self, sf):
        # The StencilFunc whose call sites this object lowers.
        self.stencilFunc = sf

    def __call__(self, context, builder, sig, args):
        """Lower one call of the wrapped stencil and return the call result.

        The stencil is compiled for ``sig.args`` / ``sig.return_type``; the
        resulting library is then registered with ``context`` for linking.
        """
        stencil = self.stencilFunc
        compiled = stencil.compile_for_argtys(
            sig.args, {}, sig.return_type, None)
        call_result = context.call_internal(
            builder, compiled.fndesc, sig, args)
        # The caller must link against the freshly compiled stencil.
        libraries = [compiled.library]
        context.add_linking_libs(libraries)
        return call_result
@register_jitable
def raise_if_incompatible_array_sizes(a, *args):
    """Check every secondary stencil array in ``args`` against ``a``.

    ``a`` is the first stencil input.  A ValueError is raised when an array
    in ``args`` has a different number of dimensions than ``a``, or when
    ``a`` is larger than that array along any dimension.  Nothing is
    returned when all arrays are compatible.
    """
    ashape = a.shape

    # We need literal_unroll here because the stencil might take
    # multiple input arrays with different types that are not compatible
    # (e.g. values as float[:] and flags as bool[:])
    # When more than three total arrays are given, the second and third
    # are iterated over in the loop below. Without literal_unroll, their
    # types have to match.
    # An example failing signature without literal_unroll might be
    # (float[:], float[:], bool[:]) (Just (float[:], bool[:]) wouldn't fail)
    for arg in literal_unroll(args):
        # The dimension counts must agree before shapes can be compared
        # element by element.
        if a.ndim != arg.ndim:
            raise ValueError("Secondary stencil array does not have same number "
                             " of dimensions as the first stencil input.")
        argshape = arg.shape
        # A secondary array may be larger than the primary one, but never
        # smaller in any dimension.
        for i in range(len(ashape)):
            if ashape[i] > argshape[i]:
                raise ValueError("Secondary stencil array has some dimension "
                                 "smaller the same dimension in the first "
                                 "stencil input.")
def slice_addition(the_slice, addend):
    """Return ``the_slice`` with its start and stop both shifted by ``addend``.

    Called by stencil in Python mode to add the loop index to a
    user-specified slice.  The step of ``the_slice`` is not carried over.
    """
    shifted_start = the_slice.start + addend
    shifted_stop = the_slice.stop + addend
    return slice(shifted_start, shifted_stop)
class StencilFunc(object):
"""
A special type to hold stencil information for the IR.
"""
id_counter = 0
def __init__(self, kernel_ir, mode, options):
    """Record the kernel IR and options and register typing for this stencil.

    Each instance takes the next value of the class-level ``id_counter`` as
    its ``id`` and installs a typing template for itself in the CPU typing
    context.
    """
    # Hand out a unique, monotonically increasing id per instance.
    cls = type(self)
    self.id = cls.id_counter
    cls.id_counter += 1

    self.kernel_ir = kernel_ir
    self.mode = mode
    self.options = options
    self.kws = []  # remember original kws arguments

    # stencils only supported for CPU context currently
    cpu_target = registry.cpu_target
    self._typingctx = cpu_target.typing_context
    self._targetctx = cpu_target.target_context
    self._install_type(self._typingctx)

    self.neighborhood = options.get("neighborhood")
    self._type_cache = {}
    self._lower_me = StencilFuncLowerer(self)
def replace_return_with_setitem(self, blocks, index_vars, out_name):
    """Turn each kernel ``return`` into a store into the output array.

    Every ``ir.Return`` found in ``blocks`` is replaced by an ``ir.SetItem``
    that writes the returned value into the variable named ``out_name`` at
    the position given by ``index_vars`` (a list of index variable names).
    Block bodies are rewritten in place.

    Returns the labels of the blocks that contained a return statement, one
    entry per return statement.
    """
    labels_with_return = []
    one_dimensional = len(index_vars) == 1

    for label, block in blocks.items():
        scope, loc = block.scope, block.loc
        rewritten = []
        for stmt in block.body:
            if not isinstance(stmt, ir.Return):
                rewritten.append(stmt)
                continue

            labels_with_return.append(label)
            if one_dimensional:
                # A 1D array is indexed directly; no tuple is needed.
                out_var = ir.Var(scope, out_name, loc)
                position = ir.Var(scope, index_vars[0], loc)
                rewritten.append(
                    ir.SetItem(out_var, position, stmt.value, loc))
                continue

            # Turn the index variable names into ir.Var's and pack them
            # into a tuple that addresses the output element.
            components = [ir.Var(scope, name, loc) for name in index_vars]
            position = scope.redefine("stencil_index", loc)
            packed = ir.Expr.build_tuple(components, loc)
            rewritten.append(ir.Assign(packed, position, loc))
            out_var = ir.Var(scope, out_name, loc)
            # Store the value the kernel "returned" at that position.
            rewritten.append(ir.SetItem(out_var, position, stmt.value, loc))
        block.body = rewritten

    return labels_with_return
def add_indices_to_kernel(self, kernel, index_names, ndim,
                          neighborhood, standard_indexed, typemap, calltypes):
    """
    Transforms the stencil kernel as specified by the user into one
    that includes each dimension's index variable as part of the getitem
    calls.  So, in effect array[-1] becomes array[index0-1].

    Parameters:
        kernel: kernel IR; the bodies of ``kernel.blocks`` are rewritten
            in place and ``kernel.arg_names`` identifies the input arrays.
        index_names: names of the loop index variables, one per dimension.
        ndim: number of dimensions of the input array.
        neighborhood: user supplied neighborhood or None.  When None the
            neighborhood is computed from the constant indices used by
            the kernel.
        standard_indexed: names of arguments whose getitems are left
            untouched.
        typemap, calltypes: typing information for the kernel; both are
            extended here when a slice index needs a ``slice_addition``
            call.

    Returns a tuple ``(neighborhood, relatively_indexed)`` where
    ``relatively_indexed`` is the set of argument names that were indexed
    relatively.

    Raises ValueError when the neighborhood length differs from ``ndim``
    or when the kernel assigns to one of its array arguments, and
    NumbaValueError when a neighborhood has to be computed but an index
    is not constant, no relative access exists, or an index does not fit
    the array dimensionality.
    """
    # Maps variable name -> constant value for every constant assignment seen.
    const_dict = {}
    # Constant indices (ints or tuples) used on relatively indexed arrays.
    kernel_consts = []

    if config.DEBUG_ARRAY_OPT >= 1:
        print("add_indices_to_kernel", ndim, neighborhood)
        ir_utils.dump_blocks(kernel.blocks)

    if neighborhood is None:
        need_to_calc_kernel = True
    else:
        need_to_calc_kernel = False
        if len(neighborhood) != ndim:
            raise ValueError("%d dimensional neighborhood specified for %d " \
                "dimensional input array" % (len(neighborhood), ndim))

    tuple_table = ir_utils.get_tuple_table(kernel.blocks)

    relatively_indexed = set()

    for block in kernel.blocks.values():
        scope = block.scope
        loc = block.loc
        new_body = []
        for stmt in block.body:
            if (isinstance(stmt, ir.Assign) and
                    isinstance(stmt.value, ir.Const)):
                if config.DEBUG_ARRAY_OPT >= 1:
                    print("remembering in const_dict", stmt.target.name,
                          stmt.value.value)
                # Remember consts for use later.
                const_dict[stmt.target.name] = stmt.value.value
            # Writing into any kernel argument is rejected, whichever IR
            # form the store takes.
            if ((isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['setitem', 'static_setitem']
                    and stmt.value.value.name in kernel.arg_names) or
                (isinstance(stmt, ir.SetItem)
                    and stmt.target.name in kernel.arg_names)):
                raise ValueError("Assignments to arrays passed to stencil " \
                    "kernels is not allowed.")
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Expr)
                    and stmt.value.op in ['getitem', 'static_getitem']
                    and stmt.value.value.name in kernel.arg_names
                    and stmt.value.value.name not in standard_indexed):
                # We found a getitem from the input array.
                if stmt.value.op == 'getitem':
                    stmt_index_var = stmt.value.index
                else:
                    stmt_index_var = stmt.value.index_var
                    # allow static_getitem since rewrite passes are applied
                    #raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")

                relatively_indexed.add(stmt.value.value.name)

                # Store the index used after looking up the variable in
                # the const dictionary.
                if need_to_calc_kernel:
                    assert hasattr(stmt_index_var, 'name')

                    if stmt_index_var.name in tuple_table:
                        kernel_consts += [tuple_table[stmt_index_var.name]]
                    elif stmt_index_var.name in const_dict:
                        kernel_consts += [const_dict[stmt_index_var.name]]
                    else:
                        raise NumbaValueError("stencil kernel index is not "
                            "constant, 'neighborhood' option required")

                if ndim == 1:
                    # Single dimension always has index variable 'index0'.
                    # tmpvar will hold the real index and is computed by
                    # adding the relative offset in stmt.value.index to
                    # the current absolute location in index0.
                    index_var = ir.Var(scope, index_names[0], loc)
                    tmpvar = scope.redefine("stencil_index", loc)
                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # If the array is indexed with a slice then we
                    # have to add the index value with a call to
                    # slice_addition.
                    if isinstance(stmt_index_var_typ, types.misc.SliceType):
                        sa_var = scope.redefine("slice_addition", loc)
                        sa_func = numba.njit(slice_addition)
                        sa_func_typ = types.functions.Dispatcher(sa_func)
                        typemap[sa_var.name] = sa_func_typ
                        g_sa = ir.Global("slice_addition", sa_func, loc)
                        new_body.append(ir.Assign(g_sa, sa_var, loc))
                        slice_addition_call = ir.Expr.call(sa_var, [stmt_index_var, index_var], (), loc)
                        calltypes[slice_addition_call] = sa_func_typ.get_call_type(self._typingctx, [stmt_index_var_typ, types.intp], {})
                        new_body.append(ir.Assign(slice_addition_call, tmpvar, loc))
                        new_body.append(ir.Assign(
                            ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                            stmt.target, loc))
                    else:
                        acc_call = ir.Expr.binop(operator.add, stmt_index_var,
                                                 index_var, loc)
                        new_body.append(ir.Assign(acc_call, tmpvar, loc))
                        new_body.append(ir.Assign(
                            ir.Expr.getitem(stmt.value.value, tmpvar, loc),
                            stmt.target, loc))
                else:
                    index_vars = []
                    # NOTE(review): sum_results is never read in this method.
                    sum_results = []
                    s_index_var = scope.redefine("stencil_index", loc)
                    const_index_vars = []
                    ind_stencils = []

                    stmt_index_var_typ = typemap[stmt_index_var.name]
                    # Same idea as above but you have to extract
                    # individual elements out of the tuple indexing
                    # expression and add the corresponding index variable
                    # to them and then reconstitute as a tuple that can
                    # index the array.
                    for dim in range(ndim):
                        tmpvar = scope.redefine("const_index", loc)
                        new_body.append(ir.Assign(ir.Const(dim, loc),
                                                  tmpvar, loc))
                        const_index_vars += [tmpvar]
                        index_var = ir.Var(scope, index_names[dim], loc)
                        index_vars += [index_var]

                        tmpvar = scope.redefine("ind_stencil_index", loc)
                        ind_stencils += [tmpvar]
                        getitemvar = scope.redefine("getitem", loc)
                        getitemcall = ir.Expr.getitem(stmt_index_var,
                                                      const_index_vars[dim], loc)
                        new_body.append(ir.Assign(getitemcall, getitemvar, loc))
                        # Get the type of this particular part of the index tuple.
                        if isinstance(stmt_index_var_typ, types.ConstSized):
                            one_index_typ = stmt_index_var_typ[dim]
                        else:
                            one_index_typ = stmt_index_var_typ[:]
                        # If the array is indexed with a slice then we
                        # have to add the index value with a call to
                        # slice_addition.
                        if isinstance(one_index_typ, types.misc.SliceType):
                            sa_var = scope.redefine("slice_addition", loc)
                            sa_func = numba.njit(slice_addition)
                            sa_func_typ = types.functions.Dispatcher(sa_func)
                            typemap[sa_var.name] = sa_func_typ
                            g_sa = ir.Global("slice_addition", sa_func, loc)
                            new_body.append(ir.Assign(g_sa, sa_var, loc))
                            slice_addition_call = ir.Expr.call(sa_var, [getitemvar, index_vars[dim]], (), loc)
                            calltypes[slice_addition_call] = sa_func_typ.get_call_type(self._typingctx, [one_index_typ, types.intp], {})
                            new_body.append(ir.Assign(slice_addition_call, tmpvar, loc))
                        else:
                            acc_call = ir.Expr.binop(operator.add, getitemvar,
                                                     index_vars[dim], loc)
                            new_body.append(ir.Assign(acc_call, tmpvar, loc))

                    # Rebuild the absolute index tuple and use it for the
                    # getitem that replaces the original statement.
                    tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
                    new_body.append(ir.Assign(tuple_call, s_index_var, loc))
                    new_body.append(ir.Assign(
                        ir.Expr.getitem(stmt.value.value,s_index_var,loc),
                        stmt.target,loc))
            else:
                # Anything that is not a relative getitem is kept as is.
                new_body.append(stmt)
        block.body = new_body

    if need_to_calc_kernel:
        # Find the size of the kernel by finding the maximum absolute value
        # index used in the kernel specification.
        # Each dimension starts at [0, 0] and is widened by every constant
        # index seen, so the range always includes offset 0.
        neighborhood = [[0,0] for _ in range(ndim)]
        if len(kernel_consts) == 0:
            raise NumbaValueError("Stencil kernel with no accesses to "
                                  "relatively indexed arrays.")

        for index in kernel_consts:
            if isinstance(index, tuple) or isinstance(index, list):
                for i in range(len(index)):
                    te = index[i]
                    # Tuple elements may still be variables; resolve the
                    # ones known to hold constants.
                    if isinstance(te, ir.Var) and te.name in const_dict:
                        te = const_dict[te.name]
                    if isinstance(te, int):
                        neighborhood[i][0] = min(neighborhood[i][0], te)
                        neighborhood[i][1] = max(neighborhood[i][1], te)
                    else:
                        raise NumbaValueError(
                            "stencil kernel index is not constant,"
                            "'neighborhood' option required")
                index_len = len(index)
            elif isinstance(index, int):
                neighborhood[0][0] = min(neighborhood[0][0], index)
                neighborhood[0][1] = max(neighborhood[0][1], index)
                index_len = 1
            else:
                raise NumbaValueError(
                    "Non-tuple or non-integer used as stencil index.")
            if index_len != ndim:
                raise NumbaValueError(
                    "Stencil index does not match array dimensionality.")

    return (neighborhood, relatively_indexed)
def get_return_type(self, argtys):
    """Run type inference on the kernel and derive the stencil's output type.

    ``argtys`` holds the argument types; the first one must be an array
    type.  Returns ``(real_ret, typemap, calltypes)`` where ``real_ret`` is
    an array type with the kernel's scalar return type as dtype and the
    first argument's ``ndim`` and ``layout``.

    Raises NumbaValueError when the first argument is not an array or when
    the kernel itself returns an array.
    """
    if config.DEBUG_ARRAY_OPT >= 1:
        print("get_return_type", argtys)
        ir_utils.dump_blocks(self.kernel_ir.blocks)

    array_cls = types.npytypes.Array
    primary = argtys[0]
    if not isinstance(primary, array_cls):
        raise NumbaValueError("The first argument to a stencil kernel must "
                              "be the primary input array.")

    # Imported here rather than at module level, as in the original code.
    from numba.core import typed_passes
    inferred = typed_passes.type_inference_stage(
        self._typingctx,
        self._targetctx,
        self.kernel_ir,
        argtys,
        None,
        {})
    typemap, return_type, calltypes, _ = inferred

    if isinstance(return_type, array_cls):
        raise NumbaValueError(
            "Stencil kernel must return a scalar and not a numpy array.")

    real_ret = array_cls(return_type, primary.ndim, primary.layout)
    return (real_ret, typemap, calltypes)
def _install_type(self, typingctx):
    """Build a typing template for this StencilFunc and register it.

    The template class is named after this object's ``id``, uses the object
    itself as ``key`` and dispatches ``generic`` to ``self._type_me``.
    """
    template_name = 'StencilFuncTyping_' + str(self.id)
    template_bases = (AbstractTemplate,)
    template_attrs = {"key": self, "generic": self._type_me}
    template_cls = type(template_name, template_bases, template_attrs)
    typingctx.insert_user_function(self, template_cls)
def compile_for_argtys(self, argtys, kwtys, return_type, sigret):
    """Compile the stencil for ``argtys`` using the cached typing results.

    The entry for ``argtys`` must already be in ``self._type_cache`` (a
    KeyError is raised otherwise); it supplies the type of a passed-in
    result array, if any, together with the typemap and calltypes.
    ``kwtys`` is accepted but not used here.
    """
    cache_entry = self._type_cache[argtys]
    _sig, out_type, typemap, calltypes = cache_entry
    return self._stencil_wrapper(
        out_type, sigret, return_type, typemap, calltypes, *argtys)
def _type_me(self, argtys, kwtys):
    """
    Implement AbstractTemplate.generic() for the typing class
    built by StencilFunc._install_type().
    Return the call-site signature.

    ``argtys`` are the positional argument types and ``kwtys`` the keyword
    argument types; only the ``out`` and ``neighborhood`` keywords are
    looked at.  Signatures are cached in ``self._type_cache`` keyed by the
    positional types extended with the recognised keyword types.  On a
    cache miss the lowering implementation is also registered with the
    target context.

    Raises NumbaValueError when a neighborhood was configured whose length
    does not match the dimensionality of the first argument.
    """
    if (self.neighborhood is not None and
            len(self.neighborhood) != argtys[0].ndim):
        raise NumbaValueError("%d dimensional neighborhood specified "
                              "for %d dimensional input array" %
                              (len(self.neighborhood), argtys[0].ndim))

    argtys_extra = argtys
    sig_extra = ""
    result = None
    if 'out' in kwtys:
        argtys_extra += (kwtys['out'],)
        sig_extra += ", out=None"
        result = kwtys['out']

    if 'neighborhood' in kwtys:
        argtys_extra += (kwtys['neighborhood'],)
        sig_extra += ", neighborhood=None"

    # look in the type cache first
    if argtys_extra in self._type_cache:
        (_sig, _, _, _) = self._type_cache[argtys_extra]
        return _sig

    (real_ret, typemap, calltypes) = self.get_return_type(argtys)
    sig = signature(real_ret, *argtys_extra)

    # A throwaway function with the right parameter list is created only
    # to obtain a Python signature object for the call site.
    dummy_text = ("def __numba_dummy_stencil({}{}):\n pass\n".format(
        ",".join(self.kernel_ir.arg_names), sig_extra))
    # Capture the definition in an explicit namespace.  The previous
    # "exec(...) in globals(), locals()" was a Python 2 leftover, and
    # reading the result back with eval() relied on exec() and eval()
    # sharing one frame-locals dict, which no longer holds once locals()
    # returns independent snapshots (PEP 667, Python 3.13+).
    namespace = {}
    exec(dummy_text, globals(), namespace)
    dummy_func = namespace["__numba_dummy_stencil"]

    sig = sig.replace(pysig=utils.pysignature(dummy_func))
    self._targetctx.insert_func_defn([(self._lower_me, self, argtys_extra)])
    self._type_cache[argtys_extra] = (sig, result, typemap, calltypes)
    return sig
def copy_ir_with_calltypes(self, ir, calltypes):
    """
    Duplicate ``ir`` together with the calltype entries of its statements.

    The calltypes need their own copy because copy propagation applied to
    the duplicated IR rewrites them, which would invalidate later uses of
    the original IR.

    Returns ``(ir_copy, calltypes_copy)``.  Every block and every statement
    in the copy is a deep copy, and ``calltypes_copy`` maps each copied
    statement to the type its original had in ``calltypes``.
    """
    duplicated_calltypes = {}
    duplicated_ir = ir.copy()
    duplicated_ir.blocks = {}

    for label, source_block in ir.blocks.items():
        # Clone the block for its non-body attributes; the body is rebuilt
        # statement by statement so calltypes can be carried across.
        block_clone = copy.deepcopy(source_block)
        cloned_body = []
        for stmt in source_block.body:
            stmt_clone = copy.deepcopy(stmt)
            cloned_body.append(stmt_clone)
            if stmt in calltypes:
                duplicated_calltypes[stmt_clone] = calltypes[stmt]
        block_clone.body = cloned_body
        duplicated_ir.blocks[label] = block_clone

    return (duplicated_ir, duplicated_calltypes)
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
    """Generate and compile the function that applies the kernel to an array.

    Parameters:
        result: ``None`` when the generated function has to allocate its
            own output array; any other value makes the generated function
            take an extra ``out`` argument.
        sigret: when not ``None``, its ``pysig`` attribute is set to the
            Python signature of the generated function.
        return_type: array type of the output; its ``dtype`` selects the
            dtype of an allocated output and is checked against ``cval``.
        typemap, calltypes: typing information of the kernel IR.
        *args: types of the arguments; ``args[0]`` is the type of the
            first (primary) input array.

    Returns the result of ``compiler.compile_ir`` for the combined IR.

    Raises NumbaValueError when the kernel uses the name ``out``, when the
    ``standard_indexing`` option names the first argument or an unknown
    argument, or when ``cval`` cannot be converted to the return dtype.

    Side effect: ``self.neighborhood`` is set to the computed kernel size
    when it was ``None``.
    """
    # Overall approach:
    # 1) Construct a string containing a function definition for the stencil function
    #    that will execute the stencil kernel.  This function definition includes a
    #    unique stencil function name, the parameters to the stencil kernel, loop
    #    nests across the dimensions of the input array.  Those loop nests use the
    #    computed stencil kernel size so as not to try to compute elements where
    #    elements outside the bounds of the input array would be needed.
    # 2) The body of the loop nest in this new function is a special sentinel
    #    assignment.
    # 3) Get the IR of this new function.
    # 4) Split the block containing the sentinel assignment and remove the sentinel
    #    assignment.  Insert the stencil kernel IR into the stencil function IR
    #    after label and variable renaming of the stencil kernel IR to prevent
    #    conflicts with the stencil function IR.
    # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

    # Copy the kernel so that our changes for this callsite
    # won't affect other callsites.
    (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
        self.kernel_ir, calltypes)
    # The stencil kernel body becomes the body of a loop, for which args aren't needed.
    ir_utils.remove_args(kernel_copy.blocks)
    first_arg = kernel_copy.arg_names[0]

    in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
    name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
    ir_utils.apply_copy_propagate(
        kernel_copy.blocks,
        in_cps,
        name_var_table,
        typemap,
        copy_calltypes)

    if "out" in name_var_table:
        raise NumbaValueError("Cannot use the reserved word 'out' in stencil kernels.")

    sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("name_var_table", name_var_table, sentinel_name)

    the_array = args[0]

    if config.DEBUG_ARRAY_OPT >= 1:
        print("_stencil_wrapper", return_type, return_type.dtype,
              type(return_type.dtype), args)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # We generate a Numba function to execute this stencil and here
    # create the unique name of this function.
    stencil_func_name = "__numba_stencil_%s_%s" % (
        hex(id(the_array)).replace("-", "_"),
        self.id)

    # We will put a loop nest in the generated function for each
    # dimension in the input array.  Here we create the name for
    # the index variable for each dimension.  index0, index1, ...
    index_vars = []
    for i in range(the_array.ndim):
        index_var_name = ir_utils.get_unused_var_name("index" + str(i),
                                                      name_var_table)
        index_vars += [index_var_name]

    # Create extra signature for out and neighborhood.
    out_name = ir_utils.get_unused_var_name("out", name_var_table)
    neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
                                                     name_var_table)
    sig_extra = ""
    if result is not None:
        sig_extra += ", {}=None".format(out_name)
    if "neighborhood" in dict(self.kws):
        sig_extra += ", {}=None".format(neighborhood_name)

    # Get a list of the standard indexed array names.
    standard_indexed = self.options.get("standard_indexing", [])

    if first_arg in standard_indexed:
        raise NumbaValueError("The first argument to a stencil kernel must "
                              "use relative indexing, not standard indexing.")

    if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
        raise NumbaValueError("Standard indexing requested for an array name "
                              "not present in the stencil kernel definition.")

    # Add index variables to getitems in the IR to transition the accesses
    # in the kernel from relative to regular Python indexing.  Returns the
    # computed size of the stencil kernel and a list of the relatively indexed
    # arrays.
    kernel_size, relatively_indexed = self.add_indices_to_kernel(
        kernel_copy, index_vars, the_array.ndim,
        self.neighborhood, standard_indexed, typemap, copy_calltypes)
    if self.neighborhood is None:
        self.neighborhood = kernel_size

    if config.DEBUG_ARRAY_OPT >= 1:
        print("After add_indices_to_kernel")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # The return in the stencil kernel becomes a setitem for that
    # particular point in the iteration space.
    ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                  index_vars, out_name)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("After replace_return_with_setitem", ret_blocks)
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Start to form the new function to execute the stencil kernel.
    func_text = "def {}({}{}):\n".format(stencil_func_name,
                                         ",".join(kernel_copy.arg_names), sig_extra)

    # Get loop ranges for each dimension, which could be either int
    # or variable.  In the latter case we'll use the extra neighborhood
    # argument to the function.
    ranges = []
    for i in range(the_array.ndim):
        if isinstance(kernel_size[i][0], int):
            lo = kernel_size[i][0]
            hi = kernel_size[i][1]
        else:
            lo = "{}[{}][0]".format(neighborhood_name, i)
            hi = "{}[{}][1]".format(neighborhood_name, i)
        ranges.append((lo, hi))

    # If there are more than one relatively indexed arrays, add a call to
    # a function that will raise an error if any of the relatively indexed
    # arrays are of different size than the first input array.
    if len(relatively_indexed) > 1:
        func_text += " raise_if_incompatible_array_sizes(" + first_arg
        for other_array in relatively_indexed:
            if other_array != first_arg:
                func_text += "," + other_array
        func_text += ")\n"

    # Get the shape of the first input array.
    shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
    func_text += " {} = {}.shape\n".format(shape_name, first_arg)

    # Converts cval to a string constant
    def cval_as_str(cval):
        if not np.isfinite(cval):
            # See if this is a string-repr numerical const, issue #7286
            if np.isnan(cval):
                return "np.nan"
            elif np.isinf(cval):
                if cval < 0:
                    return "-np.inf"
                else:
                    return "np.inf"
        else:
            return str(cval)

    # If we have to allocate the output array (the out argument was not used)
    # then the array is created with np.empty and its border, which the loop
    # nest never writes, is filled with cval (or 0 when no cval was given).
    if result is None:
        return_type_name = numpy_support.as_dtype(
            return_type.dtype).type.__name__
        out_init ="{} = np.empty({}, dtype=np.{})\n".format(
            out_name, shape_name, return_type_name)

        if "cval" in self.options:
            cval = self.options["cval"]
            cval_ty = typing.typeof.typeof(cval)
            if not self._typingctx.can_convert(cval_ty, return_type.dtype):
                msg = "cval type does not match stencil return type."
                raise NumbaValueError(msg)
        else:
            cval = 0
        func_text += " " + out_init
        for dim in range(the_array.ndim):
            start_items = [":"] * the_array.ndim
            end_items = [":"] * the_array.ndim
            start_items[dim] = ":-{}".format(self.neighborhood[dim][0])
            end_items[dim] = "-{}:".format(self.neighborhood[dim][1])
            func_text += " " + "{}[{}] = {}\n".format(out_name, ",".join(start_items), cval_as_str(cval))
            func_text += " " + "{}[{}] = {}\n".format(out_name, ",".join(end_items), cval_as_str(cval))
    else:  # result is present, if cval is set then use it
        if "cval" in self.options:
            cval = self.options["cval"]
            cval_ty = typing.typeof.typeof(cval)
            if not self._typingctx.can_convert(cval_ty, return_type.dtype):
                msg = "cval type does not match stencil return type."
                raise NumbaValueError(msg)
            out_init = "{}[:] = {}\n".format(out_name, cval_as_str(cval))
            func_text += " " + out_init

    offset = 1
    # Add the loop nests to the new function.
    for i in range(the_array.ndim):
        for j in range(offset):
            func_text += " "
        # ranges[i][0] is the minimum index used in the i'th dimension
        # but minimum's greater than 0 don't preclude any entry in the array.
        # So, take the minimum of 0 and the minimum index found in the kernel
        # and this will be a negative number (potentially -0).  Then, we do
        # unary - on that to get the positive offset in this dimension whose
        # use is precluded.
        # ranges[i][1] is the maximum of 0 and the observed maximum index
        # in this dimension because negative maximums would not cause us to
        # preclude any entry in the array from being used.
        func_text += ("for {} in range(-min(0,{}),"
                      "{}[{}]-max(0,{})):\n").format(
                          index_vars[i],
                          ranges[i][0],
                          shape_name,
                          i,
                          ranges[i][1])
        offset += 1

    for j in range(offset):
        func_text += " "
    # Put a sentinel in the code so we can locate it in the IR.  We will
    # remove this sentinel assignment and replace it with the IR for the
    # stencil kernel body.
    func_text += "{} = 0\n".format(sentinel_name)
    func_text += " return {}\n".format(out_name)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("new stencil func text")
        print(func_text)

    # Force the new stencil function into existence.  The definition is
    # captured in an explicit namespace: the previous
    # "exec(...) in globals(), locals()" was a Python 2 leftover, and
    # reading the function back with eval() relied on exec() and eval()
    # sharing one frame-locals dict, which no longer holds once locals()
    # returns independent snapshots (PEP 667, Python 3.13+).  The module
    # globals are still passed so the generated function resolves names
    # such as np through this module.
    namespace = {}
    exec(func_text, globals(), namespace)
    stencil_func = namespace[stencil_func_name]
    if sigret is not None:
        pysig = utils.pysignature(stencil_func)
        sigret.pysig = pysig
    # Get the IR for the newly created stencil function.
    from numba.core import compiler
    stencil_ir = compiler.run_frontend(stencil_func)
    ir_utils.remove_dels(stencil_ir.blocks)

    # rename all variables in stencil_ir afresh
    var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
    new_var_dict = {}
    reserved_names = ([sentinel_name, out_name, neighborhood_name,
                       shape_name] + kernel_copy.arg_names + index_vars)
    for name, var in var_table.items():
        if name not in reserved_names:
            assert isinstance(var, ir.Var)
            new_var = var.scope.redefine(var.name, var.loc)
            new_var_dict[name] = new_var.name
    ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

    stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

    # Shift labels in the kernel copy so they are guaranteed unique
    # and don't conflict with any labels in the stencil_ir.
    kernel_copy.blocks = ir_utils.add_offset_to_labels(
        kernel_copy.blocks, stencil_stub_last_label)
    new_label = max(kernel_copy.blocks.keys()) + 1
    # Adjust ret_blocks to account for addition of the offset.
    ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

    if config.DEBUG_ARRAY_OPT >= 1:
        print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
        print("before replace sentinel stencil_ir")
        ir_utils.dump_blocks(stencil_ir.blocks)
        print("before replace sentinel kernel_copy")
        ir_utils.dump_blocks(kernel_copy.blocks)

    # Search all the block in the stencil outline for the sentinel.
    for label, block in stencil_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance( inst, ir.Assign) and
                    inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block
                # label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(kernel_copy.blocks.keys())

                # The previous block jumps to the minimum labelled block of
                # the kernel body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the kernel body blocks to the stencil
                # function's IR.
                for (l, b) in kernel_copy.blocks.items():
                    stencil_ir.blocks[l] = b

                stencil_ir.blocks[new_label] = block
                stencil_ir.blocks[label] = prev_block
                # Add a jump from all the blocks that previously contained
                # a return in the stencil kernel to the block
                # containing statements after the sentinel.
                for ret_block in ret_blocks:
                    stencil_ir.blocks[ret_block].append(
                        ir.Jump(new_label, loc))
                break
        else:
            # No sentinel in this block; keep searching the next one.
            continue
        # The sentinel was found and replaced; stop searching.
        break

    stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
    ir_utils.remove_dels(stencil_ir.blocks)

    assert(isinstance(the_array, types.Type))
    array_types = args

    new_stencil_param_types = list(array_types)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("new_stencil_param_types", new_stencil_param_types)
        ir_utils.dump_blocks(stencil_ir.blocks)

    # Compile the combined stencil function with the replaced loop
    # body in it.
    ir_utils.fixup_var_define_in_scope(stencil_ir.blocks)
    new_func = compiler.compile_ir(
        self._typingctx,
        self._targetctx,
        stencil_ir,
        new_stencil_param_types,
        None,
        compiler.DEFAULT_FLAGS,
        {})
    return new_func
def __call__(self, *args, **kwargs):
    """Run the stencil from Python on concrete arrays.

    ``args`` are the input arrays, the first being the primary one.  An
    optional ``out`` keyword supplies the array that receives the result.
    The stencil is typed and compiled for the argument types and then
    executed; the compiled entry point's return value is returned.

    Raises ValueError when a configured neighborhood does not have one
    entry per dimension of the first argument.
    """
    self._typingctx.refresh()

    neighborhood = self.neighborhood
    if neighborhood is not None and len(neighborhood) != args[0].ndim:
        raise ValueError("{} dimensional neighborhood specified for {} "
                         "dimensional input array".format(
                             len(neighborhood), args[0].ndim))

    if 'out' in kwargs:
        result = kwargs['out']
        # Describe the output array as a numba array type so it can be
        # appended to the argument types used for compilation.
        element_type = numpy_support.from_dtype(result.dtype)
        result_type = types.npytypes.Array(
            element_type, result.ndim, numpy_support.map_layout(result))
        array_types = tuple([typing.typeof.typeof(x) for x in args])
        array_types_full = array_types + (result_type,)
    else:
        result = None
        array_types = tuple([typing.typeof.typeof(x) for x in args])
        array_types_full = array_types

    if config.DEBUG_ARRAY_OPT >= 1:
        print("__call__", array_types, args, kwargs)

    (real_ret, typemap, calltypes) = self.get_return_type(array_types)
    compiled = self._stencil_wrapper(result, None, real_ret, typemap,
                                     calltypes, *array_types_full)

    if result is None:
        return compiled.entry_point(*args)
    return compiled.entry_point(*args, result)
def stencil(func_or_mode='constant', **options):
    """Decorator that turns a kernel function into a stencil.

    May be applied directly to a function, or called first with a mode
    string and/or options.  The only options accepted are ``cval``,
    ``standard_indexing`` and ``neighborhood``; any other option raises
    ValueError.

    Returns the decorated stencil when given a function, otherwise the
    decorator to apply.
    """
    if isinstance(func_or_mode, str):
        mode, func = func_or_mode, None
    else:
        # called on function without specifying mode style
        mode, func = 'constant', func_or_mode  # default style

    known_options = ("cval", "standard_indexing", "neighborhood")
    for option in options:
        if option not in known_options:
            raise ValueError("Unknown stencil option " + option)

    wrapper = _stencil(mode, options)
    return wrapper if func is None else wrapper(func)
def _stencil(mode, options):
    """Return a decorator that builds a StencilFunc for ``mode``/``options``.

    Only the ``'constant'`` mode is accepted; any other mode raises
    ValueError.
    """
    if mode != 'constant':
        raise ValueError("Unsupported mode style " + mode)

    def decorated(func):
        # The compiler is imported only when a kernel is actually decorated.
        from numba.core import compiler
        return StencilFunc(compiler.run_frontend(func), mode, options)

    return decorated
@lower_builtin(stencil)
def stencil_dummy_lower(context, builder, sig, args):
    "lowering for dummy stencil calls"
    # The call lowers to a constant zero of intp width.
    intp_type = lir.IntType(types.intp.bitwidth)
    return lir.Constant(intp_type, 0)