833 lines
39 KiB
Python
833 lines
39 KiB
Python
#
|
|
# Copyright (c) 2017 Intel Corporation
|
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
#
|
|
|
|
import copy
|
|
|
|
import numpy as np
|
|
from llvmlite import ir as lir
|
|
|
|
from numba.core import types, typing, utils, ir, config, ir_utils, registry
|
|
from numba.core.typing.templates import (CallableTemplate, signature,
|
|
infer_global, AbstractTemplate)
|
|
from numba.core.imputils import lower_builtin
|
|
from numba.core.extending import register_jitable
|
|
from numba.core.errors import NumbaValueError
|
|
from numba.misc.special import literal_unroll
|
|
import numba
|
|
|
|
import operator
|
|
from numba.np import numpy_support
|
|
|
|
class StencilFuncLowerer(object):
|
|
'''Callable class responsible for lowering calls to a specific StencilFunc.
|
|
'''
|
|
def __init__(self, sf):
|
|
self.stencilFunc = sf
|
|
|
|
def __call__(self, context, builder, sig, args):
|
|
cres = self.stencilFunc.compile_for_argtys(sig.args, {},
|
|
sig.return_type, None)
|
|
res = context.call_internal(builder, cres.fndesc, sig, args)
|
|
context.add_linking_libs([cres.library])
|
|
return res
|
|
|
|
@register_jitable
|
|
def raise_if_incompatible_array_sizes(a, *args):
|
|
ashape = a.shape
|
|
|
|
# We need literal_unroll here because the stencil might take
|
|
# multiple input arrays with different types that are not compatible
|
|
# (e.g. values as float[:] and flags as bool[:])
|
|
# When more than three total arrays are given, the second and third
|
|
# are iterated over in the loop below. Without literal_unroll, their
|
|
# types have to match.
|
|
# An example failing signature without literal_unroll might be
|
|
# (float[:], float[:], bool[:]) (Just (float[:], bool[:]) wouldn't fail)
|
|
for arg in literal_unroll(args):
|
|
if a.ndim != arg.ndim:
|
|
raise ValueError("Secondary stencil array does not have same number "
|
|
" of dimensions as the first stencil input.")
|
|
argshape = arg.shape
|
|
for i in range(len(ashape)):
|
|
if ashape[i] > argshape[i]:
|
|
raise ValueError("Secondary stencil array has some dimension "
|
|
"smaller the same dimension in the first "
|
|
"stencil input.")
|
|
|
|
def slice_addition(the_slice, addend):
|
|
""" Called by stencil in Python mode to add the loop index to a
|
|
user-specified slice.
|
|
"""
|
|
return slice(the_slice.start + addend, the_slice.stop + addend)
|
|
|
|
class StencilFunc(object):
|
|
"""
|
|
A special type to hold stencil information for the IR.
|
|
"""
|
|
|
|
id_counter = 0
|
|
|
|
def __init__(self, kernel_ir, mode, options):
|
|
self.id = type(self).id_counter
|
|
type(self).id_counter += 1
|
|
self.kernel_ir = kernel_ir
|
|
self.mode = mode
|
|
self.options = options
|
|
self.kws = [] # remember original kws arguments
|
|
|
|
# stencils only supported for CPU context currently
|
|
self._typingctx = registry.cpu_target.typing_context
|
|
self._targetctx = registry.cpu_target.target_context
|
|
self._install_type(self._typingctx)
|
|
self.neighborhood = self.options.get("neighborhood")
|
|
self._type_cache = {}
|
|
self._lower_me = StencilFuncLowerer(self)
|
|
|
|
def replace_return_with_setitem(self, blocks, index_vars, out_name):
|
|
"""
|
|
Find return statements in the IR and replace them with a SetItem
|
|
call of the value "returned" by the kernel into the result array.
|
|
Returns the block labels that contained return statements.
|
|
"""
|
|
ret_blocks = []
|
|
|
|
for label, block in blocks.items():
|
|
scope = block.scope
|
|
loc = block.loc
|
|
new_body = []
|
|
for stmt in block.body:
|
|
if isinstance(stmt, ir.Return):
|
|
ret_blocks.append(label)
|
|
# If 1D array then avoid the tuple construction.
|
|
if len(index_vars) == 1:
|
|
rvar = ir.Var(scope, out_name, loc)
|
|
ivar = ir.Var(scope, index_vars[0], loc)
|
|
new_body.append(ir.SetItem(rvar, ivar, stmt.value, loc))
|
|
else:
|
|
# Convert the string names of the index variables into
|
|
# ir.Var's.
|
|
var_index_vars = []
|
|
for one_var in index_vars:
|
|
index_var = ir.Var(scope, one_var, loc)
|
|
var_index_vars += [index_var]
|
|
|
|
s_index_var = scope.redefine("stencil_index", loc)
|
|
# Build a tuple from the index ir.Var's.
|
|
tuple_call = ir.Expr.build_tuple(var_index_vars, loc)
|
|
new_body.append(ir.Assign(tuple_call, s_index_var, loc))
|
|
rvar = ir.Var(scope, out_name, loc)
|
|
# Write the return statements original value into
|
|
# the array using the tuple index.
|
|
si = ir.SetItem(rvar, s_index_var, stmt.value, loc)
|
|
new_body.append(si)
|
|
else:
|
|
new_body.append(stmt)
|
|
block.body = new_body
|
|
return ret_blocks
|
|
|
|
def add_indices_to_kernel(self, kernel, index_names, ndim,
|
|
neighborhood, standard_indexed, typemap, calltypes):
|
|
"""
|
|
Transforms the stencil kernel as specified by the user into one
|
|
that includes each dimension's index variable as part of the getitem
|
|
calls. So, in effect array[-1] becomes array[index0-1].
|
|
"""
|
|
const_dict = {}
|
|
kernel_consts = []
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("add_indices_to_kernel", ndim, neighborhood)
|
|
ir_utils.dump_blocks(kernel.blocks)
|
|
|
|
if neighborhood is None:
|
|
need_to_calc_kernel = True
|
|
else:
|
|
need_to_calc_kernel = False
|
|
if len(neighborhood) != ndim:
|
|
raise ValueError("%d dimensional neighborhood specified for %d " \
|
|
"dimensional input array" % (len(neighborhood), ndim))
|
|
|
|
tuple_table = ir_utils.get_tuple_table(kernel.blocks)
|
|
|
|
relatively_indexed = set()
|
|
|
|
for block in kernel.blocks.values():
|
|
scope = block.scope
|
|
loc = block.loc
|
|
new_body = []
|
|
for stmt in block.body:
|
|
if (isinstance(stmt, ir.Assign) and
|
|
isinstance(stmt.value, ir.Const)):
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("remembering in const_dict", stmt.target.name,
|
|
stmt.value.value)
|
|
# Remember consts for use later.
|
|
const_dict[stmt.target.name] = stmt.value.value
|
|
if ((isinstance(stmt, ir.Assign)
|
|
and isinstance(stmt.value, ir.Expr)
|
|
and stmt.value.op in ['setitem', 'static_setitem']
|
|
and stmt.value.value.name in kernel.arg_names) or
|
|
(isinstance(stmt, ir.SetItem)
|
|
and stmt.target.name in kernel.arg_names)):
|
|
raise ValueError("Assignments to arrays passed to stencil " \
|
|
"kernels is not allowed.")
|
|
if (isinstance(stmt, ir.Assign)
|
|
and isinstance(stmt.value, ir.Expr)
|
|
and stmt.value.op in ['getitem', 'static_getitem']
|
|
and stmt.value.value.name in kernel.arg_names
|
|
and stmt.value.value.name not in standard_indexed):
|
|
# We found a getitem from the input array.
|
|
if stmt.value.op == 'getitem':
|
|
stmt_index_var = stmt.value.index
|
|
else:
|
|
stmt_index_var = stmt.value.index_var
|
|
# allow static_getitem since rewrite passes are applied
|
|
#raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")
|
|
|
|
relatively_indexed.add(stmt.value.value.name)
|
|
|
|
# Store the index used after looking up the variable in
|
|
# the const dictionary.
|
|
if need_to_calc_kernel:
|
|
assert hasattr(stmt_index_var, 'name')
|
|
|
|
if stmt_index_var.name in tuple_table:
|
|
kernel_consts += [tuple_table[stmt_index_var.name]]
|
|
elif stmt_index_var.name in const_dict:
|
|
kernel_consts += [const_dict[stmt_index_var.name]]
|
|
else:
|
|
raise NumbaValueError("stencil kernel index is not "
|
|
"constant, 'neighborhood' option required")
|
|
|
|
if ndim == 1:
|
|
# Single dimension always has index variable 'index0'.
|
|
# tmpvar will hold the real index and is computed by
|
|
# adding the relative offset in stmt.value.index to
|
|
# the current absolute location in index0.
|
|
index_var = ir.Var(scope, index_names[0], loc)
|
|
tmpvar = scope.redefine("stencil_index", loc)
|
|
stmt_index_var_typ = typemap[stmt_index_var.name]
|
|
# If the array is indexed with a slice then we
|
|
# have to add the index value with a call to
|
|
# slice_addition.
|
|
if isinstance(stmt_index_var_typ, types.misc.SliceType):
|
|
sa_var = scope.redefine("slice_addition", loc)
|
|
sa_func = numba.njit(slice_addition)
|
|
sa_func_typ = types.functions.Dispatcher(sa_func)
|
|
typemap[sa_var.name] = sa_func_typ
|
|
g_sa = ir.Global("slice_addition", sa_func, loc)
|
|
new_body.append(ir.Assign(g_sa, sa_var, loc))
|
|
slice_addition_call = ir.Expr.call(sa_var, [stmt_index_var, index_var], (), loc)
|
|
calltypes[slice_addition_call] = sa_func_typ.get_call_type(self._typingctx, [stmt_index_var_typ, types.intp], {})
|
|
new_body.append(ir.Assign(slice_addition_call, tmpvar, loc))
|
|
new_body.append(ir.Assign(
|
|
ir.Expr.getitem(stmt.value.value, tmpvar, loc),
|
|
stmt.target, loc))
|
|
else:
|
|
acc_call = ir.Expr.binop(operator.add, stmt_index_var,
|
|
index_var, loc)
|
|
new_body.append(ir.Assign(acc_call, tmpvar, loc))
|
|
new_body.append(ir.Assign(
|
|
ir.Expr.getitem(stmt.value.value, tmpvar, loc),
|
|
stmt.target, loc))
|
|
else:
|
|
index_vars = []
|
|
sum_results = []
|
|
s_index_var = scope.redefine("stencil_index", loc)
|
|
const_index_vars = []
|
|
ind_stencils = []
|
|
|
|
stmt_index_var_typ = typemap[stmt_index_var.name]
|
|
# Same idea as above but you have to extract
|
|
# individual elements out of the tuple indexing
|
|
# expression and add the corresponding index variable
|
|
# to them and then reconstitute as a tuple that can
|
|
# index the array.
|
|
for dim in range(ndim):
|
|
tmpvar = scope.redefine("const_index", loc)
|
|
new_body.append(ir.Assign(ir.Const(dim, loc),
|
|
tmpvar, loc))
|
|
const_index_vars += [tmpvar]
|
|
index_var = ir.Var(scope, index_names[dim], loc)
|
|
index_vars += [index_var]
|
|
|
|
tmpvar = scope.redefine("ind_stencil_index", loc)
|
|
ind_stencils += [tmpvar]
|
|
getitemvar = scope.redefine("getitem", loc)
|
|
getitemcall = ir.Expr.getitem(stmt_index_var,
|
|
const_index_vars[dim], loc)
|
|
new_body.append(ir.Assign(getitemcall, getitemvar, loc))
|
|
# Get the type of this particular part of the index tuple.
|
|
if isinstance(stmt_index_var_typ, types.ConstSized):
|
|
one_index_typ = stmt_index_var_typ[dim]
|
|
else:
|
|
one_index_typ = stmt_index_var_typ[:]
|
|
# If the array is indexed with a slice then we
|
|
# have to add the index value with a call to
|
|
# slice_addition.
|
|
if isinstance(one_index_typ, types.misc.SliceType):
|
|
sa_var = scope.redefine("slice_addition", loc)
|
|
sa_func = numba.njit(slice_addition)
|
|
sa_func_typ = types.functions.Dispatcher(sa_func)
|
|
typemap[sa_var.name] = sa_func_typ
|
|
g_sa = ir.Global("slice_addition", sa_func, loc)
|
|
new_body.append(ir.Assign(g_sa, sa_var, loc))
|
|
slice_addition_call = ir.Expr.call(sa_var, [getitemvar, index_vars[dim]], (), loc)
|
|
calltypes[slice_addition_call] = sa_func_typ.get_call_type(self._typingctx, [one_index_typ, types.intp], {})
|
|
new_body.append(ir.Assign(slice_addition_call, tmpvar, loc))
|
|
else:
|
|
acc_call = ir.Expr.binop(operator.add, getitemvar,
|
|
index_vars[dim], loc)
|
|
new_body.append(ir.Assign(acc_call, tmpvar, loc))
|
|
|
|
tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
|
|
new_body.append(ir.Assign(tuple_call, s_index_var, loc))
|
|
new_body.append(ir.Assign(
|
|
ir.Expr.getitem(stmt.value.value,s_index_var,loc),
|
|
stmt.target,loc))
|
|
else:
|
|
new_body.append(stmt)
|
|
block.body = new_body
|
|
|
|
if need_to_calc_kernel:
|
|
# Find the size of the kernel by finding the maximum absolute value
|
|
# index used in the kernel specification.
|
|
neighborhood = [[0,0] for _ in range(ndim)]
|
|
if len(kernel_consts) == 0:
|
|
raise NumbaValueError("Stencil kernel with no accesses to "
|
|
"relatively indexed arrays.")
|
|
|
|
for index in kernel_consts:
|
|
if isinstance(index, tuple) or isinstance(index, list):
|
|
for i in range(len(index)):
|
|
te = index[i]
|
|
if isinstance(te, ir.Var) and te.name in const_dict:
|
|
te = const_dict[te.name]
|
|
if isinstance(te, int):
|
|
neighborhood[i][0] = min(neighborhood[i][0], te)
|
|
neighborhood[i][1] = max(neighborhood[i][1], te)
|
|
else:
|
|
raise NumbaValueError(
|
|
"stencil kernel index is not constant,"
|
|
"'neighborhood' option required")
|
|
index_len = len(index)
|
|
elif isinstance(index, int):
|
|
neighborhood[0][0] = min(neighborhood[0][0], index)
|
|
neighborhood[0][1] = max(neighborhood[0][1], index)
|
|
index_len = 1
|
|
else:
|
|
raise NumbaValueError(
|
|
"Non-tuple or non-integer used as stencil index.")
|
|
if index_len != ndim:
|
|
raise NumbaValueError(
|
|
"Stencil index does not match array dimensionality.")
|
|
|
|
return (neighborhood, relatively_indexed)
|
|
|
|
|
|
def get_return_type(self, argtys):
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("get_return_type", argtys)
|
|
ir_utils.dump_blocks(self.kernel_ir.blocks)
|
|
|
|
if not isinstance(argtys[0], types.npytypes.Array):
|
|
raise NumbaValueError("The first argument to a stencil kernel must "
|
|
"be the primary input array.")
|
|
|
|
from numba.core import typed_passes
|
|
typemap, return_type, calltypes, _ = typed_passes.type_inference_stage(
|
|
self._typingctx,
|
|
self._targetctx,
|
|
self.kernel_ir,
|
|
argtys,
|
|
None,
|
|
{})
|
|
if isinstance(return_type, types.npytypes.Array):
|
|
raise NumbaValueError(
|
|
"Stencil kernel must return a scalar and not a numpy array.")
|
|
|
|
real_ret = types.npytypes.Array(return_type, argtys[0].ndim,
|
|
argtys[0].layout)
|
|
return (real_ret, typemap, calltypes)
|
|
|
|
def _install_type(self, typingctx):
|
|
"""Constructs and installs a typing class for a StencilFunc object in
|
|
the input typing context.
|
|
"""
|
|
_ty_cls = type('StencilFuncTyping_' +
|
|
str(self.id),
|
|
(AbstractTemplate,),
|
|
dict(key=self, generic=self._type_me))
|
|
typingctx.insert_user_function(self, _ty_cls)
|
|
|
|
def compile_for_argtys(self, argtys, kwtys, return_type, sigret):
|
|
# look in the type cache to find if result array is passed
|
|
(_, result, typemap, calltypes) = self._type_cache[argtys]
|
|
new_func = self._stencil_wrapper(result, sigret, return_type,
|
|
typemap, calltypes, *argtys)
|
|
return new_func
|
|
|
|
def _type_me(self, argtys, kwtys):
|
|
"""
|
|
Implement AbstractTemplate.generic() for the typing class
|
|
built by StencilFunc._install_type().
|
|
Return the call-site signature.
|
|
"""
|
|
if (self.neighborhood is not None and
|
|
len(self.neighborhood) != argtys[0].ndim):
|
|
raise NumbaValueError("%d dimensional neighborhood specified "
|
|
"for %d dimensional input array" %
|
|
(len(self.neighborhood), argtys[0].ndim))
|
|
|
|
argtys_extra = argtys
|
|
sig_extra = ""
|
|
result = None
|
|
if 'out' in kwtys:
|
|
argtys_extra += (kwtys['out'],)
|
|
sig_extra += ", out=None"
|
|
result = kwtys['out']
|
|
|
|
if 'neighborhood' in kwtys:
|
|
argtys_extra += (kwtys['neighborhood'],)
|
|
sig_extra += ", neighborhood=None"
|
|
|
|
# look in the type cache first
|
|
if argtys_extra in self._type_cache:
|
|
(_sig, _, _, _) = self._type_cache[argtys_extra]
|
|
return _sig
|
|
|
|
(real_ret, typemap, calltypes) = self.get_return_type(argtys)
|
|
sig = signature(real_ret, *argtys_extra)
|
|
dummy_text = ("def __numba_dummy_stencil({}{}):\n pass\n".format(
|
|
",".join(self.kernel_ir.arg_names), sig_extra))
|
|
exec(dummy_text) in globals(), locals()
|
|
dummy_func = eval("__numba_dummy_stencil")
|
|
sig = sig.replace(pysig=utils.pysignature(dummy_func))
|
|
self._targetctx.insert_func_defn([(self._lower_me, self, argtys_extra)])
|
|
self._type_cache[argtys_extra] = (sig, result, typemap, calltypes)
|
|
return sig
|
|
|
|
def copy_ir_with_calltypes(self, ir, calltypes):
|
|
"""
|
|
Create a copy of a given IR along with its calltype information.
|
|
We need a copy of the calltypes because copy propagation applied
|
|
to the copied IR will change the calltypes and make subsequent
|
|
uses of the original IR invalid.
|
|
"""
|
|
copy_calltypes = {}
|
|
kernel_copy = ir.copy()
|
|
kernel_copy.blocks = {}
|
|
# For each block...
|
|
for (block_label, block) in ir.blocks.items():
|
|
new_block = copy.deepcopy(ir.blocks[block_label])
|
|
new_block.body = []
|
|
# For each statement in each block...
|
|
for stmt in ir.blocks[block_label].body:
|
|
# Copy the statement to the new copy of the kernel
|
|
# and if the original statement is in the original
|
|
# calltypes then add the type associated with this
|
|
# statement to the calltypes copy.
|
|
scopy = copy.deepcopy(stmt)
|
|
new_block.body.append(scopy)
|
|
if stmt in calltypes:
|
|
copy_calltypes[scopy] = calltypes[stmt]
|
|
kernel_copy.blocks[block_label] = new_block
|
|
return (kernel_copy, copy_calltypes)
|
|
|
|
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
|
|
# Overall approach:
|
|
# 1) Construct a string containing a function definition for the stencil function
|
|
# that will execute the stencil kernel. This function definition includes a
|
|
# unique stencil function name, the parameters to the stencil kernel, loop
|
|
# nests across the dimensions of the input array. Those loop nests use the
|
|
# computed stencil kernel size so as not to try to compute elements where
|
|
# elements outside the bounds of the input array would be needed.
|
|
# 2) The but of the loop nest in this new function is a special sentinel
|
|
# assignment.
|
|
# 3) Get the IR of this new function.
|
|
# 4) Split the block containing the sentinel assignment and remove the sentinel
|
|
# assignment. Insert the stencil kernel IR into the stencil function IR
|
|
# after label and variable renaming of the stencil kernel IR to prevent
|
|
# conflicts with the stencil function IR.
|
|
# 5) Compile the combined stencil function IR + stencil kernel IR into existence.
|
|
|
|
# Copy the kernel so that our changes for this callsite
|
|
# won't effect other callsites.
|
|
(kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
|
|
self.kernel_ir, calltypes)
|
|
# The stencil kernel body becomes the body of a loop, for which args aren't needed.
|
|
ir_utils.remove_args(kernel_copy.blocks)
|
|
first_arg = kernel_copy.arg_names[0]
|
|
|
|
in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
|
|
name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
|
|
ir_utils.apply_copy_propagate(
|
|
kernel_copy.blocks,
|
|
in_cps,
|
|
name_var_table,
|
|
typemap,
|
|
copy_calltypes)
|
|
|
|
if "out" in name_var_table:
|
|
raise NumbaValueError("Cannot use the reserved word 'out' in stencil kernels.")
|
|
|
|
sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("name_var_table", name_var_table, sentinel_name)
|
|
|
|
the_array = args[0]
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("_stencil_wrapper", return_type, return_type.dtype,
|
|
type(return_type.dtype), args)
|
|
ir_utils.dump_blocks(kernel_copy.blocks)
|
|
|
|
# We generate a Numba function to execute this stencil and here
|
|
# create the unique name of this function.
|
|
stencil_func_name = "__numba_stencil_%s_%s" % (
|
|
hex(id(the_array)).replace("-", "_"),
|
|
self.id)
|
|
|
|
# We will put a loop nest in the generated function for each
|
|
# dimension in the input array. Here we create the name for
|
|
# the index variable for each dimension. index0, index1, ...
|
|
index_vars = []
|
|
for i in range(the_array.ndim):
|
|
index_var_name = ir_utils.get_unused_var_name("index" + str(i),
|
|
name_var_table)
|
|
index_vars += [index_var_name]
|
|
|
|
# Create extra signature for out and neighborhood.
|
|
out_name = ir_utils.get_unused_var_name("out", name_var_table)
|
|
neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
|
|
name_var_table)
|
|
sig_extra = ""
|
|
if result is not None:
|
|
sig_extra += ", {}=None".format(out_name)
|
|
if "neighborhood" in dict(self.kws):
|
|
sig_extra += ", {}=None".format(neighborhood_name)
|
|
|
|
# Get a list of the standard indexed array names.
|
|
standard_indexed = self.options.get("standard_indexing", [])
|
|
|
|
if first_arg in standard_indexed:
|
|
raise NumbaValueError("The first argument to a stencil kernel must "
|
|
"use relative indexing, not standard indexing.")
|
|
|
|
if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
|
|
raise NumbaValueError("Standard indexing requested for an array name "
|
|
"not present in the stencil kernel definition.")
|
|
|
|
# Add index variables to getitems in the IR to transition the accesses
|
|
# in the kernel from relative to regular Python indexing. Returns the
|
|
# computed size of the stencil kernel and a list of the relatively indexed
|
|
# arrays.
|
|
kernel_size, relatively_indexed = self.add_indices_to_kernel(
|
|
kernel_copy, index_vars, the_array.ndim,
|
|
self.neighborhood, standard_indexed, typemap, copy_calltypes)
|
|
if self.neighborhood is None:
|
|
self.neighborhood = kernel_size
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("After add_indices_to_kernel")
|
|
ir_utils.dump_blocks(kernel_copy.blocks)
|
|
|
|
# The return in the stencil kernel becomes a setitem for that
|
|
# particular point in the iteration space.
|
|
ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
|
|
index_vars, out_name)
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("After replace_return_with_setitem", ret_blocks)
|
|
ir_utils.dump_blocks(kernel_copy.blocks)
|
|
|
|
# Start to form the new function to execute the stencil kernel.
|
|
func_text = "def {}({}{}):\n".format(stencil_func_name,
|
|
",".join(kernel_copy.arg_names), sig_extra)
|
|
|
|
# Get loop ranges for each dimension, which could be either int
|
|
# or variable. In the latter case we'll use the extra neighborhood
|
|
# argument to the function.
|
|
ranges = []
|
|
for i in range(the_array.ndim):
|
|
if isinstance(kernel_size[i][0], int):
|
|
lo = kernel_size[i][0]
|
|
hi = kernel_size[i][1]
|
|
else:
|
|
lo = "{}[{}][0]".format(neighborhood_name, i)
|
|
hi = "{}[{}][1]".format(neighborhood_name, i)
|
|
ranges.append((lo, hi))
|
|
|
|
# If there are more than one relatively indexed arrays, add a call to
|
|
# a function that will raise an error if any of the relatively indexed
|
|
# arrays are of different size than the first input array.
|
|
if len(relatively_indexed) > 1:
|
|
func_text += " raise_if_incompatible_array_sizes(" + first_arg
|
|
for other_array in relatively_indexed:
|
|
if other_array != first_arg:
|
|
func_text += "," + other_array
|
|
func_text += ")\n"
|
|
|
|
# Get the shape of the first input array.
|
|
shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
|
|
func_text += " {} = {}.shape\n".format(shape_name, first_arg)
|
|
|
|
# Converts cval to a string constant
|
|
def cval_as_str(cval):
|
|
if not np.isfinite(cval):
|
|
# See if this is a string-repr numerical const, issue #7286
|
|
if np.isnan(cval):
|
|
return "np.nan"
|
|
elif np.isinf(cval):
|
|
if cval < 0:
|
|
return "-np.inf"
|
|
else:
|
|
return "np.inf"
|
|
else:
|
|
return str(cval)
|
|
|
|
# If we have to allocate the output array (the out argument was not used)
|
|
# then us numpy.full if the user specified a cval stencil decorator option
|
|
# or np.zeros if they didn't to allocate the array.
|
|
if result is None:
|
|
return_type_name = numpy_support.as_dtype(
|
|
return_type.dtype).type.__name__
|
|
out_init ="{} = np.empty({}, dtype=np.{})\n".format(
|
|
out_name, shape_name, return_type_name)
|
|
|
|
if "cval" in self.options:
|
|
cval = self.options["cval"]
|
|
cval_ty = typing.typeof.typeof(cval)
|
|
if not self._typingctx.can_convert(cval_ty, return_type.dtype):
|
|
msg = "cval type does not match stencil return type."
|
|
raise NumbaValueError(msg)
|
|
else:
|
|
cval = 0
|
|
func_text += " " + out_init
|
|
for dim in range(the_array.ndim):
|
|
start_items = [":"] * the_array.ndim
|
|
end_items = [":"] * the_array.ndim
|
|
start_items[dim] = ":-{}".format(self.neighborhood[dim][0])
|
|
end_items[dim] = "-{}:".format(self.neighborhood[dim][1])
|
|
func_text += " " + "{}[{}] = {}\n".format(out_name, ",".join(start_items), cval_as_str(cval))
|
|
func_text += " " + "{}[{}] = {}\n".format(out_name, ",".join(end_items), cval_as_str(cval))
|
|
else: # result is present, if cval is set then use it
|
|
if "cval" in self.options:
|
|
cval = self.options["cval"]
|
|
cval_ty = typing.typeof.typeof(cval)
|
|
if not self._typingctx.can_convert(cval_ty, return_type.dtype):
|
|
msg = "cval type does not match stencil return type."
|
|
raise NumbaValueError(msg)
|
|
out_init = "{}[:] = {}\n".format(out_name, cval_as_str(cval))
|
|
func_text += " " + out_init
|
|
|
|
offset = 1
|
|
# Add the loop nests to the new function.
|
|
for i in range(the_array.ndim):
|
|
for j in range(offset):
|
|
func_text += " "
|
|
# ranges[i][0] is the minimum index used in the i'th dimension
|
|
# but minimum's greater than 0 don't preclude any entry in the array.
|
|
# So, take the minimum of 0 and the minimum index found in the kernel
|
|
# and this will be a negative number (potentially -0). Then, we do
|
|
# unary - on that to get the positive offset in this dimension whose
|
|
# use is precluded.
|
|
# ranges[i][1] is the maximum of 0 and the observed maximum index
|
|
# in this dimension because negative maximums would not cause us to
|
|
# preclude any entry in the array from being used.
|
|
func_text += ("for {} in range(-min(0,{}),"
|
|
"{}[{}]-max(0,{})):\n").format(
|
|
index_vars[i],
|
|
ranges[i][0],
|
|
shape_name,
|
|
i,
|
|
ranges[i][1])
|
|
offset += 1
|
|
|
|
for j in range(offset):
|
|
func_text += " "
|
|
# Put a sentinel in the code so we can locate it in the IR. We will
|
|
# remove this sentinel assignment and replace it with the IR for the
|
|
# stencil kernel body.
|
|
func_text += "{} = 0\n".format(sentinel_name)
|
|
func_text += " return {}\n".format(out_name)
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("new stencil func text")
|
|
print(func_text)
|
|
|
|
# Force the new stencil function into existence.
|
|
exec(func_text) in globals(), locals()
|
|
stencil_func = eval(stencil_func_name)
|
|
if sigret is not None:
|
|
pysig = utils.pysignature(stencil_func)
|
|
sigret.pysig = pysig
|
|
# Get the IR for the newly created stencil function.
|
|
from numba.core import compiler
|
|
stencil_ir = compiler.run_frontend(stencil_func)
|
|
ir_utils.remove_dels(stencil_ir.blocks)
|
|
|
|
# rename all variables in stencil_ir afresh
|
|
var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
|
|
new_var_dict = {}
|
|
reserved_names = ([sentinel_name, out_name, neighborhood_name,
|
|
shape_name] + kernel_copy.arg_names + index_vars)
|
|
for name, var in var_table.items():
|
|
if not name in reserved_names:
|
|
assert isinstance(var, ir.Var)
|
|
new_var = var.scope.redefine(var.name, var.loc)
|
|
new_var_dict[name] = new_var.name
|
|
ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)
|
|
|
|
stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1
|
|
|
|
# Shift labels in the kernel copy so they are guaranteed unique
|
|
# and don't conflict with any labels in the stencil_ir.
|
|
kernel_copy.blocks = ir_utils.add_offset_to_labels(
|
|
kernel_copy.blocks, stencil_stub_last_label)
|
|
new_label = max(kernel_copy.blocks.keys()) + 1
|
|
# Adjust ret_blocks to account for addition of the offset.
|
|
ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
|
|
print("before replace sentinel stencil_ir")
|
|
ir_utils.dump_blocks(stencil_ir.blocks)
|
|
print("before replace sentinel kernel_copy")
|
|
ir_utils.dump_blocks(kernel_copy.blocks)
|
|
|
|
# Search all the block in the stencil outline for the sentinel.
|
|
for label, block in stencil_ir.blocks.items():
|
|
for i, inst in enumerate(block.body):
|
|
if (isinstance( inst, ir.Assign) and
|
|
inst.target.name == sentinel_name):
|
|
# We found the sentinel assignment.
|
|
loc = inst.loc
|
|
scope = block.scope
|
|
# split block across __sentinel__
|
|
# A new block is allocated for the statements prior to the
|
|
# sentinel but the new block maintains the current block
|
|
# label.
|
|
prev_block = ir.Block(scope, loc)
|
|
prev_block.body = block.body[:i]
|
|
# The current block is used for statements after sentinel.
|
|
block.body = block.body[i + 1:]
|
|
# But the current block gets a new label.
|
|
body_first_label = min(kernel_copy.blocks.keys())
|
|
|
|
# The previous block jumps to the minimum labelled block of
|
|
# the parfor body.
|
|
prev_block.append(ir.Jump(body_first_label, loc))
|
|
# Add all the parfor loop body blocks to the gufunc
|
|
# function's IR.
|
|
for (l, b) in kernel_copy.blocks.items():
|
|
stencil_ir.blocks[l] = b
|
|
|
|
stencil_ir.blocks[new_label] = block
|
|
stencil_ir.blocks[label] = prev_block
|
|
# Add a jump from all the blocks that previously contained
|
|
# a return in the stencil kernel to the block
|
|
# containing statements after the sentinel.
|
|
for ret_block in ret_blocks:
|
|
stencil_ir.blocks[ret_block].append(
|
|
ir.Jump(new_label, loc))
|
|
break
|
|
else:
|
|
continue
|
|
break
|
|
|
|
stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
|
|
ir_utils.remove_dels(stencil_ir.blocks)
|
|
|
|
assert(isinstance(the_array, types.Type))
|
|
array_types = args
|
|
|
|
new_stencil_param_types = list(array_types)
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("new_stencil_param_types", new_stencil_param_types)
|
|
ir_utils.dump_blocks(stencil_ir.blocks)
|
|
|
|
# Compile the combined stencil function with the replaced loop
|
|
# body in it.
|
|
ir_utils.fixup_var_define_in_scope(stencil_ir.blocks)
|
|
new_func = compiler.compile_ir(
|
|
self._typingctx,
|
|
self._targetctx,
|
|
stencil_ir,
|
|
new_stencil_param_types,
|
|
None,
|
|
compiler.DEFAULT_FLAGS,
|
|
{})
|
|
return new_func
|
|
|
|
def __call__(self, *args, **kwargs):
|
|
self._typingctx.refresh()
|
|
if (self.neighborhood is not None and
|
|
len(self.neighborhood) != args[0].ndim):
|
|
raise ValueError("{} dimensional neighborhood specified for {} "
|
|
"dimensional input array".format(
|
|
len(self.neighborhood), args[0].ndim))
|
|
|
|
if 'out' in kwargs:
|
|
result = kwargs['out']
|
|
rdtype = result.dtype
|
|
rttype = numpy_support.from_dtype(rdtype)
|
|
result_type = types.npytypes.Array(rttype, result.ndim,
|
|
numpy_support.map_layout(result))
|
|
array_types = tuple([typing.typeof.typeof(x) for x in args])
|
|
array_types_full = tuple([typing.typeof.typeof(x) for x in args] +
|
|
[result_type])
|
|
else:
|
|
result = None
|
|
array_types = tuple([typing.typeof.typeof(x) for x in args])
|
|
array_types_full = array_types
|
|
|
|
if config.DEBUG_ARRAY_OPT >= 1:
|
|
print("__call__", array_types, args, kwargs)
|
|
|
|
(real_ret, typemap, calltypes) = self.get_return_type(array_types)
|
|
new_func = self._stencil_wrapper(result, None, real_ret, typemap,
|
|
calltypes, *array_types_full)
|
|
|
|
if result is None:
|
|
return new_func.entry_point(*args)
|
|
else:
|
|
return new_func.entry_point(*(args+(result,)))
|
|
|
|
def stencil(func_or_mode='constant', **options):
|
|
# called on function without specifying mode style
|
|
if not isinstance(func_or_mode, str):
|
|
mode = 'constant' # default style
|
|
func = func_or_mode
|
|
else:
|
|
mode = func_or_mode
|
|
func = None
|
|
|
|
for option in options:
|
|
if option not in ["cval", "standard_indexing", "neighborhood"]:
|
|
raise ValueError("Unknown stencil option " + option)
|
|
|
|
wrapper = _stencil(mode, options)
|
|
if func is not None:
|
|
return wrapper(func)
|
|
return wrapper
|
|
|
|
def _stencil(mode, options):
|
|
if mode != 'constant':
|
|
raise ValueError("Unsupported mode style " + mode)
|
|
|
|
def decorated(func):
|
|
from numba.core import compiler
|
|
kernel_ir = compiler.run_frontend(func)
|
|
return StencilFunc(kernel_ir, mode, options)
|
|
|
|
return decorated
|
|
|
|
@lower_builtin(stencil)
|
|
def stencil_dummy_lower(context, builder, sig, args):
|
|
"lowering for dummy stencil calls"
|
|
return lir.Constant(lir.IntType(types.intp.bitwidth), 0)
|