188 lines
6.4 KiB
Python
188 lines
6.4 KiB
Python
|
import numpy as np
|
||
|
from numba.core import types
|
||
|
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
||
|
from numba import cuda
|
||
|
from numba.cuda import libdevice, compile_ptx
|
||
|
from numba.cuda.libdevicefuncs import functions, create_signature
|
||
|
|
||
|
|
||
|
def use_sincos(s, c, x):
    """Kernel: write sin(x[i]) into s[i] and cos(x[i]) into c[i]."""
    idx = cuda.grid(1)

    # Guard against threads beyond the end of the input array.
    if idx >= len(x):
        return

    sin_val, cos_val = libdevice.sincos(x[idx])
    s[idx] = sin_val
    c[idx] = cos_val
|
||
|
|
||
|
|
||
|
def use_frexp(frac, exp, x):
    """Kernel: split x[i] into mantissa (frac[i]) and exponent (exp[i])."""
    idx = cuda.grid(1)

    # Guard against threads beyond the end of the input array.
    if idx >= len(x):
        return

    mantissa, exponent = libdevice.frexp(x[idx])
    frac[idx] = mantissa
    exp[idx] = exponent
|
||
|
|
||
|
|
||
|
def use_sad(r, x, y, z):
    """Kernel: store the sum of absolute difference, |x[i] - y[i]| + z[i]."""
    idx = cuda.grid(1)

    # Guard against threads beyond the end of the input array.
    if idx >= len(x):
        return

    r[idx] = libdevice.sad(x[idx], y[idx], z[idx])
|
||
|
|
||
|
|
||
|
@skip_on_cudasim('Libdevice functions are not supported on cudasim')
class TestLibdevice(CUDATestCase):
    """
    Tests of libdevice function wrappers that launch kernels and compare the
    computed values against NumPy references.

    Their main purpose is to check that the generated implementations type
    and lower correctly for each kind of function return (scalar return,
    UniTuple return, Tuple return, etc.).
    """

    def test_sincos(self):
        # Exercises a libdevice function returning a UniTuple
        data = np.arange(100, dtype=np.float64)
        sin_out = np.zeros_like(data)
        cos_out = np.zeros_like(data)

        kernel = cuda.jit(use_sincos)
        kernel[4, 32](sin_out, cos_out, data)

        np.testing.assert_allclose(np.cos(data), cos_out)
        np.testing.assert_allclose(np.sin(data), sin_out)

    def test_frexp(self):
        # Exercises a libdevice function returning a heterogeneous Tuple
        data = np.linspace(start=1.0, stop=10.0, num=100, dtype=np.float64)
        frac_out = np.zeros_like(data)
        exp_out = np.zeros(shape=data.shape, dtype=np.int32)

        kernel = cuda.jit(use_frexp)
        kernel[4, 32](frac_out, exp_out, data)

        frac_expect, exp_expect = np.frexp(data)

        np.testing.assert_array_equal(frac_expect, frac_out)
        np.testing.assert_array_equal(exp_expect, exp_out)

    def test_sad(self):
        # Exercises a libdevice function returning a scalar
        x = np.arange(0, 200, 2)
        y = np.arange(50, 150)
        z = np.arange(15, 115)
        result = np.zeros_like(x)

        kernel = cuda.jit(use_sad)
        kernel[4, 32](result, x, y, z)

        np.testing.assert_array_equal(np.abs(x - y) + z, result)
|
||
|
|
||
|
|
||
|
# A template for generating tests of compiling calls to libdevice functions.
# The purpose of the call and assignment of the return variables is to ensure
# the actual function implementations are not thrown away resulting in a PTX
# implementation that only contains the ret instruction - this may hide certain
# errors.
#
# The %(...)s placeholders are filled in by make_test_call() below before the
# string is exec'd to produce a compilable Python function.
function_template = """\
from numba.cuda import libdevice

def pyfunc(%(pyargs)s):
    ret = libdevice.%(func)s(%(funcargs)s)
    %(retvars)s = ret
"""
|
||
|
|
||
|
|
||
|
def make_test_call(libname):
    """
    Generates a test function for each libdevice function.

    The returned function compiles a generated call to the wrapper for
    *libname* to PTX and checks that the call was not optimized away.
    """

    def _test_call_functions(self):
        # Strip off '__nv_' from libdevice name to get Python name
        apiname = libname[5:]
        # Sanity check that the wrapper exists; getattr raises AttributeError
        # if it does not. (The result itself is not needed.)
        getattr(libdevice, apiname)
        retty, args = functions[libname]
        sig = create_signature(retty, args)

        # Construct arguments to the libdevice function. These are all
        # non-pointer arguments to the underlying bitcode function.
        funcargs = ", ".join(['a%d' % i for i, arg in enumerate(args)
                              if not arg.is_ptr])

        # Arguments to the Python function (`pyfunc` in the template above) are
        # the arguments to the libdevice function, plus as many extra arguments
        # as there are in the return type of the libdevice function - one for
        # scalar-valued returns, or the length of the tuple for tuple-valued
        # returns.
        if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
            # Start with the parameters for the return values
            pyargs = ", ".join(['r%d' % i for i in
                                range(len(sig.return_type))])
            # Add the parameters for the argument values
            pyargs += ", " + funcargs
            # Generate the unpacking of the return value from the libdevice
            # function into the Python function return values (`r0`, `r1`,
            # etc.).
            retvars = ", ".join(['r%d[0]' % i for i in
                                 range(len(sig.return_type))])
        else:
            # Scalar return is a more straightforward case
            pyargs = "r0, " + funcargs
            retvars = "r0[0]"

        # Create the string containing the function to compile
        d = {'func': apiname,
             'pyargs': pyargs,
             'funcargs': funcargs,
             'retvars': retvars}
        code = function_template % d

        # Convert the string to a Python function. A dedicated dict is used
        # for the exec namespace rather than shadowing the `locals` builtin.
        namespace = {}
        exec(code, globals(), namespace)
        pyfunc = namespace['pyfunc']

        # Compute the signature for compilation. This mirrors the creation of
        # arguments to the Python function above: 1D C-contiguous arrays
        # (`ty[::1]`) are prepended for the return value(s).
        pyargs = [arg.ty for arg in args if not arg.is_ptr]
        if isinstance(sig.return_type, (types.Tuple, types.UniTuple)):
            pyreturns = [ret[::1] for ret in sig.return_type]
            pyargs = pyreturns + pyargs
        else:
            pyargs.insert(0, sig.return_type[::1])

        pyargs = tuple(pyargs)
        # The result type is not needed, only the PTX text.
        ptx, _ = compile_ptx(pyfunc, pyargs)

        # If the function body was discarded by optimization (therefore making
        # the test a bit weak), there won't be any loading of parameters -
        # ensure that a load from parameters occurs somewhere in the PTX
        self.assertIn('ld.param', ptx)

        # Returning the result (through a passed-in array) should also require
        # a store to global memory, so check for at least one of those too.
        self.assertIn('st.global', ptx)

    return _test_call_functions
|
||
|
|
||
|
|
||
|
@skip_on_cudasim('Compilation to PTX is not supported on cudasim')
class TestLibdeviceCompilation(unittest.TestCase):
    """
    Class for holding all tests of compiling calls to libdevice functions. We
    generate the actual tests in this class (as opposed to using subTest and
    one test within this class) because there are a lot of tests, and it makes
    the test suite appear frozen to test them all as subTests in one test.

    The test methods are attached to this class by the module-level loop
    following the class definition, one per entry in
    ``numba.cuda.libdevicefuncs.functions``.
    """
|
||
|
|
||
|
|
||
|
# Attach one generated compilation test per libdevice function to the
# TestLibdeviceCompilation class defined above.
for libname in functions:
    method_name = 'test_%s' % libname
    setattr(TestLibdeviceCompilation, method_name, make_test_call(libname))
|
||
|
|
||
|
|
||
|
# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()
|