import math
import multiprocessing as mp
import numbers
import re
import traceback
import unittest

import numpy as np

import numba
from numba import njit, prange
from numba.core import config
from numba.tests.support import TestCase, tag, override_env_config
# Decorator that skips the decorated test (or test class) unless
# `config.USING_SVML` is truthy.
needs_svml = unittest.skipUnless(config.USING_SVML,
"SVML tests need SVML to be present")
# float64 vector length -> name of the CPU architecture used to obtain it
vlen2cpu = {
    2: 'nehalem',
    4: 'haswell',
    8: 'skylake-avx512',
}
# float64 vector length -> CPU feature string; the entry for 8 forces LLVM to
# use AVX512 registers for vectorization
vlen2cpu_features = {
    2: '',
    4: '',
    8: '-prefer-256-bit',
}
# env vars to force CPU as skylake-avx512 and to force LLVM to use AVX512
# registers for vectorization (same values as the vlen == 8 table entries)
_skylake_axv512_envvars = {
    'NUMBA_CPU_NAME': vlen2cpu[8],
    'NUMBA_CPU_FEATURES': vlen2cpu_features[8],
}
# K: SVML functions, V: python functions which are expected to be SIMD-vectorized
# using SVML, explicit references to Python functions here are mostly for sake of
# instant import checks.
# TODO: [] and comments below mean unused/untested SVML function, it's to be
# either enabled or to be replaced with the explanation why the function
# cannot be used in Numba
# TODO: this test does not support functions with more than one argument yet
# The test logic should be modified if there is an SVML function being used under
# different name or module from Python
svml_funcs = {
    "sin": [np.sin, math.sin],
    "cos": [np.cos, math.cos],
    "pow": [],  # pow, math.pow],
    "exp": [np.exp, math.exp],
    "log": [np.log, math.log],
    "acos": [math.acos],
    "acosh": [math.acosh],
    "asin": [math.asin],
    "asinh": [math.asinh],
    "atan2": [],  # math.atan2],
    "atan": [math.atan],
    "atanh": [math.atanh],
    "cbrt": [],  # np.cbrt],
    "cdfnorm": [],
    "cdfnorminv": [],
    "ceil": [],  # np.ceil, math.ceil],
    "cosd": [],
    "cosh": [np.cosh, math.cosh],
    "erf": [math.erf],  # np.erf is available in Intel Distribution
    "erfc": [math.erfc],
    "erfcinv": [],
    "erfinv": [],
    "exp10": [],
    "exp2": [],  # np.exp2],
    "expm1": [np.expm1, math.expm1],
    "floor": [],  # np.floor, math.floor],
    "fmod": [],  # np.fmod, math.fmod],
    "hypot": [],  # np.hypot, math.hypot],
    "invsqrt": [],  # available in Intel Distribution
    "log10": [np.log10, math.log10],
    "log1p": [np.log1p, math.log1p],
    "log2": [],  # np.log2],
    "logb": [],
    "nearbyint": [],
    "rint": [],  # np.rint],
    "round": [],  # round],
    "sind": [],
    "sinh": [np.sinh, math.sinh],
    "tan": [np.tan, math.tan],
    "tanh": [np.tanh, math.tanh],
    "trunc": [],  # np.trunc, math.trunc],
}
# TODO: these functions are not vectorizable with complex types
complex_funcs_exclude = ["tan", "log10", "expm1", "log1p", "tanh", "log"]
# remove untested entries (those whose list of Python functions is empty)
svml_funcs = {k: v for k, v in svml_funcs.items() if v}
# lists of functions which belong to the numpy and math modules respectively;
# membership is decided from the first word of each callable's str(), i.e.
# "<ufunc" for numpy ufuncs and "<built-in" for math builtins
numpy_funcs = [f for f, v in svml_funcs.items()
               if "<ufunc" in [str(p).split(' ')[0] for p in v]]
other_funcs = [f for f, v in svml_funcs.items()
               if "<built-in" in [str(p).split(' ')[0] for p in v]]
def func_patterns(func, args, res, dtype, mode, vlen, fastmath, pad=' '*8):
    """
    For a given function and its usage modes,
    returns python code and assembly patterns it should and should not generate

    Parameters
    ----------
    func : str
        Name of the math/numpy function to call (e.g. ``"sin"``).
    args : list of str
        Names of the array variables passed to the function.
    res : str
        Name of the array variable the result is accumulated into.
    dtype : str
        Name of the numpy dtype of the arrays (e.g. ``"float32"``).
    mode : str
        One of ``"scalar"``, ``"numpy"``, ``"range"`` or ``"prange"``.
    vlen : int
        float64 vector length; doubled for float32/complex64.
    fastmath : bool
        When true, the expected SVML symbol has no ``_ha`` suffix.
    pad : str
        Indentation prepended to every generated source line.

    Returns
    -------
    (body, contains, avoids) : (str, list of str, list of str)
        Generated source snippet, patterns the assembly should contain and
        patterns it should not contain.
    """
    # generate a function call according to the usecase
    if mode == "scalar":
        arg_list = ','.join([a+'[0]' for a in args])
        body = '%s%s[0] += math.%s(%s)\n' % (pad, res, func, arg_list)
    elif mode == "numpy":
        body = '%s%s += np.%s(%s)' % (pad, res, func, ','.join(args))
        # integer arrays need an explicit cast back to the array dtype
        body += ('.astype(np.%s)\n' % dtype) if dtype.startswith('int') else '\n'
    else:
        assert mode == "range" or mode == "prange"
        arg_list = ','.join([a+'[i]' for a in args])
        body = (f'{pad}for i in {mode}({res}.size):\n'
                f'{pad}{pad}{res}[i] += math.{func}({arg_list})\n')
    # TODO: refactor so this for-loop goes into umbrella function,
    # 'mode' can be 'numpy', '0', 'i' instead
    # TODO: it will enable mixed usecases like prange + numpy

    # type specialization
    is_f32 = dtype == 'float32' or dtype == 'complex64'
    f = func+'f' if is_f32 else func
    v = vlen*2 if is_f32 else vlen
    # general expectations
    prec_suff = '' if fastmath else '_ha'
    scalar_func = '$_'+f if config.IS_OSX else '$'+f
    svml_func = '__svml_%s%d%s,' % (f, v, prec_suff)
    if mode == "scalar":
        contains = [scalar_func]
        avoids = ['__svml_', svml_func]
    else:  # will vectorize
        contains = [svml_func]
        # [scalar_func] - TODO: if possible, force LLVM to prevent
        # generating the failsafe scalar paths
        avoids = []
        if vlen != 8 and (is_f32 or dtype == 'int32'):  # Issue #3016
            avoids += ['%zmm', '__svml_%s%d%s,' % (f, v*2, prec_suff)]
    return body, contains, avoids
def usecase_name(dtype, mode, vlen, name):
    """ Build the ``<dtype>_<mode><vlen>_<name>`` label for a set of modes """
    mode_and_width = "{}{}".format(mode, vlen)
    return "_".join(["{}".format(dtype), mode_and_width, "{}".format(name)])
def combo_svml_usecase(dtype, mode, vlen, fastmath, name):
    """ Combine multiple function calls under single umbrella usecase

    Generates the source of a function ``<usecase name>(n)`` that allocates
    two arrays of ``n*8`` elements of `dtype` and accumulates into the second
    one the result of every tested function applicable to `mode`/`dtype`,
    then `exec`s that source.

    Returns a tuple ``(function, contains, avoids)`` where `function` carries
    its own source in ``__doc__`` and `contains`/`avoids` are the sets of
    assembly patterns collected from `func_patterns` for every function used.
    """
    name = usecase_name(dtype, mode, vlen, name)
    # every generated body line must share this indentation
    pad = ' ' * 8
    body = (f"def {name}(n):\n"
            f"{pad}x = np.empty(n*8, dtype=np.{dtype})\n"
            f"{pad}ret = np.empty_like(x)\n")
    funcs = set(numpy_funcs if mode == "numpy" else other_funcs)
    if dtype.startswith('complex'):
        funcs = funcs.difference(complex_funcs_exclude)
    contains = set()
    avoids = set()
    # fill body and expectation patterns; sorted so that the generated source
    # is the same on every run
    for f in sorted(funcs):
        b, c, a = func_patterns(f, ['x'], 'ret', dtype, mode, vlen, fastmath,
                                pad=pad)
        body += b
        contains.update(c)
        avoids.update(a)
    body += pad + "return ret"
    # now compile and return it along with its body in __doc__ and patterns
    ldict = {}
    exec(body, globals(), ldict)
    ldict[name].__doc__ = body
    return ldict[name], contains, avoids
@needs_svml
class TestSVMLGeneration(TestCase):
    """ Tests all SVML-generating functions produce desired calls """

    # env mutating, must not run in parallel
    _numba_parallel_test_ = False
    # RE for a generic symbol reference and for each particular SVML function
    asm_filter = re.compile('|'.join([r'\$[a-z_]\w+,']+list(svml_funcs)))

    @classmethod
    def mp_runner(cls, testname, outqueue):
        """ Run the class-level callable `testname` and report via `outqueue`.

        Puts ``{'status': ok, 'msg': msg}`` on the queue; any exception raised
        by the callable is reported as a failed status with its traceback.
        """
        method = getattr(cls, testname)
        try:
            ok, msg = method()
        except Exception:
            msg = traceback.format_exc()
            ok = False
        outqueue.put({'status': ok, 'msg': msg})

    @classmethod
    def _inject_test(cls, dtype, mode, vlen, flags):
        """ Add ``run_<postfix>`` and ``test_<postfix>`` to the class.

        ``run_<postfix>`` compiles the generated usecase and checks its
        assembly; ``test_<postfix>`` executes it in a spawned process.
        """
        # unsupported combinations
        if dtype.startswith('complex') and mode != 'numpy':
            return
        # TODO: address skipped tests below
        skipped = dtype.startswith('int') and vlen == 2
        sig = (numba.int64,)

        # unit test body template
        @staticmethod
        def run_template():
            fn, contains, avoids = combo_svml_usecase(dtype, mode, vlen,
                                                      flags['fastmath'],
                                                      flags['name'])
            # look for specific patterns in the asm for a given target
            with override_env_config('NUMBA_CPU_NAME', vlen2cpu[vlen]), \
                 override_env_config('NUMBA_CPU_FEATURES',
                                     vlen2cpu_features[vlen]):
                # recompile for overridden CPU
                try:
                    jitted_fn = njit(sig, fastmath=flags['fastmath'],
                                     error_model=flags['error_model'])(fn)
                except Exception as e:
                    raise Exception(
                        "raised while compiling "+fn.__doc__) from e
            asm = jitted_fn.inspect_asm(sig)
            missed = [pattern for pattern in contains if pattern not in asm]
            found = [pattern for pattern in avoids if pattern in asm]
            ok = not missed and not found
            # keep only the asm lines that mention a symbol of interest
            detail = '\n'.join(
                [line for line in asm.split('\n')
                 if cls.asm_filter.search(line) and '"' not in line])
            msg = (
                f"While expecting {missed} and not {found},\n"
                f"it contains:\n{detail}\n"
                f"when compiling {fn.__doc__}"
            )
            return ok, msg

        # inject it into the class
        postfix = usecase_name(dtype, mode, vlen, flags['name'])
        testname = f"run_{postfix}"
        setattr(cls, testname, run_template)

        @unittest.skipUnless(not skipped, "Not implemented")
        def test_runner(self):
            ctx = mp.get_context("spawn")
            q = ctx.Queue()
            p = ctx.Process(target=type(self).mp_runner, args=[testname, q])
            p.start()
            # timeout to avoid hanging and long enough to avoid bailing too
            # early. Note: this was timeout=10 but that seemed to caused
            # intermittent failures on heavily loaded machines.
            # `join` always returns None, so the outcome is read from
            # `exitcode`: None means the process is still running.
            p.join(timeout=30)
            exitcode = p.exitcode
            if exitcode is None:
                # do not leave the hung child process behind
                p.terminate()
                p.join()
                self.fail("Process timed out.")
            elif exitcode < 0:
                self.fail(f"Process terminated with signal {-exitcode}.")
            self.assertEqual(exitcode, 0, msg="process ended unexpectedly")
            out = q.get()
            status = out['status']
            msg = out['msg']
            self.assertTrue(status, msg=msg)

        setattr(cls, f"test_{postfix}", test_runner)

    @classmethod
    def autogenerate(cls):
        """ Inject a test for every dtype / vector length / flags / mode. """
        # NOTE(review): 'usecase' is implied by the tagged test name below;
        # 'fastmath_usecase' is a reconstruction - confirm against history.
        flag_list = [{'fastmath': False, 'error_model': 'numpy',
                      'name': 'usecase'},
                     {'fastmath': True, 'error_model': 'numpy',
                      'name': 'fastmath_usecase'}]
        # main loop covering all the modes and use-cases
        for dtype in ('complex64', 'float64', 'float32', 'int32', ):
            for vlen in vlen2cpu:
                for flags in flag_list:
                    for mode in "scalar", "range", "prange", "numpy":
                        cls._inject_test(dtype, mode, vlen, dict(flags))
        # mark important
        for n in ("test_int32_range4_usecase",  # issue #3016
                  ):
            setattr(cls, n, tag("important")(getattr(cls, n)))


# The run_*/test_* methods must exist as soon as the module is imported: the
# spawned child process looks `run_*` up on the class by name.
TestSVMLGeneration.autogenerate()
def math_sin_scalar(x):
    """ Return ``math.sin(x)`` for a single scalar `x` """
    sine = math.sin(x)
    return sine
def math_sin_loop(n):
    """ Return a float64 array of length `n` holding sin(0), ..., sin(n-1) """
    out = np.empty(n, dtype=np.float64)
    # kept as an explicit scalar loop: the asm tests in this file inspect the
    # code compiled from this function
    for idx in range(n):
        angle = np.float64(idx)
        out[idx] = math.sin(angle)
    return out
@needs_svml
class TestSVML(TestCase):
    """ Tests SVML behaves as expected """

    # env mutating, must not run in parallel
    _numba_parallel_test_ = False

    def compile(self, func, *args, **kwargs):
        """ Compile `func` for the types of `args`, without and with fastmath.

        Returns the pair ``(std_overload, fast_overload)``.
        """
        assert not kwargs
        sig = tuple([numba.typeof(x) for x in args])

        std = njit(sig)(func)
        fast = njit(sig, fastmath=True)(func)

        return std.overloads[sig], fast.overloads[sig]

    def copy_args(self, *args):
        """ Return a tuple of copies of `args` so each call gets fresh inputs.

        Raises ValueError for anything that is not an ndarray, a numpy scalar
        or a Python number.
        """
        new_args = []
        for x in args:
            if isinstance(x, np.ndarray):
                new_args.append(x.copy('K'))
            elif isinstance(x, np.number):
                new_args.append(x.copy())
            elif isinstance(x, numbers.Number):
                # plain Python numbers are immutable, no copy needed
                new_args.append(x)
            else:
                raise ValueError('Unsupported argument type encountered')
        return tuple(new_args)

    def check_result(self, pyfunc, *args, **kwargs):
        """ Compare both jitted variants against the pure Python result.

        `kwargs` are forwarded to `np.testing.assert_almost_equal`.
        """
        jitstd, jitfast = self.compile(pyfunc, *args)

        # python result
        py_expected = pyfunc(*self.copy_args(*args))

        # jit result
        jitstd_result = jitstd.entry_point(*self.copy_args(*args))

        # fastmath result
        jitfast_result = jitfast.entry_point(*self.copy_args(*args))

        # assert numerical equality
        np.testing.assert_almost_equal(jitstd_result, py_expected, **kwargs)
        np.testing.assert_almost_equal(jitfast_result, py_expected, **kwargs)

    def check_asm(self, pyfunc, *args, **kwargs):
        """ Check the asm of the std/fast variants for the given patterns.

        Recognised keywords: `std_pattern` and `fast_pattern`; a pattern that
        is missing or empty is not checked.
        """
        std_pattern = kwargs.pop('std_pattern', None)
        fast_pattern = kwargs.pop('fast_pattern', None)

        # look for specific patterns in the asm for a given target
        # recompile for overridden CPU
        jitstd, jitfast = self.compile(pyfunc, *args)
        if std_pattern:
            self.check_svml_presence(jitstd, std_pattern)
        if fast_pattern:
            self.check_svml_presence(jitfast, fast_pattern)

    def check(self, pyfunc, *args, what="both", **kwargs):
        """ Run the result check, the asm check, or both, as selected by `what`.
        """
        assert what in ("both", "result", "asm")
        # the pattern keywords only make sense for the asm check; keep them
        # away from `assert_almost_equal`
        asm_kwargs = {k: kwargs.pop(k)
                      for k in ('std_pattern', 'fast_pattern') if k in kwargs}
        if what == "both" or what == "result":
            self.check_result(pyfunc, *args, **kwargs)
        if what == "both" or what == "asm":
            self.check_asm(pyfunc, *args, **asm_kwargs)

    def check_svml_presence(self, func, pattern):
        """ Assert that `pattern` occurs in the asm of `func`'s library. """
        asm = func.library.get_asm_str()
        self.assertIn(pattern, asm)

    def test_scalar_context_asm(self):
        # SVML will not be used.
        pat = '$_sin' if config.IS_OSX else '$sin'
        self.check(math_sin_scalar, 7., what="asm", std_pattern=pat)
        self.check(math_sin_scalar, 7., what="asm", fast_pattern=pat)

    def test_scalar_context_result(self):
        # checks result for test_scalar_context_asm
        self.check(math_sin_scalar, 7., what="result")

    # The expected symbols are 8 lanes wide, which `vlen2cpu` maps to
    # skylake-avx512, so the CPU is forced to that target.
    @TestCase.run_test_in_subprocess(envvars=_skylake_axv512_envvars)
    def test_svml_asm(self):
        # loops both with and without fastmath should use SVML.
        # The high accuracy routines are dropped if `fastmath` is set
        std = "__svml_sin8_ha,"
        fast = "__svml_sin8,"  # No `_ha`!
        self.check(math_sin_loop, 10, what="asm", std_pattern=std,
                   fast_pattern=fast)

    def test_svml_result(self):
        # checks result for test_svml_asm
        self.check(math_sin_loop, 10, what="result")

    # NOTE(review): the CPU override entries are a reconstruction; the
    # original dict literal was truncated after the first entry - confirm.
    @TestCase.run_test_in_subprocess(envvars={'NUMBA_DISABLE_INTEL_SVML': "1",
                                              **_skylake_axv512_envvars})
    def test_svml_disabled(self):

        # NOTE(review): shadows the module-level `math_sin_loop`; presumably
        # so that a fresh function is compiled in the subprocess - confirm.
        def math_sin_loop(n):
            ret = np.empty(n, dtype=np.float64)
            for x in range(n):
                ret[x] = math.sin(np.float64(x))
            return ret

        sig = (numba.int32,)
        std = njit(sig)(math_sin_loop)
        fast = njit(sig, fastmath=True)(math_sin_loop)
        fns = std.overloads[sig], fast.overloads[sig]

        # assert no SVML call is present in the asm
        for fn in fns:
            asm = fn.library.get_asm_str()
            self.assertNotIn('__svml_sin', asm)

    def test_svml_working_in_non_isolated_context(self):
        @njit(fastmath={'fast'}, error_model="numpy")
        def impl(n):
            x = np.empty(n * 8, dtype=np.float64)
            ret = np.empty_like(x)
            for i in range(ret.size):
                ret[i] += math.cosh(x[i])
            return ret
        impl(1)
        self.assertIn('intel_svmlcc', impl.inspect_llvm(impl.signatures[0]))
# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()