1254 lines
41 KiB
Python
1254 lines
41 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
Tests the parallel backend
|
|
"""
|
|
import faulthandler
|
|
import itertools
|
|
import multiprocessing
|
|
import os
|
|
import random
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
import threading
|
|
import unittest
|
|
|
|
import numpy as np
|
|
|
|
from numba import jit, vectorize, guvectorize, set_num_threads
|
|
from numba.tests.support import (temp_directory, override_config, TestCase, tag,
|
|
skip_parfors_unsupported, linux_only)
|
|
|
|
import queue as t_queue
|
|
from numba.testing.main import _TIMEOUT as _RUNNER_TIMEOUT
|
|
from numba.core import config
|
|
|
|
|
|
_TEST_TIMEOUT = _RUNNER_TIMEOUT - 60.
|
|
|
|
|
|
# Check which backends are available
|
|
# TODO: Put this in a subprocess so the address space is kept clean
|
|
try:
|
|
# Check it's a compatible TBB before loading it
|
|
from numba.np.ufunc.parallel import _check_tbb_version_compatible
|
|
_check_tbb_version_compatible()
|
|
from numba.np.ufunc import tbbpool # noqa: F401
|
|
_HAVE_TBB_POOL = True
|
|
except ImportError:
|
|
_HAVE_TBB_POOL = False
|
|
|
|
try:
|
|
from numba.np.ufunc import omppool
|
|
_HAVE_OMP_POOL = True
|
|
except ImportError:
|
|
_HAVE_OMP_POOL = False
|
|
|
|
try:
|
|
import scipy.linalg.cython_lapack # noqa: F401
|
|
_HAVE_LAPACK = True
|
|
except ImportError:
|
|
_HAVE_LAPACK = False
|
|
|
|
# test skipping decorators
|
|
skip_no_omp = unittest.skipUnless(_HAVE_OMP_POOL, "OpenMP threadpool required")
|
|
skip_no_tbb = unittest.skipUnless(_HAVE_TBB_POOL, "TBB threadpool required")
|
|
|
|
_gnuomp = _HAVE_OMP_POOL and omppool.openmp_vendor == "GNU"
|
|
skip_unless_gnu_omp = unittest.skipUnless(_gnuomp, "GNU OpenMP only tests")
|
|
|
|
_windows = sys.platform.startswith('win')
|
|
_osx = sys.platform.startswith('darwin')
|
|
_32bit = sys.maxsize <= 2 ** 32
|
|
_parfors_unsupported = _32bit
|
|
|
|
_HAVE_OS_FORK = not _windows
|
|
|
|
|
|
# some functions to jit
|
|
|
|
def foo(n, v):
|
|
return np.ones(n) + v
|
|
|
|
|
|
if _HAVE_LAPACK:
|
|
def linalg(n, v):
|
|
x = np.dot(np.ones((n, n)), np.ones((n, n)))
|
|
return x + np.arange(n) + v
|
|
else:
|
|
def linalg(n, v):
|
|
# no way to trigger MKL without the lapack bindings.
|
|
return np.arange(n) + v
|
|
|
|
|
|
def ufunc_foo(a, b):
|
|
return a + b
|
|
|
|
|
|
def gufunc_foo(a, b, out):
|
|
out[0] = a + b
|
|
|
|
|
|
class runnable(object):
|
|
def __init__(self, **options):
|
|
self._options = options
|
|
|
|
|
|
class jit_runner(runnable):
|
|
|
|
def __call__(self):
|
|
cfunc = jit(**self._options)(foo)
|
|
a = 4
|
|
b = 10
|
|
expected = foo(a, b)
|
|
got = cfunc(a, b)
|
|
np.testing.assert_allclose(expected, got)
|
|
|
|
|
|
class mask_runner(object):
|
|
def __init__(self, runner, mask, **options):
|
|
self.runner = runner
|
|
self.mask = mask
|
|
|
|
def __call__(self):
|
|
if self.mask:
|
|
# Tests are all run in isolated subprocesses, so we
|
|
# don't have to worry about this affecting other tests
|
|
set_num_threads(self.mask)
|
|
self.runner()
|
|
|
|
|
|
class linalg_runner(runnable):
|
|
|
|
def __call__(self):
|
|
cfunc = jit(**self._options)(linalg)
|
|
a = 4
|
|
b = 10
|
|
expected = linalg(a, b)
|
|
got = cfunc(a, b)
|
|
np.testing.assert_allclose(expected, got)
|
|
|
|
|
|
class vectorize_runner(runnable):
|
|
|
|
def __call__(self):
|
|
cfunc = vectorize(['(f4, f4)'], **self._options)(ufunc_foo)
|
|
a = b = np.random.random(10).astype(np.float32)
|
|
expected = ufunc_foo(a, b)
|
|
got = cfunc(a, b)
|
|
np.testing.assert_allclose(expected, got)
|
|
|
|
|
|
class guvectorize_runner(runnable):
|
|
|
|
def __call__(self):
|
|
sig = ['(f4, f4, f4[:])']
|
|
cfunc = guvectorize(sig, '(),()->()', **self._options)(gufunc_foo)
|
|
a = b = np.random.random(10).astype(np.float32)
|
|
expected = ufunc_foo(a, b)
|
|
got = cfunc(a, b)
|
|
np.testing.assert_allclose(expected, got)
|
|
|
|
|
|
def chooser(fnlist, **kwargs):
|
|
q = kwargs.get('queue')
|
|
try:
|
|
faulthandler.enable()
|
|
for _ in range(int(len(fnlist) * 1.5)):
|
|
fn = random.choice(fnlist)
|
|
fn()
|
|
except Exception as e:
|
|
q.put(e)
|
|
|
|
|
|
def compile_factory(parallel_class, queue_impl):
|
|
def run_compile(fnlist):
|
|
q = queue_impl()
|
|
kws = {'queue': q}
|
|
ths = [parallel_class(target=chooser, args=(fnlist,), kwargs=kws)
|
|
for i in range(4)]
|
|
for th in ths:
|
|
th.start()
|
|
for th in ths:
|
|
th.join()
|
|
if not q.empty():
|
|
errors = []
|
|
while not q.empty():
|
|
errors.append(q.get(False))
|
|
_msg = "Error(s) occurred in delegated runner:\n%s"
|
|
raise RuntimeError(_msg % '\n'.join([repr(x) for x in errors]))
|
|
return run_compile
|
|
|
|
|
|
# workers
|
|
_thread_class = threading.Thread
|
|
|
|
|
|
class _proc_class_impl(object):
|
|
|
|
def __init__(self, method):
|
|
self._method = method
|
|
|
|
def __call__(self, *args, **kwargs):
|
|
ctx = multiprocessing.get_context(self._method)
|
|
return ctx.Process(*args, **kwargs)
|
|
|
|
|
|
def _get_mp_classes(method):
|
|
if method == 'default':
|
|
method = None
|
|
ctx = multiprocessing.get_context(method)
|
|
proc = _proc_class_impl(method)
|
|
queue = ctx.Queue
|
|
return proc, queue
|
|
|
|
|
|
thread_impl = compile_factory(_thread_class, t_queue.Queue)
|
|
spawn_proc_impl = compile_factory(*_get_mp_classes('spawn'))
|
|
if not _windows:
|
|
fork_proc_impl = compile_factory(*_get_mp_classes('fork'))
|
|
forkserver_proc_impl = compile_factory(*_get_mp_classes('forkserver'))
|
|
|
|
# this is duplication as Py27, linux uses fork, windows uses spawn, it however
|
|
# is kept like this so that when tests fail it's less confusing!
|
|
default_proc_impl = compile_factory(*_get_mp_classes('default'))
|
|
|
|
|
|
class TestParallelBackendBase(TestCase):
|
|
"""
|
|
Base class for testing the parallel backends
|
|
"""
|
|
|
|
all_impls = [
|
|
jit_runner(nopython=True),
|
|
jit_runner(nopython=True, cache=True),
|
|
jit_runner(nopython=True, nogil=True),
|
|
linalg_runner(nopython=True),
|
|
linalg_runner(nopython=True, nogil=True),
|
|
vectorize_runner(nopython=True),
|
|
vectorize_runner(nopython=True, target='parallel'),
|
|
vectorize_runner(nopython=True, target='parallel', cache=True),
|
|
guvectorize_runner(nopython=True),
|
|
guvectorize_runner(nopython=True, target='parallel'),
|
|
guvectorize_runner(nopython=True, target='parallel', cache=True),
|
|
]
|
|
|
|
if not _parfors_unsupported:
|
|
parfor_impls = [
|
|
jit_runner(nopython=True, parallel=True),
|
|
jit_runner(nopython=True, parallel=True, cache=True),
|
|
linalg_runner(nopython=True, parallel=True),
|
|
linalg_runner(nopython=True, parallel=True, cache=True),
|
|
]
|
|
all_impls.extend(parfor_impls)
|
|
|
|
if config.NUMBA_NUM_THREADS < 2:
|
|
# Not enough cores
|
|
masks = []
|
|
else:
|
|
masks = [1, 2]
|
|
|
|
mask_impls = []
|
|
for impl in all_impls:
|
|
for mask in masks:
|
|
mask_impls.append(mask_runner(impl, mask))
|
|
|
|
parallelism = ['threading', 'random']
|
|
parallelism.append('multiprocessing_spawn')
|
|
if _HAVE_OS_FORK:
|
|
parallelism.append('multiprocessing_fork')
|
|
parallelism.append('multiprocessing_forkserver')
|
|
|
|
runners = {
|
|
'concurrent_jit': [
|
|
jit_runner(nopython=True, parallel=(not _parfors_unsupported)),
|
|
],
|
|
'concurrent_vectorize': [
|
|
vectorize_runner(nopython=True, target='parallel'),
|
|
],
|
|
'concurrent_guvectorize': [
|
|
guvectorize_runner(nopython=True, target='parallel'),
|
|
],
|
|
'concurrent_mix_use': all_impls,
|
|
'concurrent_mix_use_masks': mask_impls,
|
|
}
|
|
|
|
safe_backends = {'omp', 'tbb'}
|
|
|
|
def run_compile(self, fnlist, parallelism='threading'):
|
|
self._cache_dir = temp_directory(self.__class__.__name__)
|
|
with override_config('CACHE_DIR', self._cache_dir):
|
|
if parallelism == 'threading':
|
|
thread_impl(fnlist)
|
|
elif parallelism == 'multiprocessing_fork':
|
|
fork_proc_impl(fnlist)
|
|
elif parallelism == 'multiprocessing_forkserver':
|
|
forkserver_proc_impl(fnlist)
|
|
elif parallelism == 'multiprocessing_spawn':
|
|
spawn_proc_impl(fnlist)
|
|
elif parallelism == 'multiprocessing_default':
|
|
default_proc_impl(fnlist)
|
|
elif parallelism == 'random':
|
|
ps = [thread_impl, spawn_proc_impl]
|
|
if _HAVE_OS_FORK:
|
|
ps.append(fork_proc_impl)
|
|
ps.append(forkserver_proc_impl)
|
|
|
|
random.shuffle(ps)
|
|
for impl in ps:
|
|
impl(fnlist)
|
|
else:
|
|
raise ValueError(
|
|
'Unknown parallelism supplied %s' % parallelism)
|
|
|
|
|
|
_specific_backends = config.THREADING_LAYER in ('omp', 'tbb', 'workqueue')
|
|
|
|
|
|
@unittest.skipUnless(_specific_backends, "Threading layer not explicit")
|
|
class TestParallelBackend(TestParallelBackendBase):
|
|
""" These are like the numba.tests.test_threadsafety tests but designed
|
|
instead to torture the parallel backend.
|
|
If a suitable backend is supplied via NUMBA_THREADING_LAYER these tests
|
|
can be run directly. This test class cannot be run using the multiprocessing
|
|
option to the test runner (i.e. `./runtests -m`) as daemon processes cannot
|
|
have children.
|
|
"""
|
|
|
|
# NOTE: All tests are generated based on what a platform supports concurrent
|
|
# execution wise from Python, irrespective of whether the native libraries
|
|
# can actually handle the behaviour present.
|
|
@classmethod
|
|
def generate(cls):
|
|
for p in cls.parallelism:
|
|
for name, impl in cls.runners.items():
|
|
methname = "test_" + p + '_' + name
|
|
|
|
def methgen(impl, p):
|
|
def test_method(self):
|
|
selfproc = multiprocessing.current_process()
|
|
# daemonized processes cannot have children
|
|
if selfproc.daemon:
|
|
_msg = 'daemonized processes cannot have children'
|
|
self.skipTest(_msg)
|
|
else:
|
|
self.run_compile(impl, parallelism=p)
|
|
return test_method
|
|
fn = methgen(impl, p)
|
|
fn.__name__ = methname
|
|
setattr(cls, methname, fn)
|
|
|
|
|
|
TestParallelBackend.generate()
|
|
|
|
|
|
class TestInSubprocess(object):
|
|
backends = {'tbb': skip_no_tbb,
|
|
'omp': skip_no_omp,
|
|
'workqueue': unittest.skipIf(False, '')}
|
|
|
|
def run_cmd(self, cmdline, env):
|
|
popen = subprocess.Popen(cmdline,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
env=env)
|
|
# finish in _TEST_TIMEOUT seconds or kill it
|
|
timeout = threading.Timer(_TEST_TIMEOUT, popen.kill)
|
|
try:
|
|
timeout.start()
|
|
out, err = popen.communicate()
|
|
if popen.returncode != 0:
|
|
raise AssertionError(
|
|
"process failed with code %s: stderr follows\n%s\n" %
|
|
(popen.returncode, err.decode()))
|
|
return out.decode(), err.decode()
|
|
finally:
|
|
timeout.cancel()
|
|
return None, None
|
|
|
|
def run_test_in_separate_process(self, test, threading_layer):
|
|
env_copy = os.environ.copy()
|
|
env_copy['NUMBA_THREADING_LAYER'] = str(threading_layer)
|
|
cmdline = [sys.executable, "-m", "numba.runtests", test]
|
|
return self.run_cmd(cmdline, env_copy)
|
|
|
|
|
|
class TestSpecificBackend(TestInSubprocess, TestParallelBackendBase):
|
|
"""
|
|
This is quite contrived, for each test in the TestParallelBackend tests it
|
|
generates a test that will run the TestParallelBackend test in a new python
|
|
process with an environment modified to ensure a specific threadsafe backend
|
|
is used. This is with view of testing the backends independently and in an
|
|
isolated manner such that if they hang/crash/have issues, it doesn't kill
|
|
the test suite.
|
|
"""
|
|
_DEBUG = False
|
|
|
|
@classmethod
|
|
def _inject(cls, p, name, backend, backend_guard):
|
|
themod = cls.__module__
|
|
thecls = TestParallelBackend.__name__
|
|
methname = "test_" + p + '_' + name
|
|
injected_method = '%s.%s.%s' % (themod, thecls, methname)
|
|
|
|
def test_template(self):
|
|
o, e = self.run_test_in_separate_process(injected_method, backend)
|
|
if self._DEBUG:
|
|
print('stdout:\n "%s"\n stderr:\n "%s"' % (o, e))
|
|
# If the test was skipped in the subprocess, then mark this as a
|
|
# skipped test.
|
|
m = re.search(r"\.\.\. skipped '(.*?)'", e)
|
|
if m is not None:
|
|
self.skipTest(m.group(1))
|
|
self.assertIn('OK', e)
|
|
self.assertTrue('FAIL' not in e)
|
|
self.assertTrue('ERROR' not in e)
|
|
injected_test = "test_%s_%s_%s" % (p, name, backend)
|
|
# Mark as long_running
|
|
setattr(cls, injected_test,
|
|
tag('long_running')(backend_guard(test_template)))
|
|
|
|
@classmethod
|
|
def generate(cls):
|
|
for backend, backend_guard in cls.backends.items():
|
|
for p in cls.parallelism:
|
|
for name in cls.runners.keys():
|
|
# handle known problem cases...
|
|
|
|
# GNU OpenMP is not fork safe
|
|
if (p in ('multiprocessing_fork', 'random') and
|
|
backend == 'omp' and
|
|
sys.platform.startswith('linux')):
|
|
continue
|
|
|
|
# workqueue is not thread safe
|
|
if (p in ('threading', 'random') and
|
|
backend == 'workqueue'):
|
|
continue
|
|
|
|
cls._inject(p, name, backend, backend_guard)
|
|
|
|
|
|
TestSpecificBackend.generate()
|
|
|
|
|
|
class ThreadLayerTestHelper(TestCase):
|
|
"""
|
|
Helper class for running an isolated piece of code based on a template
|
|
"""
|
|
# sys path injection and separate usecase module to make sure everything
|
|
# is importable by children of multiprocessing
|
|
_here = "%r" % os.path.dirname(__file__)
|
|
|
|
template = """if 1:
|
|
import sys
|
|
sys.path.insert(0, "%(here)r")
|
|
import multiprocessing
|
|
import numpy as np
|
|
from numba import njit
|
|
import numba
|
|
try:
|
|
import threading_backend_usecases
|
|
except ImportError as e:
|
|
print("DEBUG:", sys.path)
|
|
raise e
|
|
import os
|
|
|
|
sigterm_handler = threading_backend_usecases.sigterm_handler
|
|
busy_func = threading_backend_usecases.busy_func
|
|
|
|
def the_test():
|
|
%%s
|
|
|
|
if __name__ == "__main__":
|
|
the_test()
|
|
""" % {'here': _here}
|
|
|
|
def run_cmd(self, cmdline, env=None):
|
|
if env is None:
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = str("omp")
|
|
popen = subprocess.Popen(cmdline,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,
|
|
env=env)
|
|
# finish in _TEST_TIMEOUT seconds or kill it
|
|
timeout = threading.Timer(_TEST_TIMEOUT, popen.kill)
|
|
try:
|
|
timeout.start()
|
|
out, err = popen.communicate()
|
|
if popen.returncode != 0:
|
|
raise AssertionError(
|
|
"process failed with code %s: stderr follows\n%s\n" %
|
|
(popen.returncode, err.decode()))
|
|
finally:
|
|
timeout.cancel()
|
|
return out.decode(), err.decode()
|
|
|
|
|
|
@skip_parfors_unsupported
|
|
class TestThreadingLayerSelection(ThreadLayerTestHelper):
|
|
"""
|
|
Checks that numba.threading_layer() reports correctly.
|
|
"""
|
|
_DEBUG = False
|
|
|
|
backends = {'tbb': skip_no_tbb,
|
|
'omp': skip_no_omp,
|
|
'workqueue': unittest.skipIf(False, '')}
|
|
|
|
@classmethod
|
|
def _inject(cls, backend, backend_guard):
|
|
|
|
def test_template(self):
|
|
body = """if 1:
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
Z = busy_func(X, Y)
|
|
assert numba.threading_layer() == '%s'
|
|
"""
|
|
runme = self.template % (body % backend)
|
|
cmdline = [sys.executable, '-c', runme]
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = str(backend)
|
|
out, err = self.run_cmd(cmdline, env=env)
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
injected_test = "test_threading_layer_selector_%s" % backend
|
|
setattr(cls, injected_test,
|
|
tag("important")(backend_guard(test_template)))
|
|
|
|
@classmethod
|
|
def generate(cls):
|
|
for backend, backend_guard in cls.backends.items():
|
|
cls._inject(backend, backend_guard)
|
|
|
|
|
|
TestThreadingLayerSelection.generate()
|
|
|
|
|
|
@skip_parfors_unsupported
|
|
class TestThreadingLayerPriority(ThreadLayerTestHelper):
|
|
|
|
def each_env_var(self, env_var: str):
|
|
"""Test setting priority via env var NUMBA_THREADING_LAYER_PRIORITY.
|
|
"""
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = 'default'
|
|
env['NUMBA_THREADING_LAYER_PRIORITY'] = env_var
|
|
|
|
code = f"""
|
|
import numba
|
|
|
|
# trigger threading layer decision
|
|
# hence catching invalid THREADING_LAYER_PRIORITY
|
|
@numba.jit(
|
|
'float64[::1](float64[::1], float64[::1])',
|
|
nopython=True,
|
|
parallel=True,
|
|
)
|
|
def plus(x, y):
|
|
return x + y
|
|
|
|
captured_envvar = list("{env_var}".split())
|
|
assert numba.config.THREADING_LAYER_PRIORITY == \
|
|
captured_envvar, "priority mismatch"
|
|
assert numba.threading_layer() == captured_envvar[0],\
|
|
"selected backend mismatch"
|
|
"""
|
|
cmd = [
|
|
sys.executable,
|
|
'-c',
|
|
textwrap.dedent(code),
|
|
]
|
|
self.run_cmd(cmd, env=env)
|
|
|
|
@skip_no_omp
|
|
@skip_no_tbb
|
|
def test_valid_env_var(self):
|
|
default = ['tbb', 'omp', 'workqueue']
|
|
for p in itertools.permutations(default):
|
|
env_var = ' '.join(p)
|
|
self.each_env_var(env_var)
|
|
|
|
@skip_no_omp
|
|
@skip_no_tbb
|
|
def test_invalid_env_var(self):
|
|
env_var = 'tbb omp workqueue notvalidhere'
|
|
with self.assertRaises(AssertionError) as raises:
|
|
self.each_env_var(env_var)
|
|
for msg in (
|
|
"THREADING_LAYER_PRIORITY invalid:",
|
|
"It must be a permutation of"
|
|
):
|
|
self.assertIn(f"{msg}", str(raises.exception))
|
|
|
|
@skip_no_omp
|
|
def test_omp(self):
|
|
for env_var in ("omp tbb workqueue", "omp workqueue tbb"):
|
|
self.each_env_var(env_var)
|
|
|
|
@skip_no_tbb
|
|
def test_tbb(self):
|
|
for env_var in ("tbb omp workqueue", "tbb workqueue omp"):
|
|
self.each_env_var(env_var)
|
|
|
|
def test_workqueue(self):
|
|
for env_var in ("workqueue tbb omp", "workqueue omp tbb"):
|
|
self.each_env_var(env_var)
|
|
|
|
|
|
@skip_parfors_unsupported
|
|
class TestMiscBackendIssues(ThreadLayerTestHelper):
|
|
"""
|
|
Checks fixes for the issues with threading backends implementation
|
|
"""
|
|
_DEBUG = False
|
|
|
|
@skip_no_omp
|
|
def test_omp_stack_overflow(self):
|
|
"""
|
|
Tests that OMP does not overflow stack
|
|
"""
|
|
runme = """if 1:
|
|
from numba import vectorize, threading_layer
|
|
import numpy as np
|
|
|
|
@vectorize(['f4(f4,f4,f4,f4,f4,f4,f4,f4)'], target='parallel')
|
|
def foo(a, b, c, d, e, f, g, h):
|
|
return a+b+c+d+e+f+g+h
|
|
|
|
x = np.ones(2**20, np.float32)
|
|
foo(*([x]*8))
|
|
assert threading_layer() == "omp", "omp not found"
|
|
"""
|
|
cmdline = [sys.executable, '-c', runme]
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = "omp"
|
|
env['OMP_STACKSIZE'] = "100K"
|
|
self.run_cmd(cmdline, env=env)
|
|
|
|
@skip_no_tbb
|
|
def test_single_thread_tbb(self):
|
|
"""
|
|
Tests that TBB works well with single thread
|
|
https://github.com/numba/numba/issues/3440
|
|
"""
|
|
runme = """if 1:
|
|
from numba import njit, prange, threading_layer
|
|
|
|
@njit(parallel=True)
|
|
def foo(n):
|
|
acc = 0
|
|
for i in prange(n):
|
|
acc += i
|
|
return acc
|
|
|
|
foo(100)
|
|
assert threading_layer() == "tbb", "tbb not found"
|
|
"""
|
|
cmdline = [sys.executable, '-c', runme]
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = "tbb"
|
|
env['NUMBA_NUM_THREADS'] = "1"
|
|
self.run_cmd(cmdline, env=env)
|
|
|
|
def test_workqueue_aborts_on_nested_parallelism(self):
|
|
"""
|
|
Tests workqueue raises sigabrt if a nested parallel call is performed
|
|
"""
|
|
runme = """if 1:
|
|
from numba import njit, prange
|
|
import numpy as np
|
|
|
|
@njit(parallel=True)
|
|
def nested(x):
|
|
for i in prange(len(x)):
|
|
x[i] += 1
|
|
|
|
|
|
@njit(parallel=True)
|
|
def main():
|
|
Z = np.zeros((5, 10))
|
|
for i in prange(Z.shape[0]):
|
|
nested(Z[i])
|
|
return Z
|
|
|
|
main()
|
|
"""
|
|
cmdline = [sys.executable, '-c', runme]
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = "workqueue"
|
|
env['NUMBA_NUM_THREADS'] = "4"
|
|
|
|
try:
|
|
out, err = self.run_cmd(cmdline, env=env)
|
|
except AssertionError as e:
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
e_msg = str(e)
|
|
self.assertIn("failed with code", e_msg)
|
|
# raised a SIGABRT, but the value is platform specific so just check
|
|
# the error message
|
|
expected = ("Numba workqueue threading layer is terminating: "
|
|
"Concurrent access has been detected.")
|
|
self.assertIn(expected, e_msg)
|
|
|
|
@unittest.skipUnless(_HAVE_OS_FORK, "Test needs fork(2)")
|
|
def test_workqueue_handles_fork_from_non_main_thread(self):
|
|
# For context see #7872, but essentially the multiprocessing pool
|
|
# implementation has a number of Python threads for handling the worker
|
|
# processes, one of which calls fork(2), this results in a fork from a
|
|
# non-main thread.
|
|
|
|
runme = """if 1:
|
|
from numba import njit, prange, threading_layer
|
|
import numpy as np
|
|
import multiprocessing
|
|
|
|
if __name__ == "__main__":
|
|
# Need for force fork context (OSX default is "spawn")
|
|
multiprocessing.set_start_method('fork')
|
|
|
|
@njit(parallel=True)
|
|
def func(x):
|
|
return 10. * x
|
|
|
|
arr = np.arange(2.)
|
|
|
|
# run in single process to start Numba's thread pool
|
|
np.testing.assert_allclose(func(arr), func.py_func(arr))
|
|
|
|
# now run in a multiprocessing pool to get a fork from a
|
|
# non-main thread
|
|
with multiprocessing.Pool(10) as p:
|
|
result = p.map(func, [arr])
|
|
np.testing.assert_allclose(result,
|
|
func.py_func(np.expand_dims(arr, 0)))
|
|
|
|
assert threading_layer() == "workqueue"
|
|
"""
|
|
cmdline = [sys.executable, '-c', runme]
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = "workqueue"
|
|
env['NUMBA_NUM_THREADS'] = "4"
|
|
|
|
self.run_cmd(cmdline, env=env)
|
|
|
|
|
|
# 32bit or windows py27 (not that this runs on windows)
|
|
@skip_parfors_unsupported
|
|
@skip_unless_gnu_omp
|
|
class TestForkSafetyIssues(ThreadLayerTestHelper):
|
|
"""
|
|
Checks Numba's behaviour in various situations involving GNU OpenMP and fork
|
|
"""
|
|
_DEBUG = False
|
|
|
|
def test_check_threading_layer_is_gnu(self):
|
|
runme = """if 1:
|
|
from numba.np.ufunc import omppool
|
|
assert omppool.openmp_vendor == 'GNU'
|
|
"""
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
|
|
def test_par_parent_os_fork_par_child(self):
|
|
"""
|
|
Whilst normally valid, this actually isn't for Numba invariant of OpenMP
|
|
Checks SIGABRT is received.
|
|
"""
|
|
body = """if 1:
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
Z = busy_func(X, Y)
|
|
pid = os.fork()
|
|
if pid == 0:
|
|
Z = busy_func(X, Y)
|
|
else:
|
|
os.wait()
|
|
"""
|
|
runme = self.template % body
|
|
cmdline = [sys.executable, '-c', runme]
|
|
try:
|
|
out, err = self.run_cmd(cmdline)
|
|
except AssertionError as e:
|
|
self.assertIn("failed with code -6", str(e))
|
|
|
|
def test_par_parent_implicit_mp_fork_par_child(self):
|
|
"""
|
|
Implicit use of multiprocessing fork context.
|
|
Does this:
|
|
1. Start with OpenMP
|
|
2. Fork to processes using OpenMP (this is invalid)
|
|
3. Joins fork
|
|
4. Check the exception pushed onto the queue that is a result of
|
|
catching SIGTERM coming from the C++ aborting on illegal fork
|
|
pattern for GNU OpenMP
|
|
"""
|
|
body = """if 1:
|
|
mp = multiprocessing.get_context('fork')
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
q = mp.Queue()
|
|
|
|
# Start OpenMP runtime on parent via parallel function
|
|
Z = busy_func(X, Y, q)
|
|
|
|
# fork() underneath with no exec, will abort
|
|
proc = mp.Process(target = busy_func, args=(X, Y, q))
|
|
proc.start()
|
|
|
|
err = q.get()
|
|
assert "Caught SIGTERM" in str(err)
|
|
"""
|
|
runme = self.template % body
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
|
|
@linux_only
|
|
def test_par_parent_explicit_mp_fork_par_child(self):
|
|
"""
|
|
Explicit use of multiprocessing fork context.
|
|
Does this:
|
|
1. Start with OpenMP
|
|
2. Fork to processes using OpenMP (this is invalid)
|
|
3. Joins fork
|
|
4. Check the exception pushed onto the queue that is a result of
|
|
catching SIGTERM coming from the C++ aborting on illegal fork
|
|
pattern for GNU OpenMP
|
|
"""
|
|
body = """if 1:
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
ctx = multiprocessing.get_context('fork')
|
|
q = ctx.Queue()
|
|
|
|
# Start OpenMP runtime on parent via parallel function
|
|
Z = busy_func(X, Y, q)
|
|
|
|
# fork() underneath with no exec, will abort
|
|
proc = ctx.Process(target = busy_func, args=(X, Y, q))
|
|
proc.start()
|
|
proc.join()
|
|
|
|
err = q.get()
|
|
assert "Caught SIGTERM" in str(err)
|
|
"""
|
|
runme = self.template % body
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
|
|
def test_par_parent_mp_spawn_par_child_par_parent(self):
|
|
"""
|
|
Explicit use of multiprocessing spawn, this is safe.
|
|
Does this:
|
|
1. Start with OpenMP
|
|
2. Spawn to processes using OpenMP
|
|
3. Join spawns
|
|
4. Run some more OpenMP
|
|
"""
|
|
body = """if 1:
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
ctx = multiprocessing.get_context('spawn')
|
|
q = ctx.Queue()
|
|
|
|
# Start OpenMP runtime and run on parent via parallel function
|
|
Z = busy_func(X, Y, q)
|
|
procs = []
|
|
for x in range(20): # start a lot to try and get overlap
|
|
## fork() + exec() to run some OpenMP on children
|
|
proc = ctx.Process(target = busy_func, args=(X, Y, q))
|
|
procs.append(proc)
|
|
sys.stdout.flush()
|
|
sys.stderr.flush()
|
|
proc.start()
|
|
|
|
[p.join() for p in procs]
|
|
|
|
try:
|
|
q.get(False)
|
|
except multiprocessing.queues.Empty:
|
|
pass
|
|
else:
|
|
raise RuntimeError("Queue was not empty")
|
|
|
|
# Run some more OpenMP on parent
|
|
Z = busy_func(X, Y, q)
|
|
"""
|
|
runme = self.template % body
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
|
|
def test_serial_parent_implicit_mp_fork_par_child_then_par_parent(self):
|
|
"""
|
|
Implicit use of multiprocessing (will be fork, but cannot declare that
|
|
in Py2.7 as there's no process launch context).
|
|
Does this:
|
|
1. Start with no OpenMP
|
|
2. Fork to processes using OpenMP
|
|
3. Join forks
|
|
4. Run some OpenMP
|
|
"""
|
|
body = """if 1:
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
q = multiprocessing.Queue()
|
|
|
|
# this is ok
|
|
procs = []
|
|
for x in range(10):
|
|
# fork() underneath with but no OpenMP in parent, this is ok
|
|
proc = multiprocessing.Process(target = busy_func,
|
|
args=(X, Y, q))
|
|
procs.append(proc)
|
|
proc.start()
|
|
|
|
[p.join() for p in procs]
|
|
|
|
# and this is still ok as the OpenMP happened in forks
|
|
Z = busy_func(X, Y, q)
|
|
try:
|
|
q.get(False)
|
|
except multiprocessing.queues.Empty:
|
|
pass
|
|
else:
|
|
raise RuntimeError("Queue was not empty")
|
|
"""
|
|
runme = self.template % body
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
|
|
@linux_only
|
|
def test_serial_parent_explicit_mp_fork_par_child_then_par_parent(self):
|
|
"""
|
|
Explicit use of multiprocessing 'fork'.
|
|
Does this:
|
|
1. Start with no OpenMP
|
|
2. Fork to processes using OpenMP
|
|
3. Join forks
|
|
4. Run some OpenMP
|
|
"""
|
|
body = """if 1:
|
|
X = np.arange(1000000.)
|
|
Y = np.arange(1000000.)
|
|
ctx = multiprocessing.get_context('fork')
|
|
q = ctx.Queue()
|
|
|
|
# this is ok
|
|
procs = []
|
|
for x in range(10):
|
|
# fork() underneath with but no OpenMP in parent, this is ok
|
|
proc = ctx.Process(target = busy_func, args=(X, Y, q))
|
|
procs.append(proc)
|
|
proc.start()
|
|
|
|
[p.join() for p in procs]
|
|
|
|
# and this is still ok as the OpenMP happened in forks
|
|
Z = busy_func(X, Y, q)
|
|
try:
|
|
q.get(False)
|
|
except multiprocessing.queues.Empty:
|
|
pass
|
|
else:
|
|
raise RuntimeError("Queue was not empty")
|
|
"""
|
|
runme = self.template % body
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
if self._DEBUG:
|
|
print(out, err)
|
|
|
|
|
|
@skip_parfors_unsupported
|
|
@skip_no_tbb
|
|
class TestTBBSpecificIssues(ThreadLayerTestHelper):
|
|
|
|
_DEBUG = False
|
|
|
|
@linux_only # os.fork required.
|
|
def test_fork_from_non_main_thread(self):
|
|
# See issue #5973 and PR #6208 for original context.
|
|
# See issue #6963 for context on the following comments:
|
|
#
|
|
# Important things to note:
|
|
# 1. Compilation of code containing an objmode block will result in the
|
|
# use of and `ObjModeLiftedWith` as the dispatcher. This inherits
|
|
# from `LiftedCode` which handles the serialization. In that
|
|
# serialization is a call to uuid.uuid1() which causes a fork_exec in
|
|
# CPython internals.
|
|
# 2. The selected parallel backend thread pool is started during the
|
|
# compilation of a function that has `parallel=True`.
|
|
# 3. The TBB backend can handle forks from the main thread, it will
|
|
# safely reinitialise after so doing. If a fork occurs from a
|
|
# non-main thread it will warn and the state is invalid in the child
|
|
# process.
|
|
#
|
|
# Due to 1. and 2. the `obj_mode_func` function separated out and is
|
|
# `njit` decorated. This means during type inference of `work` it will
|
|
# trigger a standard compilation of the function and the thread pools
|
|
# won't have started yet as the parallelisation compiler passes for
|
|
# `work` won't yet have run. This mitigates the fork() call from 1.
|
|
# occurring after 2. The result of this is that 3. can be tested using
|
|
# the threading etc herein with the state being known as the above
|
|
# described, i.e. the TBB threading layer has not experienced a fork().
|
|
|
|
runme = """if 1:
|
|
import threading
|
|
import numba
|
|
numba.config.THREADING_LAYER='tbb'
|
|
from numba import njit, prange, objmode
|
|
from numba.core.serialize import PickleCallableByPath
|
|
import os
|
|
|
|
e_running = threading.Event()
|
|
e_proceed = threading.Event()
|
|
|
|
def indirect_core():
|
|
e_running.set()
|
|
# wait for forker() to have forked
|
|
while not e_proceed.isSet():
|
|
pass
|
|
|
|
indirect = PickleCallableByPath(indirect_core)
|
|
|
|
@njit
|
|
def obj_mode_func():
|
|
with objmode():
|
|
indirect()
|
|
|
|
@njit(parallel=True, nogil=True)
|
|
def work():
|
|
acc = 0
|
|
for x in prange(10):
|
|
acc += x
|
|
obj_mode_func()
|
|
return acc
|
|
|
|
def runner():
|
|
work()
|
|
|
|
def forker():
|
|
# wait for the jit function to say it's running
|
|
while not e_running.isSet():
|
|
pass
|
|
# then fork
|
|
os.fork()
|
|
# now fork is done signal the runner to proceed to exit
|
|
e_proceed.set()
|
|
|
|
numba_runner = threading.Thread(target=runner,)
|
|
fork_runner = threading.Thread(target=forker,)
|
|
|
|
threads = (numba_runner, fork_runner)
|
|
for t in threads:
|
|
t.start()
|
|
for t in threads:
|
|
t.join()
|
|
"""
|
|
|
|
cmdline = [sys.executable, '-c', runme]
|
|
out, err = self.run_cmd(cmdline)
|
|
# assert error message printed on stderr
|
|
msg_head = "Attempted to fork from a non-main thread, the TBB library"
|
|
self.assertIn(msg_head, err)
|
|
|
|
if self._DEBUG:
|
|
print("OUT:", out)
|
|
print("ERR:", err)
|
|
|
|
@linux_only # fork required.
|
|
def test_lifetime_of_task_scheduler_handle(self):
|
|
|
|
self.skip_if_no_external_compiler() # external compiler needed
|
|
|
|
# See PR #7280 for context.
|
|
BROKEN_COMPILERS = 'SKIP: COMPILATION FAILED'
|
|
runme = """if 1:
|
|
import ctypes
|
|
import sys
|
|
import multiprocessing as mp
|
|
from tempfile import TemporaryDirectory, NamedTemporaryFile
|
|
from numba.pycc.platform import Toolchain, external_compiler_works
|
|
from numba import njit, prange, threading_layer
|
|
import faulthandler
|
|
faulthandler.enable()
|
|
if not external_compiler_works():
|
|
raise AssertionError('External compilers are not found.')
|
|
with TemporaryDirectory() as tmpdir:
|
|
with NamedTemporaryFile(dir=tmpdir) as tmpfile:
|
|
try:
|
|
src = \"\"\"
|
|
#define TBB_PREVIEW_WAITING_FOR_WORKERS 1
|
|
#include <tbb/tbb.h>
|
|
static tbb::task_scheduler_handle tsh;
|
|
extern "C"
|
|
{
|
|
void launch(void)
|
|
{
|
|
tsh = tbb::task_scheduler_handle::get();
|
|
}
|
|
}
|
|
\"\"\"
|
|
cxxfile = f"{tmpfile.name}.cxx"
|
|
with open(cxxfile, 'wt') as f:
|
|
f.write(src)
|
|
tc = Toolchain()
|
|
object_files = tc.compile_objects([cxxfile,],
|
|
output_dir=tmpdir)
|
|
dso_name = f"{tmpfile.name}.so"
|
|
tc.link_shared(dso_name, object_files,
|
|
libraries=['tbb',],
|
|
export_symbols=['launch'])
|
|
# Load into the process, it doesn't matter whether the
|
|
# DSO exists on disk once it's loaded in.
|
|
DLL = ctypes.CDLL(dso_name)
|
|
except Exception as e:
|
|
# Something is broken in compilation, could be one of
|
|
# many things including, but not limited to: missing tbb
|
|
# headers, incorrect permissions, compilers that don't
|
|
# work for the above
|
|
print(e)
|
|
print('BROKEN_COMPILERS')
|
|
sys.exit(0)
|
|
|
|
# Do the test, launch this library and also execute a
|
|
# function with the TBB threading layer.
|
|
|
|
DLL.launch()
|
|
|
|
@njit(parallel=True)
|
|
def foo(n):
|
|
acc = 0
|
|
for i in prange(n):
|
|
acc += i
|
|
return acc
|
|
|
|
foo(1)
|
|
|
|
# Check the threading layer used was TBB
|
|
assert threading_layer() == 'tbb'
|
|
|
|
# Use mp context for a controlled version of fork, this triggers the
|
|
# reported bug.
|
|
|
|
ctx = mp.get_context('fork')
|
|
def nowork():
|
|
pass
|
|
p = ctx.Process(target=nowork)
|
|
p.start()
|
|
p.join(10)
|
|
print("SUCCESS")
|
|
""".replace('BROKEN_COMPILERS', BROKEN_COMPILERS)
|
|
|
|
cmdline = [sys.executable, '-c', runme]
|
|
env = os.environ.copy()
|
|
env['NUMBA_THREADING_LAYER'] = 'tbb'
|
|
out, err = self.run_cmd(cmdline, env=env)
|
|
|
|
if BROKEN_COMPILERS in out:
|
|
self.skipTest("Compilation of DSO failed. Check output for details")
|
|
else:
|
|
self.assertIn("SUCCESS", out)
|
|
|
|
if self._DEBUG:
|
|
print("OUT:", out)
|
|
print("ERR:", err)
|
|
|
|
|
|
@skip_parfors_unsupported
|
|
class TestInitSafetyIssues(TestCase):
|
|
|
|
_DEBUG = False
|
|
|
|
def run_cmd(self, cmdline):
|
|
popen = subprocess.Popen(cmdline,
|
|
stdout=subprocess.PIPE,
|
|
stderr=subprocess.PIPE,)
|
|
# finish in _TEST_TIMEOUT seconds or kill it
|
|
timeout = threading.Timer(_TEST_TIMEOUT, popen.kill)
|
|
try:
|
|
timeout.start()
|
|
out, err = popen.communicate()
|
|
if popen.returncode != 0:
|
|
raise AssertionError(
|
|
"process failed with code %s: stderr follows\n%s\n" %
|
|
(popen.returncode, err.decode()))
|
|
finally:
|
|
timeout.cancel()
|
|
return out.decode(), err.decode()
|
|
|
|
@linux_only # only linux can leak semaphores
|
|
def test_orphaned_semaphore(self):
|
|
# sys path injection and separate usecase module to make sure everything
|
|
# is importable by children of multiprocessing
|
|
|
|
test_file = os.path.join(os.path.dirname(__file__),
|
|
"orphaned_semaphore_usecase.py")
|
|
cmdline = [sys.executable, test_file]
|
|
out, err = self.run_cmd(cmdline)
|
|
|
|
# assert no semaphore leaks reported on stderr
|
|
self.assertNotIn("leaked semaphore", err)
|
|
|
|
if self._DEBUG:
|
|
print("OUT:", out)
|
|
print("ERR:", err)
|
|
|
|
def test_lazy_lock_init(self):
|
|
# checks based on https://github.com/numba/numba/pull/5724
|
|
# looking for "lazy" process lock initialisation so as to avoid setting
|
|
# a multiprocessing context as part of import.
|
|
for meth in ('fork', 'spawn', 'forkserver'):
|
|
# if a context is available on the host check it can be set as the
|
|
# start method in a separate process
|
|
try:
|
|
multiprocessing.get_context(meth)
|
|
except ValueError:
|
|
continue
|
|
cmd = ("import numba; import multiprocessing;"
|
|
"multiprocessing.set_start_method('{}');"
|
|
"print(multiprocessing.get_context().get_start_method())")
|
|
cmdline = [sys.executable, "-c", cmd.format(meth)]
|
|
out, err = self.run_cmd(cmdline)
|
|
if self._DEBUG:
|
|
print("OUT:", out)
|
|
print("ERR:", err)
|
|
self.assertIn(meth, out)
|
|
|
|
|
|
@skip_parfors_unsupported
|
|
@skip_no_omp
|
|
class TestOpenMPVendors(TestCase):
|
|
|
|
def test_vendors(self):
|
|
"""
|
|
Checks the OpenMP vendor strings are correct
|
|
"""
|
|
expected = dict()
|
|
expected['win32'] = "MS"
|
|
expected['darwin'] = "Intel"
|
|
expected['linux'] = "GNU"
|
|
|
|
# only check OS that are supported, custom toolchains may well work as
|
|
# may other OS
|
|
for k in expected.keys():
|
|
if sys.platform.startswith(k):
|
|
self.assertEqual(expected[k], omppool.openmp_vendor)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
unittest.main()
|