183 lines
5.8 KiB
Python
183 lines
5.8 KiB
Python
|
import ctypes
|
||
|
import ctypes.util
|
||
|
import os
|
||
|
import sys
|
||
|
import threading
|
||
|
import warnings
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
import unittest
|
||
|
from numba import jit
|
||
|
from numba.core import errors
|
||
|
from numba.tests.support import TestCase, tag
|
||
|
|
||
|
|
||
|
# Portable way of reading the current thread id: bind the CPython C API
# function PyThread_get_thread_ident() through ctypes.pythonapi.
PyThread_get_thread_ident = ctypes.pythonapi.PyThread_get_thread_ident
# The function takes no arguments; its result is read back as a C long.
PyThread_get_thread_ident.argtypes = []
PyThread_get_thread_ident.restype = ctypes.c_long
|
||
|
|
||
|
# A way of sleeping from nopython code: bind the platform's native sleep
# routine through ctypes.  Multiplying a duration in milliseconds by
# `sleep_factor` gives the argument expected by the bound `sleep` function.
if os.name != 'nt':
    # Everything except Windows: usleep() from the C library.
    sleep = ctypes.CDLL(ctypes.util.find_library("c")).usleep
    sleep.argtypes = [ctypes.c_uint]
    sleep.restype = ctypes.c_int
    sleep_factor = 1000  # usleep() counts in microseconds
else:
    # Windows: Sleep() from kernel32.
    sleep = ctypes.windll.kernel32.Sleep
    sleep.argtypes = [ctypes.c_uint]
    sleep.restype = None
    sleep_factor = 1  # Sleep() counts in milliseconds
|
||
|
|
||
|
|
||
|
def f(a, indices):
    """
    Write the calling thread's id into ``a`` at every position listed in
    ``indices``, sleeping before each write.

    When only one thread at a time runs this function, the array always
    ends up filled with identical values.  When several threads run it at
    the same time, the array will probably end up with differing values.
    """
    for slot in indices:
        # Pause so that another thread gets a chance to run.
        sleep(sleep_factor * 10)
        a[slot] = PyThread_get_thread_ident()
|
||
|
|
||
|
# Explicit signature string passed to jit() throughout this file: no return
# value, an int64 array argument and an intp array argument (the dtypes used
# for the arrays built in TestGILRelease.run_in_threads).
f_sig = "void(int64[:], intp[:])"
|
||
|
|
||
|
def lifted_f(a, indices):
    """
    Variant of f() in which the writing loop is a lifted loop: the
    function itself is forced into object mode ahead of the loop.
    """
    # Force object mode
    object()
    for slot in indices:
        # Pause so that another thread gets a chance to run.
        sleep(sleep_factor * 10)
        a[slot] = PyThread_get_thread_ident()
|
||
|
|
||
|
def object_f(a, indices):
    """
    Variant of f() that runs in object mode.
    """
    for slot in indices:
        # Pause so that another thread gets a chance to run.
        sleep(sleep_factor * 10)
        # Force object mode (NOTE(review): presumably kept inside the loop
        # so that the loop itself stays in object mode -- confirm).
        object()
        a[slot] = PyThread_get_thread_ident()
|
||
|
|
||
|
|
||
|
class TestGILRelease(TestCase):
    """
    Check when jitted functions hold or release the GIL.

    Each check runs a function from several threads at once on a shared
    array that the function fills with thread ids, then counts how many
    distinct values ended up in the array.
    """

    def make_test_array(self, n_members):
        """Return an int64 array holding 0 .. n_members - 1."""
        return np.arange(n_members, dtype=np.int64)

    def run_in_threads(self, func, n_threads):
        """
        Call *func* from *n_threads* threads at the same time on one shared
        50-element array, wait for all of them, and return the array.
        """
        # Call once up front to warm up compilation, so that it does not
        # interfere with the threaded run below.
        func(self.make_test_array(1), np.arange(1, dtype=np.intp))

        arr = self.make_test_array(50)
        workers = []
        for _ in range(n_threads):
            # Give every thread its own shuffled write order.
            order = np.arange(arr.size, dtype=np.intp)
            np.random.shuffle(order)
            workers.append(threading.Thread(target=func, args=(arr, order)))

        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        return arr

    def check_gil_held(self, func):
        """
        Assert that a 4-thread run of *func* leaves exactly one distinct
        value in the array.
        """
        seen = set(self.run_in_threads(func, n_threads=4))
        self.assertEqual(len(seen), 1, seen)

    def check_gil_released(self, func):
        """
        Assert that a threaded run of *func* leaves more than one distinct
        value in the array.

        The check is attempted with 4, then 12, then 32 threads and passes
        as soon as one attempt succeeds; if all attempts fail, the last
        assertion failure is re-raised.
        """
        last_failure = None
        # Try harder each time.  On an empty machine 4 threads seems
        # sufficient, but in some contexts (e.g. Travis CI) we need more.
        for n_threads in (4, 12, 32):
            seen = set(self.run_in_threads(func, n_threads))
            try:
                self.assertGreater(len(seen), 1, seen)
                return
            except AssertionError as exc:
                last_failure = exc
        raise last_failure

    def test_gil_held(self):
        """
        Test the GIL is held by default, by checking serialized runs
        produce deterministic results.
        """
        self.check_gil_held(jit(f_sig, nopython=True)(f))

    def test_gil_released(self):
        """
        Test releasing the GIL, by checking parallel runs produce
        unpredictable results.
        """
        self.check_gil_released(jit(f_sig, nopython=True, nogil=True)(f))

    def test_gil_released_inside_lifted_loop(self):
        """
        Test the GIL can be released by a lifted loop even though the
        surrounding code uses object mode.
        """
        self.check_gil_released(
            jit(f_sig, forceobj=True, nogil=True)(lifted_f))

    def test_gil_released_by_caller(self):
        """
        Releasing the GIL in the caller is sufficient to have it
        released in a callee.
        """
        callee = jit(f_sig, nopython=True)(f)

        def caller(a, i):
            callee(a, i)

        self.check_gil_released(
            jit(f_sig, nopython=True, nogil=True)(caller))

    def test_gil_released_by_caller_and_callee(self):
        """
        Same, but with both caller and callee asking to release the GIL.
        """
        callee = jit(f_sig, nopython=True, nogil=True)(f)

        def caller(a, i):
            callee(a, i)

        self.check_gil_released(
            jit(f_sig, nopython=True, nogil=True)(caller))

    def test_gil_ignored_by_callee(self):
        """
        When only the callee asks to release the GIL, it gets ignored.
        """
        callee = jit(f_sig, nopython=True, nogil=True)(f)

        def caller(a, i):
            callee(a, i)

        self.check_gil_held(jit(f_sig, nopython=True)(caller))

    def test_object_mode(self):
        """
        When the function is compiled in object mode, a warning is
        printed out.
        """
        with warnings.catch_warnings(record=True) as wlist:
            warnings.simplefilter('always', errors.NumbaWarning)
            cfunc = jit(f_sig, forceobj=True, nogil=True)(object_f)

        expected = "Code running in object mode won't allow parallel execution"
        warned = any(w.category is errors.NumbaWarning
                     and expected in str(w.message)
                     for w in wlist)
        self.assertTrue(warned, wlist)
        # Just check it doesn't crash.
        self.run_in_threads(cfunc, 2)
|
||
|
|
||
|
|
||
|
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|