4904 lines
160 KiB
Python
4904 lines
160 KiB
Python
|
#
|
||
|
# Copyright (c) 2017 Intel Corporation
|
||
|
# SPDX-License-Identifier: BSD-2-Clause
|
||
|
#
|
||
|
|
||
|
|
||
|
import math
|
||
|
import os
|
||
|
import re
|
||
|
import dis
|
||
|
import numbers
|
||
|
import platform
|
||
|
import sys
|
||
|
import subprocess
|
||
|
import types as pytypes
|
||
|
import warnings
|
||
|
from functools import reduce
|
||
|
import numpy as np
|
||
|
from numpy.random import randn
|
||
|
import operator
|
||
|
from collections import defaultdict, namedtuple
|
||
|
import copy
|
||
|
from itertools import cycle, chain
|
||
|
import subprocess as subp
|
||
|
|
||
|
import numba.parfors.parfor
|
||
|
from numba import (njit, prange, parallel_chunksize,
|
||
|
get_parallel_chunksize, set_parallel_chunksize,
|
||
|
set_num_threads, get_num_threads, typeof)
|
||
|
from numba.core import (types, errors, ir, rewrites,
|
||
|
typed_passes, inline_closurecall, config, compiler, cpu)
|
||
|
from numba.extending import (overload_method, register_model,
|
||
|
typeof_impl, unbox, NativeValue, models)
|
||
|
from numba.core.registry import cpu_target
|
||
|
from numba.core.annotations import type_annotations
|
||
|
from numba.core.ir_utils import (find_callname, guard, build_definitions,
|
||
|
get_definition, is_getitem, is_setitem,
|
||
|
index_var_of_get_setitem)
|
||
|
from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty
|
||
|
from numba.core.compiler import (CompilerBase, DefaultPassBuilder)
|
||
|
from numba.core.compiler_machinery import register_pass, AnalysisPass
|
||
|
from numba.core.typed_passes import IRLegalization
|
||
|
from numba.tests.support import (TestCase, captured_stdout, MemoryLeakMixin,
|
||
|
override_env_config, linux_only, tag,
|
||
|
skip_parfors_unsupported, _32bit, needs_blas,
|
||
|
needs_lapack, disabled_test, skip_unless_scipy,
|
||
|
needs_subprocess,
|
||
|
skip_ppc64le_invalid_ctr_loop)
|
||
|
from numba.core.extending import register_jitable
|
||
|
from numba.core.bytecode import _fix_LOAD_GLOBAL_arg
|
||
|
from numba.core import utils
|
||
|
|
||
|
import cmath
|
||
|
import unittest
|
||
|
|
||
|
# NOTE: Each parfors test class is run in a separate subprocess; this is to
# reduce memory pressure in CI settings. The environment variable
# "SUBPROC_TEST" is used to determine whether a test is skipped or not, such
# that if you want to run any parfors test directly this environment variable
# can be set. The subprocesses running the test classes set this environment
# variable as the new process starts, which enables the tests within the
# process. The decorator @needs_subprocess is used to ensure the appropriate
# test skips are made.
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforsRunner(TestCase):
    """This is the test runner for all the parfors tests, it runs them in
    subprocesses as described above. The convention for the test method naming
    is: `test_<TestClass>` where <TestClass> is the name of the test class in
    this module.
    """

    _numba_parallel_test_ = False

    # Each test class can run for 30 minutes before time out. Extend this to an
    # hour on aarch64 (some public CI systems were timing out).
    _TIMEOUT = 1800 if platform.machine() != 'aarch64' else 3600

    def runner(self):
        """Run the test class named by the calling test method in a
        subprocess, using `_TIMEOUT` as the time limit."""
        themod = self.__module__
        # The method name is `test_<TestClass>`; strip only the leading
        # `test_` so a class name that itself contains underscores survives.
        test_method_name = self.id().split('.')[-1]
        test_clazz_name = test_method_name.split('_', 1)[1]
        # don't specify a given test, it's an entire class that needs running
        self.subprocess_test_runner(test_module=themod,
                                    test_class=test_clazz_name,
                                    timeout=self._TIMEOUT)

    def test_TestParforBasic(self):
        self.runner()

    def test_TestParforNumericalMisc(self):
        self.runner()

    def test_TestParforNumPy(self):
        self.runner()

    def test_TestParfors(self):
        self.runner()

    def test_TestParforsBitMask(self):
        self.runner()

    def test_TestParforsDiagnostics(self):
        self.runner()

    def test_TestParforsLeaks(self):
        self.runner()

    def test_TestParforsMisc(self):
        self.runner()

    def test_TestParforsOptions(self):
        self.runner()

    def test_TestParforsSlice(self):
        self.runner()

    def test_TestParforsVectorizer(self):
        self.runner()

    def test_TestPrangeBasic(self):
        self.runner()

    def test_TestPrangeSpecific(self):
        self.runner()
|
||
|
|
||
|
|
||
|
# Decorator that skips a test unless platform.machine() reports 'i386' or
# 'x86_64'.
x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'), 'x86 only test')

# Module-level integer constants.
# NOTE(review): presumably read as globals by jitted test functions elsewhere
# in this module -- their users are not visible here, confirm before changing.
_GLOBAL_INT_FOR_TESTING1 = 17
_GLOBAL_INT_FOR_TESTING2 = 5

# Two-field namedtuple type ('part0', 'part1').
# NOTE(review): usage is not visible in this part of the file.
TestNamedTuple = namedtuple('TestNamedTuple', ('part0', 'part1'))
|
||
|
|
||
|
|
||
|
def null_comparer(a, b, **kwargs):
    """
    Used with check_arg_equality to indicate that we do not care
    whether the value of the parameter at the end of the function
    has a particular value.

    Any extra keyword arguments are accepted and ignored: comparers are
    invoked as ``argcomp(actual, expected, **kwargs)`` with whatever keyword
    arguments remain after the checker has consumed its own, so a comparer
    that only accepted ``(a, b)`` would raise TypeError whenever such
    arguments (e.g. ``decimal=...``) are supplied.
    """
    pass
|
||
|
|
||
|
|
||
|
@needs_subprocess
class TestParforsBase(TestCase):
    """
    Base class for testing parfors.
    Provides functions for compilation and three way comparison between
    python functions, njit'd functions and parfor njit'd functions.
    """

    _numba_parallel_test_ = False

    def _compile_this(self, func, sig, **flags):
        """Compile *func* for *sig* with `njit(sig, **flags)` and return the
        entry stored in the dispatcher's `overloads` for that signature."""
        # This method originally used `compile_isolated` which returns a
        # "CompileResult", hence this does the same.
        return njit(sig, **flags)(func).overloads[sig]

    def compile_parallel(self, func, sig):
        """Compile *func* with `parallel=True`."""
        return self._compile_this(func, sig, parallel=True)

    def compile_parallel_fastmath(self, func, sig):
        """Compile *func* with `parallel=True` and `fastmath=True`."""
        return self._compile_this(func, sig, parallel=True, fastmath=True)

    def compile_njit(self, func, sig):
        """Compile *func* with plain `njit` (no extra flags)."""
        return self._compile_this(func, sig)

    def compile_all(self, pyfunc, *args, **kwargs):
        """Compile *pyfunc* for the types of *args*, both with and without
        `parallel=True`.

        Returns the tuple `(cfunc, cpfunc)`: the plain njit compile result and
        the parallel compile result. `**kwargs` is accepted but unused.
        """
        sig = tuple(numba.typeof(x) for x in args)

        # compile the prange injected function
        cpfunc = self.compile_parallel(pyfunc, sig)

        # compile a standard njit of the original function
        cfunc = self.compile_njit(pyfunc, sig)

        return cfunc, cpfunc

    def check_parfors_vs_others(self, pyfunc, cfunc, cpfunc, *args, **kwargs):
        """
        Checks python, njit and parfor impls produce the same result.

        Arguments:
            pyfunc - the python function to test
            cfunc - CompilerResult from njit of pyfunc
            cpfunc - CompilerResult from njit(parallel=True) of pyfunc
            args - arguments for the function being tested
        Keyword Arguments:
            scheduler_type - 'signed', 'unsigned' or None, default is None.
                           Supply in cases where the presence of a specific
                           scheduler is to be asserted.
            fastmath_pcres - a fastmath parallel compile result, if supplied
                             will be run to make sure the result is correct
            check_scheduling - default True. If true, assert via
                               `check_scheduling` that the parallel compile
                               result's LLVM IR references the scheduler.
            check_fastmath - accepted and ignored; it is removed from the
                             kwargs so it is not forwarded to the comparison
                             functions.
            check_arg_equality - some functions need to check that a
                                 parameter is modified rather than a certain
                                 value returned.  If this keyword argument
                                 is supplied, it should be a list of
                                 comparison functions such that the i'th
                                 function in the list is used to compare the
                                 i'th parameter of the njit and parallel=True
                                 functions against the i'th parameter of the
                                 standard Python function, asserting if they
                                 differ.  The length of this list must be equal
                                 to the number of parameters to the function.
                                 The null comparator is available for use
                                 when you do not desire to test if some
                                 particular parameter is changed.
            Remaining kwargs are passed to np.testing.assert_almost_equal
        """
        scheduler_type = kwargs.pop('scheduler_type', None)
        # Unused, but must be removed so it does not reach the comparers.
        kwargs.pop('check_fastmath', None)
        fastmath_pcres = kwargs.pop('fastmath_pcres', None)
        check_scheduling = kwargs.pop('check_scheduling', True)
        check_args_for_equality = kwargs.pop('check_arg_equality', None)

        def copy_args(*args):
            # Give each implementation its own copy of the inputs so that a
            # function mutating its arguments cannot affect the other runs.
            if not args:
                return tuple()
            new_args = []
            for x in args:
                if isinstance(x, np.ndarray):
                    new_args.append(x.copy('k'))
                elif isinstance(x, np.number):
                    # must be tested before numbers.Number
                    new_args.append(x.copy())
                elif isinstance(x, numbers.Number):
                    new_args.append(x)
                elif x is None:
                    new_args.append(x)
                elif isinstance(x, tuple):
                    new_args.append(copy.deepcopy(x))
                elif isinstance(x, list):
                    new_args.append(x[:])
                else:
                    raise ValueError('Unsupported argument type encountered')
            return tuple(new_args)

        # python result
        py_args = copy_args(*args)
        py_expected = pyfunc(*py_args)

        # njit result
        njit_args = copy_args(*args)
        njit_output = cfunc.entry_point(*njit_args)

        # parfor result
        parfor_args = copy_args(*args)
        parfor_output = cpfunc.entry_point(*parfor_args)

        if check_args_for_equality is None:
            np.testing.assert_almost_equal(njit_output, py_expected, **kwargs)
            np.testing.assert_almost_equal(parfor_output, py_expected, **kwargs)
            self.assertEqual(type(njit_output), type(parfor_output))
        else:
            # A unittest assertion (rather than a bare `assert`) so the check
            # survives `python -O`; otherwise zip() below would silently
            # truncate to the shorter sequence.
            self.assertEqual(len(py_args), len(check_args_for_equality))
            for pyarg, njitarg, parforarg, argcomp in zip(
                    py_args, njit_args, parfor_args, check_args_for_equality):
                argcomp(njitarg, pyarg, **kwargs)
                argcomp(parforarg, pyarg, **kwargs)

        if check_scheduling:
            self.check_scheduling(cpfunc, scheduler_type)

        # if requested check fastmath variant
        if fastmath_pcres is not None:
            parfor_fastmath_output = fastmath_pcres.entry_point(*copy_args(*args))
            np.testing.assert_almost_equal(parfor_fastmath_output, py_expected,
                                           **kwargs)

    def check(self, pyfunc, *args, **kwargs):
        """Checks that pyfunc compiles for *args under parallel=True and njit
        and asserts that all version execute and produce the same result"""
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def check_variants(self, impl, arg_gen, **kwargs):
        """Run self.check(impl, ...) on array data generated from arg_gen.
        """
        for args in arg_gen():
            with self.subTest(list(map(typeof, args))):
                self.check(impl, *args, **kwargs)

    def count_parfors_variants(self, impl, arg_gen, **kwargs):
        """Run self.countParfors(impl, ...) on array types generated from
        arg_gen.
        """
        for args in arg_gen():
            with self.subTest(list(map(typeof, args))):
                argtys = tuple(map(typeof, args))
                # At least one parfors
                self.assertGreaterEqual(countParfors(impl, argtys), 1)

    def check_scheduling(self, cres, scheduler_type):
        """Assert that the LLVM IR of *cres* references the parfor scheduler.

        *scheduler_type* may be None, 'signed' or 'unsigned'; any other value
        raises ValueError.
        """
        # make sure parfor set up scheduling
        scheduler_str = '@do_scheduling'
        if scheduler_type is not None:
            if scheduler_type in ['signed', 'unsigned']:
                scheduler_str += '_' + scheduler_type
            else:
                msg = "Unknown scheduler_type specified: %s"
                raise ValueError(msg % scheduler_type)

        self.assertIn(scheduler_str, cres.library.get_llvm_str())

    def gen_linspace(self, n, ct):
        """Make *ct* sample 1D arrays of length *n* using np.linspace().
        """
        def gen():
            yield np.linspace(0, 1, n)
            yield np.linspace(2, 1, n)
            yield np.linspace(1, 2, n)

        # Only three distinct arrays are produced; cycle() repeats them when
        # more than three are requested.
        src = cycle(gen())
        return [next(src) for i in range(ct)]

    def gen_linspace_variants(self, ct):
        """Make 1D, 2D, 3D variants of the data in C and F orders
        """
        # 1D
        yield self.gen_linspace(10, ct=ct)

        # 2D
        arr2ds = [x.reshape((2, 3))
                  for x in self.gen_linspace(n=2 * 3, ct=ct)]
        yield arr2ds
        # Fortran order
        yield [np.asfortranarray(x) for x in arr2ds]

        # 3D
        arr3ds = [x.reshape((2, 3, 4))
                  for x in self.gen_linspace(n=2 * 3 * 4, ct=ct)]
        yield arr3ds
        # Fortran order
        yield [np.asfortranarray(x) for x in arr3ds]

    def _filter_mod(self, mod, magicstr, checkstr=None):
        """ helper function to filter out modules by name"""
        filt = [x for x in mod if magicstr in x.name]
        if checkstr is not None:
            for x in filt:
                # unittest assertion so the check is not stripped under -O
                self.assertIn(checkstr, str(x))
        return filt

    def _get_gufunc_modules(self, cres, magicstr, checkstr=None):
        """ gets the gufunc LLVM Modules"""
        _modules = list(cres.library._codegen._engine._ee._modules)
        # make sure to only use modules that are actually used by cres and
        # aren't just in the EE by virtue of shared compilation context.
        potential_matches = self._filter_mod(_modules, magicstr,
                                             checkstr=checkstr)

        lib_asm = cres.library.get_asm_str()
        return [mod for mod in potential_matches if mod.name in lib_asm]

    def _get_gufunc_info(self, cres, fn):
        """ helper for gufunc IR/asm generation"""
        # get the gufunc modules
        magicstr = '__numba_parfor_gufunc'
        gufunc_mods = self._get_gufunc_modules(cres, magicstr)
        return {mod.name: fn(mod) for mod in gufunc_mods}

    def _get_gufunc_ir(self, cres):
        """
        Returns the IR of the gufuncs used as parfor kernels
        as a dict mapping the gufunc name to its IR.

        Arguments:
         cres - a CompileResult from `njit(parallel=True, ...)`
        """
        return self._get_gufunc_info(cres, str)

    def _get_gufunc_asm(self, cres):
        """
        Returns the assembly of the gufuncs used as parfor kernels
        as a dict mapping the gufunc name to its assembly.

        Arguments:
         cres - a CompileResult from `njit(parallel=True, ...)`
        """
        tm = cres.library._codegen._tm

        def emit_asm(mod):
            return str(tm.emit_assembly(mod))

        return self._get_gufunc_info(cres, emit_asm)

    def assert_fastmath(self, pyfunc, sig):
        """
        Asserts that the fastmath flag has some effect in that suitable
        instructions are now labelled as `fast`. Whether LLVM can actually do
        anything to optimise better now the derestrictions are supplied is
        another matter!

        Arguments:
         pyfunc - a function that contains operations with parallel semantics
         sig - the type signature of pyfunc
        """

        cres = self.compile_parallel_fastmath(pyfunc, sig)
        _ir = self._get_gufunc_ir(cres)

        fast_word = re.compile(r'\bfast\b')  # \b for wholeword

        def _get_fast_instructions(ir_text):
            # lines of the IR text that mention `fast` as a whole word
            return [line for line in ir_text.splitlines()
                    if fast_word.search(line) is not None]

        def _assert_fast(instrs):
            ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp', 'call')
            for inst in instrs:
                # every `fast` line must carry the flag on a recognised op
                self.assertTrue(any(op + ' fast' in inst for op in ops),
                                msg=inst)

        for name, guir in _ir.items():
            inst = _get_fast_instructions(guir)
            _assert_fast(inst)
|
||
|
|
||
|
|
||
|
def blackscholes_impl(sptprice, strike, rate, volatility, timev):
    """Black-Scholes style example kernel; returns the computed `put` value.

    All arithmetic is written with operators and NumPy ufuncs, so the inputs
    may be arrays (test_blackscholes types them as five 1D float64 arrays).
    NOTE(review): the parfor count of this function is asserted by
    test_blackscholes, so the statement structure should not be changed.
    """
    # blackscholes example
    logterm = np.log(sptprice / strike)
    powterm = 0.5 * volatility * volatility
    den = volatility * np.sqrt(timev)
    d1 = (((rate + powterm) * timev) + logterm) / den
    d2 = d1 - den
    # NofXd1/NofXd2 are plain linear expressions of d1/d2 here, not an
    # evaluation of the normal CDF.
    NofXd1 = 0.5 + 0.5 * 2.0 * d1
    NofXd2 = 0.5 + 0.5 * 2.0 * d2
    futureValue = strike * np.exp(- rate * timev)
    c1 = futureValue * NofXd2
    call = sptprice * NofXd1 - c1
    put = call - futureValue + sptprice
    return put
|
||
|
|
||
|
|
||
|
def lr_impl(Y, X, w, iterations):
    """Logistic regression example: apply `iterations` update steps to `w`.

    `w` is updated with `-=` on every iteration and then returned; with
    `iterations == 0` it is returned unchanged.
    NOTE(review): test_logistic_regression asserts the parfor and array
    allocation counts of this function, so keep the expression as written.
    """
    # logistic regression example
    for i in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
    return w
|
||
|
|
||
|
def example_kmeans_test(A, numCenter, numIter, init_centroids):
    """k-means example: run `numIter` assign/update rounds and return the
    centroids.

    `A` is unpacked as a 2D array of shape (N, D). With `numIter == 0` the
    initial centroids are returned as-is.
    NOTE(review): test_kmeans asserts the number of non-parfor array accesses
    in this function, so the comprehensions should be kept as written.
    """
    centroids = init_centroids
    N, D = A.shape

    for l in range(numIter):
        # dist[i, j]: Euclidean distance from row i of A to centroid j
        dist = np.array([[math.sqrt(np.sum((A[i,:]-centroids[j,:])**2))
                                for j in range(numCenter)] for i in range(N)])
        # labels[i]: index of the nearest centroid for row i
        labels = np.array([dist[i,:].argmin() for i in range(N)])

        # new centroid i, dimension j: sum over the rows labelled i divided
        # by the number of such rows
        centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i)
                                 for j in range(D)] for i in range(numCenter)])

    return centroids
|
||
|
|
||
|
def get_optimized_numba_ir(test_func, args, **kws):
    """Run the front end, type inference and the parfor passes on *test_func*.

    *args* is the tuple of argument types. Keyword arguments, when given, are
    handed to `cpu.ParallelOptions`; otherwise `ParallelOptions(True)` is
    used. Returns `(test_ir, tp)`: the transformed IR and the `TestPipeline`
    whose `state` holds the typing results.
    """
    parfor_mod = numba.parfors.parfor

    typingctx = cpu_target.typing_context
    targetctx = cpu_target.target_context
    test_ir = compiler.run_frontend(test_func)
    options = cpu.ParallelOptions(kws if kws else True)

    tp = TestPipeline(typingctx, targetctx, args, test_ir)
    state = tp.state

    typingctx.refresh()
    targetctx.refresh()

    # Inline closures before any rewriting or typing happens.
    inline_closurecall.InlineClosureCallPass(
        state.func_ir, options, typed=True).run()

    rewrites.rewrite_registry.apply('before-inference', state)

    inferred = typed_passes.type_inference_stage(
        state.typingctx, state.targetctx, state.func_ir, state.args, None)
    state.typemap, state.return_type, state.calltypes, _ = inferred

    # Constructed for its side effects only; the object itself is not kept.
    type_annotations.TypeAnnotation(
        func_ir=state.func_ir,
        typemap=state.typemap,
        calltypes=state.calltypes,
        lifted=(),
        lifted_from=None,
        args=state.args,
        return_type=state.return_type,
        html_output=config.HTML)

    diagnostics = parfor_mod.ParforDiagnostics()

    parfor_mod.PreParforPass(
        state.func_ir, state.typemap, state.calltypes,
        state.typingctx, state.targetctx, options,
        swapped=diagnostics.replaced_fns).run()

    rewrites.rewrite_registry.apply('after-inference', state)

    flags = compiler.Flags()
    # The three parfor passes take identical arguments and run in this order.
    pass_sequence = (
        parfor_mod.ParforPass,
        parfor_mod.ParforFusionPass,
        parfor_mod.ParforPreLoweringPass,
    )
    for pass_cls in pass_sequence:
        pass_cls(
            state.func_ir, state.typemap, state.calltypes,
            state.return_type, state.typingctx, state.targetctx,
            options, flags, state.metadata, diagnostics=diagnostics).run()

    test_ir._definitions = build_definitions(test_ir.blocks)

    return test_ir, tp
|
||
|
|
||
|
def countParfors(test_func, args, **kws):
    """Return how many `Parfor` nodes appear directly in the blocks of the
    optimized IR of *test_func* (parfor bodies are not searched)."""
    optimized_ir, _ = get_optimized_numba_ir(test_func, args, **kws)
    parfor_cls = numba.parfors.parfor.Parfor
    return sum(
        1
        for block in optimized_ir.blocks.values()
        for stmt in block.body
        if isinstance(stmt, parfor_cls)
    )
|
||
|
|
||
|
|
||
|
def countArrays(test_func, args, **kws):
    """Return the array count that `_count_arrays_inner` computes for the
    optimized IR of *test_func*."""
    optimized_ir, pipeline = get_optimized_numba_ir(test_func, args, **kws)
    typemap = pipeline.state.typemap
    return _count_arrays_inner(optimized_ir.blocks, typemap)
|
||
|
|
||
|
def get_init_block_size(test_func, args, **kws):
    """Return the total number of statements in the `init_block` of every
    `Parfor` found directly in the blocks of the optimized IR."""
    optimized_ir, _ = get_optimized_numba_ir(test_func, args, **kws)
    parfor_cls = numba.parfors.parfor.Parfor
    return sum(
        len(stmt.init_block.body)
        for block in optimized_ir.blocks.values()
        for stmt in block.body
        if isinstance(stmt, parfor_cls)
    )
|
||
|
|
||
|
def _count_arrays_inner(blocks, typemap):
    """Count array-typed assignment targets in *blocks*.

    Distinct target names typed as `types.ArrayCompatible` are counted once
    per call; each `Parfor` is counted recursively over its loop body with
    its init block stored under label 0, and that count is added on top.
    """
    parfor_cls = numba.parfors.parfor.Parfor
    nested_total = 0
    array_names = set()

    for block in blocks.values():
        for stmt in block.body:
            if isinstance(stmt, parfor_cls):
                inner_blocks = stmt.loop_body.copy()
                inner_blocks[0] = stmt.init_block
                nested_total += _count_arrays_inner(inner_blocks, typemap)
            if isinstance(stmt, ir.Assign):
                target_name = stmt.target.name
                if isinstance(typemap[target_name], types.ArrayCompatible):
                    array_names.add(target_name)

    return nested_total + len(array_names)
|
||
|
|
||
|
def countArrayAllocs(test_func, args, **kws):
    """Return the number of array allocation calls that
    `_count_array_allocs_inner` finds over all blocks of the optimized IR."""
    optimized_ir, _ = get_optimized_numba_ir(test_func, args, **kws)
    return sum(
        _count_array_allocs_inner(optimized_ir, block)
        for block in optimized_ir.blocks.values()
    )
|
||
|
|
||
|
def _count_array_allocs_inner(func_ir, block):
    """Count calls to `numpy.empty` or
    `numba.np.unsafe.ndarray.empty_inferred` assigned in *block*, descending
    into the init block and loop body of any `Parfor`."""
    alloc_callnames = (
        ('empty', 'numpy'),
        ('empty_inferred', 'numba.np.unsafe.ndarray'),
    )
    total = 0
    for stmt in block.body:
        if isinstance(stmt, numba.parfors.parfor.Parfor):
            total += _count_array_allocs_inner(func_ir, stmt.init_block)
            for body_block in stmt.loop_body.values():
                total += _count_array_allocs_inner(func_ir, body_block)

        is_call_assign = (
            isinstance(stmt, ir.Assign)
            and isinstance(stmt.value, ir.Expr)
            and stmt.value.op == 'call'
        )
        # Resolve the callee only for call assignments.
        if (is_call_assign
                and guard(find_callname, func_ir, stmt.value)
                in alloc_callnames):
            total += 1

    return total
|
||
|
|
||
|
def countNonParforArrayAccesses(test_func, args, **kws):
    """Return the number of array getitem/setitem statements in the optimized
    IR of *test_func* that are not indexed by a parfor index variable."""
    optimized_ir, pipeline = get_optimized_numba_ir(test_func, args, **kws)
    typemap = pipeline.state.typemap
    return _count_non_parfor_array_accesses_inner(
        optimized_ir, optimized_ir.blocks, typemap)
|
||
|
|
||
|
def _count_non_parfor_array_accesses_inner(f_ir, blocks, typemap, parfor_indices=None):
    """Count array getitem/setitem statements not indexed by a parfor index.

    *parfor_indices* is the set of variable names known to be parfor indices
    (or aliases of them). It is extended in place while walking and shared
    with the recursive calls made for parfor bodies.
    """
    if parfor_indices is None:
        parfor_indices = set()

    def is_array(var_name):
        return isinstance(typemap[var_name], types.ArrayCompatible)

    def indexed_by_parfor(stmt):
        return _uses_indices(
            f_ir, index_var_of_get_setitem(stmt), parfor_indices)

    accesses = 0
    for block in blocks.values():
        for stmt in block.body:
            if isinstance(stmt, numba.parfors.parfor.Parfor):
                parfor_indices.add(stmt.index_var.name)
                nested_blocks = stmt.loop_body.copy()
                nested_blocks[0] = stmt.init_block
                accesses += _count_non_parfor_array_accesses_inner(
                    f_ir, nested_blocks, typemap, parfor_indices)
                continue

            # getitem
            if (is_getitem(stmt)
                    and is_array(stmt.value.value.name)
                    and not indexed_by_parfor(stmt)):
                accesses += 1
                continue

            # setitem
            if (is_setitem(stmt)
                    and is_array(stmt.target.name)
                    and not indexed_by_parfor(stmt)):
                accesses += 1
                continue

            # find parfor_index aliases
            if (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Var)
                    and stmt.value.name in parfor_indices):
                parfor_indices.add(stmt.target.name)

    return accesses
|
||
|
|
||
|
def _uses_indices(f_ir, index, index_set):
|
||
|
if index.name in index_set:
|
||
|
return True
|
||
|
|
||
|
ind_def = guard(get_definition, f_ir, index)
|
||
|
if isinstance(ind_def, ir.Expr) and ind_def.op == 'build_tuple':
|
||
|
varnames = set(v.name for v in ind_def.items)
|
||
|
return len(varnames & index_set) != 0
|
||
|
|
||
|
return False
|
||
|
|
||
|
|
||
|
class TestPipeline(object):
    """Holder for a `compiler.StateDict` populated with the fields that
    `get_optimized_numba_ir` reads and writes while running passes by hand."""

    def __init__(self, typingctx, targetctx, args, test_ir):
        state = compiler.StateDict()
        initial_fields = (
            ('typingctx', typingctx),
            ('targetctx', targetctx),
            ('args', args),
            ('func_ir', test_ir),
            # filled in later by type inference
            ('typemap', None),
            ('return_type', None),
            ('calltypes', None),
            ('metadata', {}),
        )
        for field_name, value in initial_fields:
            setattr(state, field_name, value)
        self.state = state
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforBasic(TestParforsBase):
    """Smoke tests for the parfors transforms. These tests check the most basic
    functionality"""

    def __init__(self, *args):
        TestParforsBase.__init__(self, *args)
        # these are used in the mass of simple tests
        # (v1: length-3 vector, v2: length-4 vector, m1: 3x4 matrix,
        #  m2: its 4x3 transpose)
        m = np.reshape(np.arange(12.), (3, 4))
        self.simple_args = [np.arange(3.), np.arange(4.), m, m.T]

    def test_simple01(self):
        # np.ones with an empty shape tuple: check() is expected to fail its
        # scheduling assertion.
        def test_impl():
            return np.ones(())
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_simple02(self):
        # 1D shape given as a 1-tuple
        def test_impl():
            return np.ones((1,))
        self.check(test_impl)

    def test_simple03(self):
        # 2D shape
        def test_impl():
            return np.ones((1, 2))
        self.check(test_impl)

    def test_simple04(self):
        # 1D shape given as a plain integer
        def test_impl():
            return np.ones(1)
        self.check(test_impl)

    def test_simple07(self):
        # explicit complex dtype
        def test_impl():
            return np.ones((1, 2), dtype=np.complex128)
        self.check(test_impl)

    def test_simple08(self):
        # sum of two freshly created arrays
        def test_impl():
            return np.ones((1, 2)) + np.ones((1, 2))
        self.check(test_impl)

    def test_simple09(self):
        # single-element 2D array
        def test_impl():
            return np.ones((1, 1))
        self.check(test_impl)

    def test_simple10(self):
        # zero-sized 2D array
        def test_impl():
            return np.ones((0, 0))
        self.check(test_impl)

    def test_simple11(self):
        # array plus float scalar
        def test_impl():
            return np.ones((10, 10)) + 1.
        self.check(test_impl)

    def test_simple12(self):
        # array plus complex scalar
        def test_impl():
            return np.ones((10, 10)) + np.complex128(1.)
        self.check(test_impl)

    def test_simple13(self):
        # scalar-only function: check() is expected to fail its scheduling
        # assertion.
        def test_impl():
            return np.complex128(1.)
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_simple14(self):
        # slice whose step exceeds the array extent
        def test_impl():
            return np.ones((10, 10))[0::20]
        self.check(test_impl)

    def test_simple15(self):
        # vector + vector on an argument
        def test_impl(v1, v2, m1, m2):
            return v1 + v1
        self.check(test_impl, *self.simple_args)

    def test_simple16(self):
        # matrix + matrix on an argument
        def test_impl(v1, v2, m1, m2):
            return m1 + m1
        self.check(test_impl, *self.simple_args)

    def test_simple17(self):
        # (4, 3) matrix + length-3 vector
        def test_impl(v1, v2, m1, m2):
            return m2 + v1
        self.check(test_impl, *self.simple_args)

    @needs_lapack
    def test_simple18(self):
        # transpose plus the second result of np.linalg.svd
        def test_impl(v1, v2, m1, m2):
            return m1.T + np.linalg.svd(m2)[1]
        self.check(test_impl, *self.simple_args)

    @needs_blas
    def test_simple19(self):
        # matrix-vector dot
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, v2)
        self.check(test_impl, *self.simple_args)

    @needs_blas
    def test_simple20(self):
        # matrix-matrix dot
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, m2)
        # gemm is left to BLAS
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, *self.simple_args)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    @needs_blas
    def test_simple21(self):
        # vector-vector dot
        def test_impl(v1, v2, m1, m2):
            return np.dot(v1, v1)
        self.check(test_impl, *self.simple_args)

    def test_simple22(self):
        # reduction over an array expression
        def test_impl(v1, v2, m1, m2):
            return np.sum(v1 + v1)
        self.check(test_impl, *self.simple_args)

    def test_simple23(self):
        # reduction over a boolean array expression built from temporaries
        def test_impl(v1, v2, m1, m2):
            x = 2 * v1
            y = 2 * v1
            return 4 * np.sum(x**2 + y**2 < 1) / 10
        self.check(test_impl, *self.simple_args)

    def test_simple24(self):
        # reduction over an array indexed with an integer array
        def test_impl():
            n = 20
            A = np.ones((n, n))
            b = np.arange(n)
            return np.sum(A[:, b])
        self.check(test_impl)

    @disabled_test
    def test_simple_operator_15(self):
        """same as corresponding test_simple_<n> case but using operator.add"""
        def test_impl(v1, v2, m1, m2):
            return operator.add(v1, v1)

        self.check(test_impl, *self.simple_args)

    @disabled_test
    def test_simple_operator_16(self):
        # operator.add counterpart of test_simple16
        def test_impl(v1, v2, m1, m2):
            return operator.add(m1, m1)

        self.check(test_impl, *self.simple_args)

    @disabled_test
    def test_simple_operator_17(self):
        # operator.add counterpart of test_simple17
        def test_impl(v1, v2, m1, m2):
            return operator.add(m2, v1)

        self.check(test_impl, *self.simple_args)

    def test_inplace_alias(self):
        # issue7201
        # test_impl returns nothing; the check compares the argument as left
        # by each implementation instead of a return value.
        def test_impl(a):
            a += 1
            a[:] = 3

        def comparer(a, b):
            np.testing.assert_equal(a, b)

        x = np.ones(1)
        self.check(test_impl, x, check_arg_equality=[comparer])
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforNumericalMisc(TestParforsBase):
    """ Miscellaneous 'classical' numerical tests """

    def test_pi(self):
        # Monte Carlo style estimate from random points in [-1, 1) x [-1, 1)
        def test_impl(n):
            x = 2 * np.random.ranf(n) - 1
            y = 2 * np.random.ranf(n) - 1
            return 4 * np.sum(x**2 + y**2 < 1) / n

        # The inputs are random, so results are only compared to 1 decimal.
        self.check(test_impl, 100000, decimal=1)
        # Expect a single parfor and no array-typed assignments to remain.
        self.assertEqual(countParfors(test_impl, (types.int64, )), 1)
        self.assertEqual(countArrays(test_impl, (types.intp,)), 0)

    def test_blackscholes(self):
        # blackscholes takes 5 1D float array args
        args = (numba.float64[:], ) * 5
        self.assertEqual(countParfors(blackscholes_impl, args), 1)

    @needs_blas
    def test_logistic_regression(self):
        # Only the IR shape is checked here (parfor and allocation counts);
        # lr_impl is not executed.
        args = (numba.float64[:], numba.float64[:,:], numba.float64[:],
                numba.int64)
        self.assertEqual(countParfors(lr_impl, args), 2)
        self.assertEqual(countArrayAllocs(lr_impl, args), 1)

    def test_kmeans(self):
        # Fixed seed so the generated data is reproducible.
        np.random.seed(0)
        N = 1024
        D = 10
        centers = 3
        A = np.random.ranf((N, D))
        init_centroids = np.random.ranf((centers, D))
        self.check(example_kmeans_test, A, centers, 3, init_centroids,
                                    decimal=1)
        # TODO: count parfors after k-means fusion is working
        # requires recursive parfor counting
        arg_typs = (types.Array(types.float64, 2, 'C'), types.intp, types.intp,
                    types.Array(types.float64, 2, 'C'))
        self.assertEqual(
            countNonParforArrayAccesses(example_kmeans_test, arg_typs), 0)
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforNumPy(TestParforsBase):
    """Tests NumPy functionality under parfors"""

    @needs_blas
    def test_mvdot(self):
        # Matrix-vector product through np.dot.
        def test_impl(a, v):
            return np.dot(a, v)

        matrix = np.linspace(0, 1, 20).reshape(2, 10)
        vector = np.linspace(2, 1, 10)

        self.check(test_impl, matrix, vector)

    def test_fuse_argmin_argmax_max_min(self):
        # For every reduction below, the assertions expect a single parfor
        # and no remaining array in the optimized function.
        for op in (np.argmin, np.argmax, np.min, np.max):
            def test_impl(n):
                A = np.ones(n)
                C = op(A)
                B = A.sum()
                return B + C
            self.check(test_impl, 256)
            self.assertEqual(countParfors(test_impl, (types.int64,)), 1)
            self.assertEqual(countArrays(test_impl, (types.intp,)), 0)

    def test_np_random_func_direct_import(self):
        # `randn` is imported directly from numpy.random at module level.
        def test_impl(n):
            A = randn(n)
            return A[0]
        n_parfors = countParfors(test_impl, (types.int64,))
        self.assertEqual(n_parfors, 1)

    def test_arange(self):
        # test with stop only
        def test_impl1(n):
            return np.arange(n)
        # start and stop
        def test_impl2(s, n):
            return np.arange(s, n)
        # start, stop, step
        def test_impl3(s, n, t):
            return np.arange(s, n, t)

        stops = (11, 128, 30.0, complex(4, 5), complex(5, 4))
        for stop in stops:
            self.check(test_impl1, stop)
            self.check(test_impl2, 2, stop)
            self.check(test_impl3, 2, stop, 2)

    def test_arange_dtype(self):
        # test with stop only
        def test_impl1(n):
            return np.arange(n, dtype=np.float32)
        # start and stop
        def test_impl2(s, n):
            return np.arange(s, n, dtype=np.float32)
        # start, stop, step
        def test_impl3(s, n, t):
            return np.arange(s, n, t, dtype=np.float32)

        stops = (11, 128, 30.0)
        for stop in stops:
            self.check(test_impl1, stop)
            self.check(test_impl2, 2, stop)
            self.check(test_impl3, 2, stop, 2)

    def test_linspace(self):
        # without num
        def test_impl1(start, stop):
            return np.linspace(start, stop)
        # with num
        def test_impl2(start, stop, num):
            return np.linspace(start, stop, num)

        stops = (11, 128, 30.0, complex(4, 5), complex(5, 4))
        for stop in stops:
            self.check(test_impl1, 2, stop)
            self.check(test_impl2, 2, stop, 30)

    def test_mean(self):
        def test_impl(A):
            return A.mean()

        N = 100
        floats_1d = np.random.ranf(N)
        ints_2d = np.random.randint(10, size=(N, 3))
        for data in (floats_1d, ints_2d):
            self.check(test_impl, data)
        # One parfor is expected for both the 1-D and the 2-D signature.
        for ndim in (1, 2):
            argty = (types.Array(types.float64, ndim, 'C'),)
            self.assertEqual(countParfors(test_impl, argty), 1)

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_var(self):
        def test_impl(A):
            return A.var()

        N = 100
        floats_1d = np.random.ranf(N)
        ints_2d = np.random.randint(10, size=(N, 3))
        complex_1d = floats_1d + 1j * floats_1d
        for data in (floats_1d, ints_2d, complex_1d):
            self.check(test_impl, data)
        # Two parfors are expected for both the 1-D and the 2-D signature.
        for ndim in (1, 2):
            argty = (types.Array(types.float64, ndim, 'C'),)
            self.assertEqual(countParfors(test_impl, argty), 2)

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_std(self):
        def test_impl(A):
            return A.std()

        N = 100
        floats_1d = np.random.ranf(N)
        ints_2d = np.random.randint(10, size=(N, 3))
        complex_1d = floats_1d + 1j * floats_1d
        for data in (floats_1d, ints_2d, complex_1d):
            self.check(test_impl, data)
        # Mirror test_var: assert the parfor count for the 1-D and the 2-D
        # signature.  (Previously the 1-D assertion was simply repeated and
        # the 2-D case was never counted.)
        for ndim in (1, 2):
            argty = (types.Array(types.float64, ndim, 'C'),)
            self.assertEqual(countParfors(test_impl, argty), 2)

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        self.check_variants(test_impl, data_gen)
        self.count_parfors_variants(test_impl, data_gen)

    def test_random_parfor(self):
        """
        Test function with only a random call to make sure a random function
        like ranf is actually translated to a parfor.
        """
        def test_impl(n):
            A = np.random.ranf((n, n))
            return A
        n_parfors = countParfors(test_impl, (types.int64,))
        self.assertEqual(n_parfors, 1)

    def test_randoms(self):
        def test_impl(n):
            A = np.random.standard_normal(size=(n, n))
            B = np.random.randn(n, n)
            C = np.random.normal(0.0, 1.0, (n, n))
            D = np.random.chisquare(1.0, (n, n))
            E = np.random.randint(1, high=3, size=(n, n))
            F = np.random.triangular(1, 2, 3, (n, n))
            return np.sum(A+B+C+D+E+F)

        n = 128
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),))
        # Run the compiled version first, then the interpreted one.
        parfor_output = cpfunc.entry_point(n)
        py_output = test_impl(n)
        # check results within 5% since random numbers generated in parallel
        np.testing.assert_allclose(parfor_output, py_output, rtol=0.05)
        self.assertEqual(countParfors(test_impl, (types.int64,)), 1)

    def test_dead_randoms(self):
        # None of the random arrays is used by the return value; the final
        # assertion expects no parfor to remain.
        def test_impl(n):
            A = np.random.standard_normal(size=(n, n))
            B = np.random.randn(n, n)
            C = np.random.normal(0.0, 1.0, (n, n))
            D = np.random.chisquare(1.0, (n, n))
            E = np.random.randint(1, high=3, size=(n, n))
            F = np.random.triangular(1, 2, 3, (n, n))
            return 3

        n = 128
        cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),))
        parfor_output = cpfunc.entry_point(n)
        py_output = test_impl(n)
        self.assertEqual(parfor_output, py_output)
        self.assertEqual(countParfors(test_impl, (types.int64,)), 0)

    def test_min(self):
        def test_impl1(A):
            return A.min()

        def test_impl2(A):
            return np.min(A)

        n = 211
        A = np.random.ranf(n)
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([np.inf, np.inf])
        impls = (test_impl1, test_impl2)
        for impl in impls:
            for data in (A, B, C, D):
                self.check(impl, data)

        # checks that an empty (zero-size) array input raises
        msg = ("zero-size array to reduction operation "
               "minimum which has no identity")
        for impl in impls:
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        for impl in impls:
            self.check_variants(impl, data_gen)
            self.count_parfors_variants(impl, data_gen)

    def test_max(self):
        def test_impl1(A):
            return A.max()

        def test_impl2(A):
            return np.max(A)

        n = 211
        A = np.random.ranf(n)
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([-np.inf, -np.inf])
        impls = (test_impl1, test_impl2)
        for impl in impls:
            for data in (A, B, C, D):
                self.check(impl, data)

        # checks that an empty (zero-size) array input raises
        msg = ("zero-size array to reduction operation "
               "maximum which has no identity")
        for impl in impls:
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        for impl in impls:
            self.check_variants(impl, data_gen)
            self.count_parfors_variants(impl, data_gen)

    def test_argmax(self):
        def test_impl1(A):
            return A.argmax()

        def test_impl2(A):
            return np.argmax(A)

        n = 211
        A = np.array([1., 0., 3., 2., 3.])
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([1., 0., np.nan, 2., 3.])
        impls = (test_impl1, test_impl2)
        for impl in impls:
            for data in (A, B, C, D):
                self.check(impl, data)

        # checks that an empty (zero-size) array input raises
        msg = 'attempt to get argmax of an empty sequence'
        for impl in impls:
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        for impl in impls:
            self.check_variants(impl, data_gen)
            self.count_parfors_variants(impl, data_gen)

    def test_argmin(self):
        def test_impl1(A):
            return A.argmin()

        def test_impl2(A):
            return np.argmin(A)

        n = 211
        A = np.array([1., 0., 2., 0., 3.])
        B = np.random.randint(10, size=n).astype(np.int32)
        C = np.random.ranf((n, n))  # test multi-dimensional array
        D = np.array([1., 0., np.nan, 0., 3.])
        impls = (test_impl1, test_impl2)
        for impl in impls:
            for data in (A, B, C, D):
                self.check(impl, data)

        # checks that an empty (zero-size) array input raises
        msg = 'attempt to get argmin of an empty sequence'
        for impl in impls:
            pcfunc = self.compile_parallel(impl, (types.int64[:],))
            with self.assertRaises(ValueError) as e:
                pcfunc.entry_point(np.array([], dtype=np.int64))
            self.assertIn(msg, str(e.exception))

        # Test variants
        def data_gen():
            return self.gen_linspace_variants(1)
        for impl in impls:
            self.check_variants(impl, data_gen)
            self.count_parfors_variants(impl, data_gen)

    def test_ndarray_fill(self):
        def test_impl(x):
            x.fill(7.0)
            return x

        self.check(test_impl, np.zeros(10))
        argty = (types.Array(types.float64, 1, 'C'),)
        self.assertEqual(countParfors(test_impl, argty), 1)

    def test_ndarray_fill2d(self):
        def test_impl(x):
            x.fill(7.0)
            return x

        self.check(test_impl, np.zeros((2, 2)))
        argty = (types.Array(types.float64, 2, 'C'),)
        self.assertEqual(countParfors(test_impl, argty), 1)

    def test_reshape_with_neg_one(self):
        # issue3314
        def test_impl(a, b):
            result_matrix = np.zeros((b, b, 1), dtype=np.float64)
            sub_a = a[0:b]
            a = sub_a.size
            b = a / 1
            z = sub_a.reshape(-1, 1)
            result_data = sub_a / z
            result_matrix[:,:,0] = result_data
            return result_matrix

        values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                           7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
        self.check(test_impl, values, 3)

    def test_reshape_with_large_neg(self):
        # issue3314
        def test_impl(a, b):
            result_matrix = np.zeros((b, b, 1), dtype=np.float64)
            sub_a = a[0:b]
            a = sub_a.size
            b = a / 1
            z = sub_a.reshape(-1307, 1)
            result_data = sub_a / z
            result_matrix[:,:,0] = result_data
            return result_matrix

        values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                           7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
        self.check(test_impl, values, 3)

    def test_reshape_with_too_many_neg_one(self):
        # issue3314
        with self.assertRaises(errors.UnsupportedRewriteError) as raised:
            @njit(parallel=True)
            def test_impl(a, b):
                rm = np.zeros((b, b, 1), dtype=np.float64)
                sub_a = a[0:b]
                a = sub_a.size
                b = a / 1
                z = sub_a.reshape(-1, -1)
                result_data = sub_a / z
                rm[:,:,0] = result_data
                return rm

            values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0,
                               7.0, 8.0, 9.0, 10.0, 11.0, 12.0])
            test_impl(values, 3)

        msg = ("The reshape API may only include one negative argument.")
        self.assertIn(msg, str(raised.exception))

    def test_0d_array(self):
        # Reductions applied to a 0-d array argument.
        def test_impl(n):
            return np.sum(n) + np.prod(n) + np.min(n) + np.max(n) + np.var(n)
        self.check(test_impl, np.array(7), check_scheduling=False)

    def test_real_imag_attr(self):
        # See issue 8012
        def test_impl(z):
            return np.sum(z.real ** 2 + z.imag ** 2)

        z = np.arange(5) * (1 + 1j)
        self.check(test_impl, z)
        n_parfors = countParfors(test_impl, (types.complex128[::1],))
        self.assertEqual(n_parfors, 1)
|
||
|
|
||
|
|
||
|
class TestParforsUnsupported(TestCase):
    """Tests for unsupported use of parfors"""
    @unittest.skipUnless(_32bit, "Only impacts 32 bit hardware")
    @needs_blas
    def test_unsupported_combination_raises(self):
        """
        This test is in place until issues with the 'parallel'
        target on 32 bit hardware are fixed.
        """
        expected = ("The 'parallel' target is not currently supported on 32 bit "
                    "hardware")
        matrix = np.linspace(0, 1, 20).reshape(2, 10)
        vector = np.linspace(2, 1, 10)

        # Both the decoration and the call stay inside the context manager,
        # so the error is caught wherever it is raised.
        with self.assertRaises(errors.UnsupportedParforsError) as raised:
            @njit(parallel=True)
            def ddot(a, v):
                return np.dot(a, v)

            ddot(matrix, vector)

        self.assertIn(expected, str(raised.exception))
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
class TestParfors(TestParforsBase):
|
||
|
""" Tests cpython, reduction and various parfors features"""
|
||
|
|
||
|
def test_arraymap(self):
    # Element-wise a * x + y, checked over the three-argument linspace
    # variants.
    def test_impl(a, x, y):
        return a * x + y

    def three_arg_variants():
        return self.gen_linspace_variants(3)

    self.check_variants(test_impl, three_arg_variants)
|
||
|
|
||
|
def test_0d_broadcast(self):
    # A 0-d array added to a 2-d array; one parfor is expected.
    def test_impl():
        X = np.array(1)
        Y = np.ones((10, 12))
        return np.sum(X + Y)
    self.check(test_impl)
    n_parfors = countParfors(test_impl, ())
    self.assertEqual(n_parfors, 1)
|
||
|
|
||
|
def test_2d_parfor(self):
    # Sum over the element-wise addition of two 2-d arrays; one parfor is
    # expected.
    def test_impl():
        X = np.ones((10, 12))
        Y = np.zeros((10, 12))
        return np.sum(X + Y)
    self.check(test_impl)
    n_parfors = countParfors(test_impl, ())
    self.assertEqual(n_parfors, 1)
|
||
|
|
||
|
def test_nd_parfor(self):
    def test_impl(X, Y):
        return np.sum(X + Y)

    def case1():
        # A single hand-written (X, Y) pair of 2-d arrays.
        yield (np.ones((10, 12)), np.zeros((10, 12)))

    def data_gen():
        # The fixed case first, then the generated two-argument variants.
        return chain(case1(), self.gen_linspace_variants(2))

    self.check_variants(test_impl, data_gen)
    self.count_parfors_variants(test_impl, data_gen)
|
||
|
|
||
|
def test_np_func_direct_import(self):
    from numpy import ones  # import here becomes FreeVar

    def test_impl(n):
        A = ones(n)
        return A[0]

    self.check(test_impl, 111)
|
||
|
|
||
|
def test_size_assertion(self):
    def test_impl(m, n):
        A = np.ones(m)
        B = np.ones(n)
        return np.sum(A + B)

    # Equal lengths go through the regular check.
    self.check(test_impl, 10, 10)
    # Unequal lengths are expected to raise an AssertionError that names
    # both arrays.
    with self.assertRaises(AssertionError) as raises:
        parallel_impl = njit(parallel=True)(test_impl)
        parallel_impl(10, 9)
    self.assertIn("Sizes of A, B do not match", str(raises.exception))
|
||
|
|
||
|
def test_cfg(self):
    # from issue #2477: a prange whose body holds a nested range loop with
    # a conditional store
    def test_impl(x, is_positive, N):
        for i in numba.prange(2):
            for j in range( i*N//2, (i+1)*N//2 ):
                is_positive[j] = 0
                if x[j] > 0:
                    is_positive[j] = 1

        return is_positive

    size = 100
    samples = np.random.rand(size)
    flags = np.zeros(size)
    self.check(test_impl, samples, flags, size)
|
||
|
|
||
|
def test_reduce(self):
    # functools.reduce with a two-argument lambda and an initial value,
    # on a float input and then on an int32 input
    def test_impl(A):
        init_val = 10
        return reduce(lambda a,b: min(a, b), A, init_val)

    length = 211
    float_data = np.random.ranf(length)
    self.check(test_impl, float_data)
    int_data = np.random.randint(10, size=length).astype(np.int32)
    self.check(test_impl, int_data)

    # test checking the number of arguments for the reduce function
    def test_impl():
        g = lambda x: x ** 2
        return reduce(g, np.array([1, 2, 3, 4, 5]), 2)
    with self.assertTypingError():
        self.check(test_impl)

    # test checking reduction over bitarray masked arrays
    masked_input = np.random.randint(10, size=160).astype(np.int32)
    masked_input_2d = masked_input.reshape((16, 10))

    def test_impl(A):
        return np.sum(A[A>=3])
    self.check(test_impl, masked_input)
    # TODO: this should fuse
    # self.assertTrue(countParfors(test_impl, (numba.float64[:],)) == 1)

    def test_impl(A):
        B = A[:,0]
        return np.sum(A[B>=3,1])
    self.check(test_impl, masked_input_2d)
    # TODO: this should also fuse
    #self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 1)

    def test_impl(A):
        B = A[:,0]
        return np.sum(A[B>=3,1:2])
    self.check(test_impl, masked_input_2d)
    # this doesn't fuse due to mixed indices
    n_parfors = countParfors(test_impl, (numba.float64[:, :],))
    self.assertEqual(n_parfors, 2)

    def test_impl(A):
        min_val = np.amin(A)
        return A - min_val
    self.check(test_impl, masked_input)
    # this doesn't fuse due to use of reduction variable
    n_parfors = countParfors(test_impl, (numba.float64[:],))
    self.assertEqual(n_parfors, 2)
|
||
|
|
||
|
def test_use_of_reduction_var1(self):
    def test_impl():
        acc = 0
        for i in prange(1):
            acc = cmath.sqrt(acc)
        return acc

    # checks that invalid use of reduction variable is detected
    with self.assertRaises(ValueError) as e:
        self.compile_parallel(test_impl, ())
    expected = ("Use of reduction variable acc in an unsupported reduction function.")
    self.assertIn(expected, str(e.exception))
|
||
|
|
||
|
def test_unsupported_floordiv1(self):
    def test_impl():
        acc = 100
        for i in prange(2):
            acc //= 2
        return acc

    # checks that invalid use of ifloordiv reduction operator is detected
    with self.assertRaises(errors.NumbaValueError) as e:
        self.compile_parallel(test_impl, ())
    expected = ("Parallel floordiv reductions are not supported. "
                "If all divisors are integers then a floordiv "
                "reduction can in some cases be parallelized as "
                "a multiply reduction followed by a floordiv of "
                "the resulting product.")
    self.assertIn(expected, str(e.exception))
|
||
|
|
||
|
def test_unsupported_xor1(self):
    def test_impl():
        acc = 100
        for i in prange(2):
            acc ^= i + 2
        return acc

    # An in-place xor on the accumulator is expected to be rejected at
    # compile time.
    with self.assertRaises(ValueError) as e:
        self.compile_parallel(test_impl, ())
    expected = ("Use of reduction variable acc in an unsupported reduction function.")
    self.assertIn(expected, str(e.exception))
|
||
|
|
||
|
def test_parfor_array_access1(self):
    # signed index of the prange generated by sum() should be replaced
    # resulting in array A to be eliminated (see issue #2846)
    def test_impl(n):
        A = np.ones(n)
        return A.sum()

    self.check(test_impl, 211)
    n_arrays = countArrays(test_impl, (types.intp,))
    self.assertEqual(n_arrays, 0)
|
||
|
|
||
|
def test_parfor_array_access2(self):
    # in this test, the prange index has the same name (i) in two loops
    # thus, i has multiple definitions and is harder to replace
    def test_impl(n):
        A = np.ones(n)
        m = 0
        n = 0
        for i in numba.prange(len(A)):
            m += A[i]

        for i in numba.prange(len(A)):
            if m == n:  # access in another block
                n += A[i]

        return m + n

    self.check(test_impl, 211)
    n_outside = countNonParforArrayAccesses(test_impl, (types.intp,))
    self.assertEqual(n_outside, 0)
|
||
|
|
||
|
def test_parfor_array_access3(self):
    # Assigning to the prange index inside the loop body is expected to be
    # rejected.
    def test_impl(n):
        A = np.ones(n, np.int64)
        m = 0
        for i in numba.prange(len(A)):
            m += A[i]
            if m==2:
                i = m

    with self.assertRaises(errors.UnsupportedRewriteError) as raises:
        self.check(test_impl, 211)
    expected = "Overwrite of parallel loop index"
    self.assertIn(expected, str(raises.exception))
|
||
|
|
||
|
@needs_blas
def test_parfor_array_access4(self):
    # in this test, one index of a multi-dim access should be replaced
    # np.dot parallel implementation produces this case
    def test_impl(A, b):
        return np.dot(A, b)

    rows, cols = 211, 4
    matrix = np.random.ranf((rows, cols))
    vector = np.random.ranf(cols)
    self.check(test_impl, matrix, vector)
    # make sure the parfor index is replaced in build_tuple of access to A
    sig = (types.Array(types.float64, 2, 'C'),
           types.Array(types.float64, 1, 'C'))
    test_ir, _tp = get_optimized_numba_ir(test_impl, sig)
    # this code should have one basic block after optimization
    self.assertTrue(len(test_ir.blocks) == 1 and 0 in test_ir.blocks)

    # Keep the last Parfor statement of that block (None if there is none).
    parfor = None
    for stmt in test_ir.blocks[0].body:
        if isinstance(stmt, numba.parfors.parfor.Parfor):
            parfor = stmt
    self.assertTrue(parfor is not None)

    # Every build_tuple assignment in the loop body must contain the
    # parfor index variable, and at least one must exist.
    build_tuple_found = False
    for bl in parfor.loop_body.values():
        for stmt in bl.body:
            if not isinstance(stmt, ir.Assign):
                continue
            rhs = stmt.value
            if isinstance(rhs, ir.Expr) and rhs.op == 'build_tuple':
                build_tuple_found = True
                self.assertTrue(parfor.index_var in rhs.items)

    self.assertTrue(build_tuple_found)
|
||
|
|
||
|
def test_parfor_dtype_type(self):
    # test array type replacement creates proper type
    def test_impl(a):
        for i in numba.prange(len(a)):
            a[i] = a.dtype.type(0)
        return a[4]

    self.check(test_impl, np.ones(10))
|
||
|
|
||
|
def test_parfor_array_access5(self):
    # one dim is slice in multi-dim access
    def test_impl(n):
        X = np.ones((n, 3))
        y = 0
        for i in numba.prange(n):
            y += X[i,:].sum()
        return y

    self.check(test_impl, 211)
    n_outside = countNonParforArrayAccesses(test_impl, (types.intp,))
    self.assertEqual(n_outside, 0)
|
||
|
|
||
|
@disabled_test # Test itself is problematic, see #3155
def test_parfor_hoist_setitem(self):
    # Make sure that read of out is not hoisted.
    def test_impl(out):
        for i in prange(10):
            out[0] = 2 * out[0]
        return out[0]

    single_cell = np.ones(1)
    self.check(test_impl, single_cell)
|
||
|
|
||
|
@needs_blas
def test_parfor_generate_fuse(self):
    # issue #2857
    def test_impl(N, D):
        w = np.ones(D)
        X = np.ones((N, D))
        Y = np.ones(N)
        for i in range(3):
            B = (-Y * np.dot(X, w))

        return B

    self.check(test_impl, 211, 3)
    sig = (types.intp, types.intp)
    # Four array allocations and four parfors are expected.
    self.assertEqual(countArrayAllocs(test_impl, sig), 4)
    self.assertEqual(countParfors(test_impl, sig), 4)
|
||
|
|
||
|
def test_ufunc_expr(self):
    # issue #2885
    def test_impl(A, B):
        return np.bitwise_and(A, B)

    lhs = np.ones(3, np.uint8)
    rhs = np.ones(3, np.uint8)
    rhs[1] = 0  # make the two operands differ in one position
    self.check(test_impl, lhs, rhs)
|
||
|
|
||
|
def test_find_callname_intrinsic(self):
    def test_impl(n):
        A = unsafe_empty((n,))
        for i in range(n):
            A[i] = i + 2.0
        return A

    # the unsafe allocation should be found even though it is imported
    # as a different name
    n_allocs = countArrayAllocs(test_impl, (types.intp,))
    self.assertEqual(n_allocs, 1)
|
||
|
|
||
|
def test_reduction_var_reuse(self):
    # issue #3139: the same accumulator is reduced in two consecutive
    # prange loops
    def test_impl(n):
        acc = 0
        for i in prange(n):
            acc += 1

        for i in prange(n):
            acc += 2

        return acc

    n_iters = 16
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_non_identity_initial(self):
    # issue #7344: the accumulator starts at 1 rather than 0
    def test_impl(A, cond):
        s = 1
        for i in prange(A.shape[0]):
            if cond[i]:
                s += 1
        return s

    values = np.ones(10)
    mask = np.ones(10).astype('bool')
    self.check(test_impl, values, mask)
|
||
|
|
||
|
def test_if_not_else_reduction(self):
    # issue #7344: `t` is only updated in the if-branch, `s` in both
    def test_impl(A, cond):
        s = 1
        t = 10
        for i in prange(A.shape[0]):
            if cond[i]:
                s += 1
                t += 1
            else:
                s += 2
        return s + t

    values = np.ones(10)
    mask = np.ones(10).astype('bool')
    self.check(test_impl, values, mask)
|
||
|
|
||
|
def test_two_d_array_reduction_reuse(self):
    # The same 2-d array is the reduction target of two consecutive
    # prange loops.
    def test_impl(n):
        shp = (13, 17)
        size = shp[0] * shp[1]
        result1 = np.zeros(shp, np.int_)
        tmp = np.arange(size).reshape(shp)

        for i in numba.prange(n):
            result1 += tmp

        for i in numba.prange(n):
            result1 += tmp

        return result1

    n_iters = 100
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_one_d_array_reduction(self):
    # In-place addition into a one-element array inside a prange.
    def test_impl(n):
        result = np.zeros(1, np.int_)

        for i in numba.prange(n):
            result += np.array([i], np.int_)

        return result

    n_iters = 100
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_two_d_array_reduction(self):
    # In-place addition into a 2-d array inside a prange.
    def test_impl(n):
        shp = (13, 17)
        size = shp[0] * shp[1]
        result1 = np.zeros(shp, np.int_)
        tmp = np.arange(size).reshape(shp)

        for i in numba.prange(n):
            result1 += tmp

        return result1

    n_iters = 100
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_two_d_array_reduction_with_float_sizes(self):
    # result1 is float32 and tmp is float64.
    # Tests reduction with differing dtypes.
    def test_impl(n):
        shp = (2, 3)
        result1 = np.zeros(shp, np.float32)
        tmp = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(shp)

        for i in numba.prange(n):
            result1 += tmp

        return result1

    n_iters = 100
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_two_d_array_reduction_prod(self):
    # In-place multiplication into a 2-d array inside a prange.
    def test_impl(n):
        shp = (13, 17)
        result1 = 2 * np.ones(shp, np.int_)
        tmp = 2 * np.ones_like(result1)

        for i in numba.prange(n):
            result1 *= tmp

        return result1

    n_iters = 100
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_three_d_array_reduction(self):
    # In-place addition into a 3-d array inside a prange.
    def test_impl(n):
        shp = (3, 2, 7)
        result1 = np.zeros(shp, np.int_)

        for i in numba.prange(n):
            result1 += np.ones(shp, np.int_)

        return result1

    n_iters = 100
    self.check(test_impl, n_iters)
|
||
|
|
||
|
def test_preparfor_canonicalize_kws(self):
    # test canonicalize_array_math typing for calls with kw args
    def test_impl(A):
        return A.argsort() + 1

    self.check(test_impl, np.arange(211))
|
||
|
|
||
|
def test_preparfor_datetime64(self):
    # test array.dtype transformation for datetime64
    def test_impl(A):
        return A.dtype

    stamps = np.empty(1, np.dtype('datetime64[ns]'))
    cpfunc = self.compile_parallel(test_impl, (numba.typeof(stamps),))
    compiled_result = cpfunc.entry_point(stamps)
    self.assertEqual(compiled_result, test_impl(stamps))
|
||
|
|
||
|
def test_no_hoisting_with_member_function_call(self):
    # A set is built and then mutated through a method call in every
    # prange iteration.
    def test_impl(X):
        n = X.shape[0]
        acc = 0
        for i in prange(n):
            R = {1, 2, 3}
            R.add(i)
            tmp = 0
            for x in R:
                tmp += x
            acc += tmp
        return acc

    samples = np.random.ranf(128)
    self.check(test_impl, samples)
|
||
|
|
||
|
def test_array_compare_scalar(self):
    """ issue3671: X != 0 becomes an arrayexpr with operator.ne.
        That is turned into a parfor by devectorizing.  Make sure
        the return type of the devectorized operator.ne
        on integer types works properly.
    """
    def test_impl():
        X = np.zeros(10, dtype=np.int_)
        return X != 0

    # test_impl takes no arguments, so none are passed to the check.
    self.check(test_impl)
|
||
|
|
||
|
def test_array_analysis_optional_def(self):
    # `parr` is conditionally re-bound to a shorter slice before the
    # reduction.
    def test_impl(x, half):
        size = len(x)
        parr = x[0:size]

        if half:
            parr = x[0:size//2]

        return parr.sum()

    self.check(test_impl, np.ones(20), True, check_scheduling=False)
|
||
|
|
||
|
def test_prange_side_effects(self):
    # Two prange loops over the same array; the second reads what the
    # first wrote.  One parfor is expected.
    def test_impl(a, b):
        data = np.empty(len(a), dtype=np.float64)
        size = len(data)
        for i in numba.prange(size):
            data[i] = a[i]
        for i in numba.prange(size):
            data[i] = data[i] + b[i]
        return data

    first = np.arange(10 ** 2, dtype=float)
    second = np.arange(10 ** 2, dtype=float)

    self.check(test_impl, first, second)
    float_1d = types.Array(types.float64, 1, 'C')
    n_parfors = countParfors(test_impl, (float_1d, float_1d))
    self.assertEqual(n_parfors, 1)
|
||
|
|
||
|
def test_tuple1(self):
    # A literal tuple defined outside the prange is read inside it.
    def test_impl(a):
        atup = (3, 4)
        b = 7
        for i in numba.prange(len(a)):
            a[i] += atup[0] + atup[1] + b
        return a

    self.check(test_impl, np.arange(10))
|
||
|
|
||
|
def test_tuple2(self):
    # The array's shape tuple is read inside the prange.
    def test_impl(a):
        atup = a.shape
        b = 7
        for i in numba.prange(len(a)):
            a[i] += atup[0] + b
        return a

    self.check(test_impl, np.arange(10))
|
||
|
|
||
|
def test_tuple3(self):
    # A tuple holding an array and a scalar is read inside the prange.
    def test_impl(a):
        atup = (np.arange(10), 4)
        b = 7
        for i in numba.prange(len(a)):
            a[i] += atup[0][5] + atup[1] + b
        return a

    self.check(test_impl, np.arange(10))
|
||
|
|
||
|
def test_namedtuple1(self):
    # Uses the TestNamedTuple type that test_impl resolves from outside
    # this method.
    def test_impl(a):
        antup = TestNamedTuple(part0=3, part1=4)
        b = 7
        for i in numba.prange(len(a)):
            a[i] += antup.part0 + antup.part1 + b
        return a

    self.check(test_impl, np.arange(10))
|
||
|
|
||
|
def test_namedtuple2(self):
    # Same as the module-level namedtuple case, but the type is created
    # locally and captured by test_impl.
    TestNamedTuple2 = namedtuple('TestNamedTuple2', ('part0', 'part1'))

    def test_impl(a):
        antup = TestNamedTuple2(part0=3, part1=4)
        b = 7
        for i in numba.prange(len(a)):
            a[i] += antup.part0 + antup.part1 + b
        return a

    self.check(test_impl, np.arange(10))
|
||
|
|
||
|
def test_namedtuple3(self):
    # issue5872: test that a.y[:] = 5 is not removed as
    # deadcode.
    TestNamedTuple3 = namedtuple('TestNamedTuple3', ['y'])

    def test_impl(a):
        a.y[:] = 5

    def comparer(a, b):
        # Compare the array field of the two namedtuple arguments.
        np.testing.assert_almost_equal(a.y, b.y)

    holder = TestNamedTuple3(y=np.zeros(10))
    self.check(test_impl, holder, check_arg_equality=[comparer])
|
||
|
|
||
|
def test_inplace_binop(self):
    # In-place addition of one array argument into the other; one parfor
    # is expected for the float64 signature.
    def test_impl(a, b):
        b += a
        return b

    addend = np.arange(10) + 10
    target = np.arange(10) + 100
    self.check(test_impl, addend, target)
    float_1d = types.Array(types.float64, 1, 'C')
    n_parfors = countParfors(test_impl, (float_1d, float_1d))
    self.assertEqual(n_parfors, 1)
|
||
|
|
||
|
def test_tuple_concat(self):
    # issue5383: the array shape is built by concatenating tuples
    def test_impl(a):
        n = len(a)
        array_shape = n, n
        indices = np.zeros(((1,) + array_shape + (1,)), dtype=np.uint64)
        k_list = indices[0, :]

        for i, g in enumerate(a):
            k_list[i, i] = i
        return k_list

    self.check(test_impl, np.array([1, 1]))
|
||
|
|
||
|
def test_tuple_concat_with_reverse_slice(self):
    # issue5383
    # As test_tuple_concat, but the concatenated shape tuple is also
    # sliced with [:-1] before being passed to np.zeros.
    def test_impl(a):
        n = len(a)
        array_shape = n, n
        indices = np.zeros(((1,) + array_shape + (1,))[:-1],
                           dtype=np.uint64)
        k_list = indices[0, :]

        for i, g in enumerate(a):
            k_list[i, i] = i
        return k_list

    x = np.array([1, 1])
    self.check(test_impl, x)
|
||
|
|
||
|
def test_array_tuple_concat(self):
    # issue6399
    # Tuples whose elements are arrays are concatenated, then the first
    # element is reduced with sum().
    def test_impl(a):
        S = (a,) + (a, a)
        return S[0].sum()

    x = np.ones((3,3))
    self.check(test_impl, x)
|
||
|
|
||
|
def test_high_dimension1(self):
    # issue6749
    # Scalar multiply of a six-dimensional array.
    def test_impl(x):
        return x * 5.0
    x = np.ones((2, 2, 2, 2, 2, 15))
    self.check(test_impl, x)
|
||
|
|
||
|
def test_tuple_arg(self):
    # pndindex over a shape tuple received as a function argument; the
    # tuple matches the shape of the array being written.
    def test_impl(x, sz):
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    sz = (10, 5)
    self.check(test_impl, np.empty(sz), sz)
|
||
|
|
||
|
def test_tuple_arg_not_whole_array(self):
    # pndindex over an argument tuple (10, 3) that is smaller than the
    # (10, 5) array being written.
    def test_impl(x, sz):
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    sz = (10, 5)
    self.check(test_impl, np.zeros(sz), (10, 3))
|
||
|
|
||
|
def test_tuple_for_pndindex(self):
    # pndindex over a tuple of constants defined inside the kernel.
    def test_impl(x):
        sz = (10, 5)
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    sz = (10, 5)
    self.check(test_impl, np.zeros(sz))
|
||
|
|
||
|
def test_tuple_arg_literal(self):
    # pndindex over a tuple built inside the kernel from one argument
    # value and one constant.
    def test_impl(x, first):
        sz = (first, 5)
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    sz = (10, 5)
    self.check(test_impl, np.zeros(sz), 10)
|
||
|
|
||
|
def test_tuple_of_literal_nonliteral(self):
    # This test has to be done manually as the self.check uses
    # compile_isolated and one function cannot "see" the other

    def test_impl(x, sz):
        for i in numba.pndindex(sz):
            x[i] = 1
        return x

    def call(x, fn):
        return fn(x, (10, 3)) # Only want to iterate to the 3rd

    # A fresh input is produced for every run because test_impl writes
    # into its array argument.
    get_input = lambda: np.zeros((10, 10))
    # Reference result from the undecorated Python functions.
    expected = call(get_input(), test_impl)

    def check(dec):
        # Compile test_impl with the decorator under test and call it
        # from a plainly jitted caller.
        f1 = dec(test_impl)
        f2 = njit(call) # no parallel semantics in the caller
        got = f2(get_input(), f1)
        self.assertPreciseEqual(expected, got)

    for d in (njit, njit(parallel=True)):
        check(d)
|
||
|
|
||
|
def test_tuple_arg_1d(self):
    # pndindex over a one-element tuple received as an argument.
    def test_impl(x, sz):
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    sz = (10,)
    self.check(test_impl, np.zeros(sz), sz)
|
||
|
|
||
|
def test_tuple_arg_1d_literal(self):
    # pndindex over a one-element constant tuple defined in the kernel.
    def test_impl(x):
        sz = (10,)
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    sz = (10,)
    self.check(test_impl, np.zeros(sz))
|
||
|
|
||
|
def test_int_arg_pndindex(self):
    # pndindex is given a plain integer (3) rather than a tuple while the
    # array being indexed is two-dimensional.
    def test_impl(x, sz):
        for i in numba.pndindex(sz):
            x[i] = 1
        return x
    self.check(test_impl, np.zeros((10, 10)), 3)
|
||
|
|
||
|
def test_prange_unknown_call1(self):
    @register_jitable
    def issue7854_proc(u, i, even, size):
        # Writes row i of u from row i + 1, visiting every other column;
        # `even` selects which column parity is visited.
        for j in range((even + i + 1) % 2 + 1, size - 1, 2):
            u[i, j] = u[i + 1, j] + 1

    # issue7854
    # Forbid fusion in unanalyzable call inside prange.
    def test_impl(u, size):
        for i in numba.prange(1, size - 1):
            issue7854_proc(u, i, 0, size)
        for i in numba.prange(1, size - 1):
            issue7854_proc(u, i, 1, size)
        return u

    size = 4
    u = np.zeros((size, size))
    cptypes = (numba.float64[:, ::1], types.int64)
    # Two parfors are expected, i.e. the two prange loops stay separate.
    self.assertEqual(countParfors(test_impl, cptypes), 2)
    self.check(test_impl, u, size)
|
||
|
|
||
|
def test_prange_index_calc1(self):
    # Should forbid fusion due to cross-iteration dependency as
    # detected by loop index calculation (i+1) as array index.
    def test_impl(u, size):
        for i in numba.prange(1, size - 1):
            for j in range((i + 1) % 2 + 1, size - 1, 2):
                u[i, j] = u[i + 1, j] + 1
        for i in numba.prange(1, size - 1):
            for j in range(i % 2 + 1, size - 1, 2):
                u[i, j] = u[i + 1, j] + 1
        return u

    size = 4
    u = np.zeros((size, size))
    cptypes = (numba.float64[:, ::1], types.int64)
    # Two parfors are expected, i.e. the two prange loops stay separate.
    self.assertEqual(countParfors(test_impl, cptypes), 2)
    self.check(test_impl, u, size)
|
||
|
|
||
|
def test_prange_reverse_order1(self):
    # Testing if reversed loop index usage as array index
    # prevents fusion.
    def test_impl(a, b, size):
        for i in numba.prange(size):
            for j in range(size):
                a[i, j] = b[i, j] + 1
        for i in numba.prange(size):
            for j in range(size):
                # b is indexed [j, i] here but [i, j] in the loop above.
                b[j, i] = 3
        return a[0, 0] + b[0, 0]

    size = 10
    a = np.zeros((size, size))
    b = np.zeros((size, size))
    cptypes = (numba.float64[:, ::1], numba.float64[:, ::1], types.int64)
    self.assertEqual(countParfors(test_impl, cptypes), 2)
    self.check(test_impl, a, b, size)
|
||
|
|
||
|
def test_prange_parfor_index_then_not(self):
    # Testing if accessing an array first with a parfor index then
    # without will prevent fusion.
    def test_impl(a, size):
        b = 0
        for i in numba.prange(size):
            a[i] = i
        for i in numba.prange(size):
            # Constant index: every iteration reads the same element.
            b += a[5]
        return b

    size = 10
    a = np.zeros(size)
    cptypes = (numba.float64[:], types.int64)
    self.assertEqual(countParfors(test_impl, cptypes), 2)
    self.check(test_impl, a, size)
|
||
|
|
||
|
def test_prange_parfor_index_const_tuple_fusion(self):
    # Testing if accessing a tuple with prange index
    # and later with a constant will not prevent fusion.
    def test_impl(a, tup, size):
        acc = 0
        for i in numba.prange(size):
            a[i] = i + tup[i]
        for i in numba.prange(size):
            acc += a[i] + tup[1]
        return acc

    size = 10
    a = np.zeros(size)
    b = tuple(a)
    cptypes = (numba.float64[:],
               types.containers.UniTuple(types.float64, size),
               types.intp)
    # A single parfor is expected, i.e. the two prange loops were fused.
    self.assertEqual(countParfors(test_impl, cptypes), 1)
    self.check(test_impl, a, b, size)
|
||
|
|
||
|
def test_prange_non_parfor_index_then_opposite(self):
    # Testing if accessing an array first without a parfor index then
    # with will prevent fusion.
    def test_impl(a, b, size):
        for i in numba.prange(size):
            b[i] = a[5]
        for i in numba.prange(size):
            a[i] = i
        # Need this to stop previous prange from being optimized away.
        b[0] += a[0]
        return b

    size = 10
    a = np.zeros(size)
    b = np.zeros(size)
    cptypes = (numba.float64[:], numba.float64[:], types.int64)
    self.assertEqual(countParfors(test_impl, cptypes), 2)
    self.check(test_impl, a, b, size)
|
||
|
|
||
|
def test_prange_optional(self):
    # The prange body is guarded by an argument that defaults to None.
    def test_impl(arr, pred=None):
        for i in prange(1):
            if pred is not None:
                arr[i] = 0.0

    arr = np.ones(10)
    # One comparison callable is supplied per positional argument.
    self.check(test_impl, arr, None,
               check_arg_equality=[np.testing.assert_almost_equal,
                                   lambda x, y: x == y])
    # pred was None, so the guarded store must not have touched arr.
    self.assertEqual(arr.sum(), 10.0)
|
||
|
|
||
|
def test_untraced_value_tuple(self):
    # This is a test for issue #6478.
    def test_impl():
        a = (1.2, 1.3)
        return a[0]

    # The kernel contains nothing to parallelise, so self.check is
    # expected to fail its '@do_scheduling' lookup with AssertionError.
    with self.assertRaises(AssertionError) as raises:
        self.check(test_impl)
    self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
|
||
|
|
||
|
def test_recursive_untraced_value_tuple(self):
    # This is a test for issue #6478.
    # Same as test_untraced_value_tuple, with the tuple nested one level.
    def test_impl():
        a = ((1.2, 1.3),)
        return a[0][0]

    with self.assertRaises(AssertionError) as raises:
        self.check(test_impl)
    self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
|
||
|
|
||
|
def test_untraced_value_parfor(self):
    # This is a test for issue #6478.
    # A constant tuple is read inside and after three consecutive prange
    # loops that feed each other.
    def test_impl(arr):
        a = (1.2, 1.3)
        n1 = len(arr)
        arr2 = np.empty(n1, np.float64)
        for i in prange(n1):
            arr2[i] = arr[i] * a[0]
        n2 = len(arr2)
        arr3 = np.empty(n2, np.float64)
        for j in prange(n2):
            arr3[j] = arr2[j] - a[1]
        total = 0.0
        n3 = len(arr3)
        for k in prange(n3):
            total += arr3[k]
        return total + a[0]

    arg = (types.Array(types.int64, 1, 'C'), )
    # All three loops are expected to end up in one parfor.
    self.assertEqual(countParfors(test_impl, arg), 1)

    arr = np.arange(10, dtype=np.int64)
    self.check(test_impl, arr)
|
||
|
|
||
|
def test_setitem_2d_one_replaced(self):
    # issue7843
    def test_impl(x):
        count = 0
        for n in range(x.shape[0]):
            # Useless "if" necessary to trigger bug.
            if n:
                n
            # The row index is the running counter, not the loop variable.
            x[count, :] = 1
            count += 1
        return x

    self.check(test_impl, np.zeros((3, 1)))
|
||
|
|
||
|
def test_1array_control_flow(self):
    # issue8146
    # One array is used on an early-return path and on both arms of a
    # later if/else.
    def test_impl(arr, flag1, flag2):
        inv = np.arange(arr.size)
        if flag1:
            return inv.astype(np.float64)
        if flag2:
            ret = inv[inv]
        else:
            ret = inv[inv - 1]
        return ret / arr.size

    arr = np.arange(100)
    self.check(test_impl, arr, True, False)
    self.check(test_impl, arr, True, True)
    self.check(test_impl, arr, False, False)
|
||
|
|
||
|
def test_2array_1_control_flow(self):
    # issue8146
    # Two arrays are created up front; only the first is indexed inside
    # the if/else, and the second is used after the branches join.
    def test_impl(arr, l, flag):
        inv1 = np.arange(arr.size)
        inv2 = np.arange(l, arr.size + l)
        if flag:
            ret = inv1[inv1]
        else:
            ret = inv1[inv1 - 1]
        return ret / inv2

    arr = np.arange(100)
    self.check(test_impl, arr, 10, True)
    self.check(test_impl, arr, 10, False)
|
||
|
|
||
|
def test_2array_2_control_flow(self):
    # issue8146
    # Two arrays are created up front and both are indexed on each arm of
    # the if/else.
    def test_impl(arr, l, flag):
        inv1 = np.arange(arr.size)
        inv2 = np.arange(l, arr.size + l)
        if flag:
            ret1 = inv1[inv1]
            ret2 = inv2[inv1]
        else:
            ret1 = inv1[inv1 - 1]
            ret2 = inv2[inv1 - 1]
        return ret1 / ret2

    arr = np.arange(100)
    self.check(test_impl, arr, 10, True)
    self.check(test_impl, arr, 10, False)
|
||
|
|
||
|
def test_issue8515(self):
    # issue8515: an array is filled in the first prange and
    # then accessed with c[i - 1] in the next prange which
    # should prevent fusion with the previous prange.
    def test_impl(n):
        r = np.zeros(n, dtype=np.intp)
        c = np.zeros(n, dtype=np.intp)
        for i in prange(n):
            for j in range(i):
                c[i] += 1

        for i in prange(n):
            if i == 0:
                continue
            r[i] = c[i] - c[i - 1]
        return r[1:]

    self.check(test_impl, 15)
    # Two parfors are expected, i.e. the prange loops were not fused.
    self.assertEqual(countParfors(test_impl, (types.int64, )), 2)
|
||
|
|
||
|
def test_issue9029(self):
    # issue9029: too many parfors executed in one function
    # overflowed the stack.
    def test_impl(i1, i2):
        N = 30
        S = 3
        a = np.empty((N,N))
        # The stack should overflow if there are 30*30*2 (# of parfors)
        # iterations.
        for y in range(N):
            for x in range(N):
                values = np.ones(S)
                v = values[0]

                p2 = np.empty(S)
                for i in prange(i1, i2):
                    p2[i] = 1
                j = p2[0]

                a[y,x] = v + j
        return a

    # We pass in 0 and 3 so that the function can't analyze the loop
    # bounds on the prange to generate a signed loop whereas the
    # np.ones will be an unsigned loop.
    self.check(test_impl, 0, 3)
|
||
|
|
||
|
def test_fusion_no_side_effects(self):
    # Scalar calls (math.ceil, max, int) sit between the two array
    # creations; a single parfor is still expected.
    def test_impl(a, b):
        X = np.ones(100)
        b = math.ceil(b)
        Y = np.ones(100)
        c = int(max(a, b))
        return X + Y + c
    self.check(test_impl, 3.7, 4.3)
    self.assertEqual(countParfors(test_impl, (types.float64, types.float64)), 1)
|
||
|
|
||
|
def test_issue9256_lower_sroa_conflict(self):
    # A variable defined before the prange loop is reassigned inside it
    # and returned afterwards; the jitted result is compared against the
    # pure-Python function.
    @njit(parallel=True)
    def def_in_loop(x):
        c = 0
        set_num_threads(1)
        for i in prange(x):
            c = i
        return c

    # def_in_loop calls set_num_threads(1) in both its compiled and its
    # Python form, so the previous setting is restored afterwards to keep
    # it from leaking into later tests.
    saved_num_threads = get_num_threads()
    try:
        self.assertEqual(def_in_loop(10), def_in_loop.py_func(10))
    finally:
        set_num_threads(saved_num_threads)
|
||
|
|
||
|
def test_issue9256_lower_sroa_conflict_variant1(self):
    # Variant in which `d` is only assigned under a condition inside the
    # prange loop and is read after the loop.
    def def_in_loop(x):
        c = x
        set_num_threads(1)
        for _i in prange(x):
            if c: # forces 3 SSA versions
                d = x + 4
        return c, d > 0

    # Every call below runs set_num_threads(1); restore the previous
    # setting afterwards so it does not leak into later tests.
    saved_num_threads = get_num_threads()
    try:
        expected = def_in_loop(4)
        self.assertEqual(expected, njit(parallel=False)(def_in_loop)(4))
        self.assertEqual(expected, njit(parallel=True)(def_in_loop)(4))
    finally:
        set_num_threads(saved_num_threads)
|
||
|
|
||
|
def test_issue9256_lower_sroa_conflict_variant2(self):
    # Variant in which `d` is assigned inside a nested range loop under a
    # condition within the prange loop, and is read after the loop.
    def def_in_loop(x):
        c = x
        set_num_threads(1)
        for _i in prange(x):
            if c:
                for _j in range(x): # forces 4 SSA versions
                    d = x + 4
        return c, d > 0

    # Every call below runs set_num_threads(1); restore the previous
    # setting afterwards so it does not leak into later tests.
    saved_num_threads = get_num_threads()
    try:
        expected = def_in_loop(4)
        self.assertEqual(expected, njit(parallel=False)(def_in_loop)(4))
        self.assertEqual(expected, njit(parallel=True)(def_in_loop)(4))
    finally:
        set_num_threads(saved_num_threads)
|
||
|
|
||
|
@needs_lapack # use of np.linalg.solve
@skip_ppc64le_invalid_ctr_loop
def test_issue9490_non_det_ssa_problem(self):
    # Runs the numba.tests.parfor_iss9490_usecase module in a child
    # interpreter with a fixed hash seed and a single numba thread, and
    # fails the test with the child's combined output if it exits with a
    # non-zero status.
    child_env = dict(os.environ)
    child_env["PYTHONHASHSEED"] = "1"
    child_env["NUMBA_NUM_THREADS"] = "1"

    argv = [sys.executable, "-m", "numba.tests.parfor_iss9490_usecase"]

    try:
        subp.check_output(argv, env=child_env, stderr=subp.STDOUT,
                          encoding='utf-8')
    except subp.CalledProcessError as err:
        self.fail(msg=f"subprocess failed with output:\n{err.output}")
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforsLeaks(MemoryLeakMixin, TestParforsBase):
    # Parfor tests run with MemoryLeakMixin mixed in.
    def check(self, pyfunc, *args, **kwargs):
        # Compile pyfunc with compile_all and pass the two compiled
        # results, together with the Python original, to
        # check_parfors_vs_others.
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def test_reduction(self):
        # issue4299
        def test_impl(arr):
            return arr.sum()

        arr = np.arange(10).astype(np.float64)
        self.check(test_impl, arr)

    def test_multiple_reduction_vars(self):
        # Two separate accumulators are updated in one prange loop.

        def test_impl(arr):
            a = 0.
            b = 1.
            for i in prange(arr.size):
                a += arr[i]
                b += 1. / (arr[i] + 1)
            return a * b
        arr = np.arange(10).astype(np.float64)
        self.check(test_impl, arr)
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforsSlice(TestParforsBase):
    # Tests of array expressions and assignments that involve slices.

    def test_parfor_slice1(self):
        # Adds two 1D slices whose bounds are both derived from the shape.
        def test_impl(a):
            (n,) = a.shape
            b = a[0:n-2] + a[1:n-1]
            return b

        self.check(test_impl, np.ones(10))

    def test_parfor_slice2(self):
        # One slice bound comes from the shape, the other from argument m.
        def test_impl(a, m):
            (n,) = a.shape
            b = a[0:n-2] + a[1:m]
            return b

        # runtime assertion should succeed
        self.check(test_impl, np.ones(10), 9)
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones(10),10)
        self.assertIn("do not match", str(raises.exception))

    def test_parfor_slice3(self):
        # 2D slices offset by one in both dimensions.
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,0:n-1] + a[1:m,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice4(self):
        # Full first dimension, offset slices in the second.
        def test_impl(a):
            (m,n) = a.shape
            b = a[:,0:n-1] + a[:,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice5(self):
        # Offset slices in the first dimension, full second dimension.
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,:] + a[1:m,:]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice6(self):
        # A row of a is added to a column of its transpose.
        def test_impl(a):
            b = a.transpose()
            c = a[1,:] + b[:,1]
            return c

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice7(self):
        # A row of a is added to a row of its transpose; the lengths only
        # agree when a is square.
        def test_impl(a):
            b = a.transpose()
            c = a[1,:] + b[1,:]
            return c

        # runtime check should succeed
        self.check(test_impl, np.ones((3,3)))
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones((3,4)))
        self.assertIn("do not match", str(raises.exception))

    @disabled_test
    def test_parfor_slice8(self):
        # Slice assignment into the transpose of the source array.
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[1:m,1:n] = a[1:m,1:n]
            return b

        self.check(test_impl, np.arange(9).reshape((3,3)))

    @disabled_test
    def test_parfor_slice9(self):
        # As test_parfor_slice8 but with a non-square input.
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[1:n,1:m] = a[:,1:m]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    @disabled_test
    def test_parfor_slice10(self):
        # Single-row slice assignment into the transpose of the source.
        def test_impl(a):
            (m,n) = a.shape
            b = a.transpose()
            b[2,1:m] = a[2,1:m]
            return b

        self.check(test_impl, np.arange(9).reshape((3,3)))

    def test_parfor_slice11(self):
        # 3D slice assignment mixing a full slice, integer indices and a
        # bounded slice.
        def test_impl(a):
            (m,n,l) = a.shape
            b = a.copy()
            b[:,1,1:l] = a[:,2,1:l]
            return b

        self.check(test_impl, np.arange(27).reshape((3,3,3)))

    def test_parfor_slice12(self):
        # Slice assignment with constant negative bounds.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,1:-1] = a[0,:-2]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    def test_parfor_slice13(self):
        # Negative bounds held in, and computed from, variables.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            c = -1
            b[1,1:c] = a[0,-n:c-1]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    def test_parfor_slice14(self):
        # Source slice mixes a negative start with a positive stop.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,:-1] = a[0,-3:4]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    def test_parfor_slice15(self):
        # Target slice start is a negated expression of the shape.
        def test_impl(a):
            (m,n) = a.shape
            b = a.copy()
            b[1,-(n-1):] = a[0,-3:4]
            return b

        self.check(test_impl, np.arange(12).reshape((3,4)))

    @disabled_test
    def test_parfor_slice16(self):
        """ This test is disabled because if n is larger than the array size
            then n and n-1 will both be the end of the array and thus the
            slices will in fact be of different sizes and unable to fuse.
        """
        def test_impl(a, b, n):
            assert(a.shape == b.shape)
            a[1:n] = 10
            b[0:(n-1)] = 10
            return a * b

        self.check(test_impl, np.ones(10), np.zeros(10), 8)
        args = (numba.float64[:], numba.float64[:], numba.int64)
        self.assertEqual(countParfors(test_impl, args), 2)

    def test_parfor_slice17(self):
        # Assigns into a slice whose negative start is the length of the
        # source array.
        def test_impl(m, A):
            B = np.zeros(m)
            n = len(A)
            B[-n:] = A
            return B

        self.check(test_impl, 10, np.ones(10))

    def test_parfor_slice18(self):
        # issue 3534
        def test_impl():
            a = np.zeros(10)
            a[1:8] = np.arange(0, 7)
            y = a[3]
            return y

        self.check(test_impl)

    def test_parfor_slice19(self):
        # issues #3561 and #3554, empty slice binop
        def test_impl(X):
            X[:0] += 1
            return X

        self.check(test_impl, np.ones(10))

    def test_parfor_slice20(self):
        # issue #4075, slice size
        def test_impl():
            a = np.ones(10)
            c = a[1:]
            s = len(c)
            return s

        self.check(test_impl, check_scheduling=False)

    def test_parfor_slice21(self):
        # Compares a reshaped (5, 1) array with a slice of a reshaped
        # (6, 1) array.
        def test_impl(x1, x2):
            x1 = x1.reshape(x1.size, 1)
            x2 = x2.reshape(x2.size, 1)
            return x1 >= x2[:-1, :]

        x1 = np.random.rand(5)
        x2 = np.random.rand(6)
        self.check(test_impl, x1, x2)

    def test_parfor_slice22(self):
        # The column index x2 is a zero-dimensional array (np.array(4)).
        def test_impl(x1, x2):
            b = np.zeros((10,))
            for i in prange(1):
                b += x1[:, x2]
            return b

        x1 = np.zeros((10,7))
        x2 = np.array(4)
        self.check(test_impl, x1, x2)

    def test_parfor_slice23(self):
        # issue #4630
        def test_impl(x):
            x[:0] = 2
            return x

        self.check(test_impl, np.ones(10))

    def test_parfor_slice24(self):
        # The length of a slice B[n:] is used as a slice bound on A; n is
        # swept from -15 to 14, i.e. beyond both ends of the array.
        def test_impl(m, A, n):
            B = np.zeros(m)
            C = B[n:]
            C = A[:len(C)]
            return B

        for i in range(-15, 15):
            self.check(test_impl, 10, np.ones(10), i)

    def test_parfor_slice25(self):
        # As test_parfor_slice24 but with n as the stop of the slice.
        def test_impl(m, A, n):
            B = np.zeros(m)
            C = B[:n]
            C = A[:len(C)]
            return B

        for i in range(-15, 15):
            self.check(test_impl, 10, np.ones(10), i)

    def test_parfor_slice26(self):
        # 1D counterpart of the slice pattern in test_parfor_slice15.
        def test_impl(a):
            (n,) = a.shape
            b = a.copy()
            b[-(n-1):] = a[-3:4]
            return b

        self.check(test_impl, np.arange(4))

    def test_parfor_slice27(self):
        # issue5601: tests array analysis of the slice with
        # n_valid_vals of unknown size.
        def test_impl(a):
            n_valid_vals = 0

            for i in prange(a.shape[0]):
                if a[i] != 0:
                    n_valid_vals += 1

            if n_valid_vals:
                unused = a[:n_valid_vals]

            return 0

        self.check(test_impl, np.arange(3))

    def test_parfor_array_access_lower_slice(self):
        # For each slice object, a row slice X[i, ts] is reduced inside a
        # prange loop: once with X created in the kernel and once with X
        # passed in as an argument.
        for ts in [slice(1, 3, None), slice(2, None, None), slice(None, 2, -1),
                   slice(None, None, None), slice(None, None, -2)]:

            def test_impl(n):
                X = np.arange(n * 4).reshape((n, 4))
                y = 0
                for i in numba.prange(n):
                    y += X[i, ts].sum()
                return y

            n = 10
            self.check(test_impl, n)

            X = np.arange(n * 4).reshape((n, 4))

            def test_impl(X):
                y = 0
                for i in numba.prange(X.shape[0]):
                    y += X[i, ts].sum()
                return y

            self.check(test_impl, X)
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforsOptions(TestParforsBase):
    # Checks how the parfor count changes as countParfors options are
    # switched off one after another.

    def test_parfor_options(self):
        def test_impl(a):
            n = a.shape[0]
            b = np.ones(n)
            c = np.array([ i for i in range(n) ])
            b[:n] = a + b * c
            for i in prange(n):
                c[i] = b[i] * a[i]
            return reduce(lambda x,y:x+y, c, 0)

        self.check(test_impl, np.ones(10))
        args = (numba.float64[:],)
        # everything should fuse with default option
        self.assertEqual(countParfors(test_impl, args), 1)
        # with no fusion
        self.assertEqual(countParfors(test_impl, args, fusion=False), 6)
        # with no fusion, comprehension
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False), 5)
        # with no fusion, comprehension, setitem
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False), 4)
        # with no fusion, comprehension, setitem, prange
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False), 3)
        # with no fusion, comprehension, setitem, prange, reduction
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False,
                         reduction=False), 2)
        # with no fusion, comprehension, setitem, prange, reduction, numpy
        self.assertEqual(countParfors(test_impl, args, fusion=False,
                         comprehension=False, setitem=False, prange=False,
                         reduction=False, numpy=False), 0)
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforsBitMask(TestParforsBase):
    # Tests of assignments that index an array with a boolean mask array.

    def test_parfor_bitmask1(self):
        # The mask is computed inside the kernel; a scalar is assigned.
        def test_impl(a, n):
            b = a > n
            a[b] = 0
            return a

        self.check(test_impl, np.arange(10), 5)

    def test_parfor_bitmask2(self):
        # The mask is passed in as an argument; a scalar is assigned.
        def test_impl(a, b):
            a[b] = 0
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask3(self):
        # Both sides of the assignment use the same mask on the same array.
        def test_impl(a, b):
            a[b] = a[b]
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask4(self):
        # The right-hand side masks the result of an array expression.
        def test_impl(a, b):
            a[b] = (2 * a)[b]
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask5(self):
        # The right-hand side is an expression of two masked reads.
        def test_impl(a, b):
            a[b] = a[b] * a[b]
            return a

        a = np.arange(10)
        b = a > 5
        self.check(test_impl, a, b)

    def test_parfor_bitmask6(self):
        # The right-hand side is a separate array sized to the number of
        # True entries in the mask.
        def test_impl(a, b, c):
            a[b] = c
            return a

        a = np.arange(10)
        b = a > 5
        c = np.zeros(sum(b))

        # expect failure due to lack of parallelism
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, a, b, c)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
class TestParforsMisc(TestParforsBase):
|
||
|
"""
|
||
|
Tests miscellaneous parts of ParallelAccelerator use.
|
||
|
"""
|
||
|
def test_no_warn_if_cache_set(self):
    # Calling a parallel function compiled with cache=True must not emit
    # any warning other than the ones explicitly filtered out below.

    def pyfunc():
        arr = np.ones(100)
        for i in prange(arr.size):
            arr[i] += i
        return arr

    cfunc = njit(parallel=True, cache=True)(pyfunc)

    with warnings.catch_warnings(record=True) as raised_warnings:
        warnings.simplefilter('always')
        warnings.filterwarnings(action="ignore",
                                module="typeguard")
        # Filter out warnings about TBB interface mismatch
        warnings.filterwarnings(action='ignore',
                                message=r".*TBB_INTERFACE_VERSION.*",
                                category=numba.errors.NumbaWarning,
                                module=r'numba\.np\.ufunc\.parallel.*')
        cfunc()

    self.assertEqual(len(raised_warnings), 0)

    # Make sure the dynamic globals flag is False on the library of the
    # single compiled overload.
    has_dynamic_globals = [cres.library.has_dynamic_globals
                           for cres in cfunc.overloads.values()]
    self.assertEqual(has_dynamic_globals, [False])
|
||
|
|
||
|
def test_statement_reordering_respects_aliasing(self):
    # Both prints of a[3] come after the slice assignment in the source,
    # so both must show the assigned value.
    def impl():
        a = np.zeros(10)
        a[1:8] = np.arange(0, 7)
        print('a[3]:', a[3])
        print('a[3]:', a[3])
        return a

    cres = self.compile_parallel(impl, ())
    with captured_stdout() as stdout:
        cres.entry_point()
    lines = stdout.getvalue().splitlines()
    # impl prints exactly twice. Checking the count first keeps the
    # per-line loop from passing vacuously when nothing was captured.
    self.assertEqual(len(lines), 2)
    for line in lines:
        self.assertEqual('a[3]: 2.0', line)
|
||
|
|
||
|
def test_parfor_ufunc_typing(self):
    # np.isinf is compiled in parallel mode while the module-level
    # sequential_parfor_lowering flag is forced to True, and the result
    # is compared against the pure-Python call.
    def test_impl(A):
        return np.isinf(A)

    A = np.array([np.inf, 0.0])
    cfunc = njit(parallel=True)(test_impl)
    # save global state
    old_seq_flag = numba.parfors.parfor.sequential_parfor_lowering
    try:
        numba.parfors.parfor.sequential_parfor_lowering = True
        np.testing.assert_array_equal(test_impl(A), cfunc(A))
    finally:
        # recover global state
        numba.parfors.parfor.sequential_parfor_lowering = old_seq_flag
|
||
|
|
||
|
def test_init_block_dce(self):
    # issue4690
    def test_impl():
        res = 0
        arr = [1,2,3,4,5]
        numba.parfors.parfor.init_prange()
        # `dummy` aliases `arr` and is read again after the loop.
        dummy = arr
        for i in numba.prange(5):
            res += arr[i]
        return res + dummy[2]

    # The value reported by get_init_block_size is expected to be zero.
    self.assertEqual(get_init_block_size(test_impl, ()), 0)
|
||
|
|
||
|
def test_alias_analysis_for_parfor1(self):
    # The array size is a scalar accumulated in an ordinary range loop
    # before the allocation.
    def test_impl():
        acc = 0
        for _ in range(4):
            acc += 1

        data = np.zeros((acc,))
        return data

    self.check(test_impl)
|
||
|
|
||
|
def test_no_state_change_in_gufunc_lowering_on_error(self):
    # tests #5098, if there's an exception arising in gufunc lowering the
    # sequential_parfor_lowering global variable should remain as False on
    # stack unwind.

    BROKEN_MSG = 'BROKEN_MSG'

    @register_pass(mutates_CFG=True, analysis_only=False)
    class BreakParfors(AnalysisPass):
        # Compiler pass that replaces the `races` attribute of every
        # Parfor statement it visits with an object whose `difference`
        # method raises.
        _name = "break_parfors"

        def __init__(self):
            AnalysisPass.__init__(self)

        def run_pass(self, state):
            for blk in state.func_ir.blocks.values():
                for stmt in blk.body:
                    if isinstance(stmt, numba.parfors.parfor.Parfor):
                        # races should be a set(), that list is iterable
                        # permits it to get through to the
                        # _create_gufunc_for_parfor_body routine at which
                        # point it needs to be a set so e.g. set.difference
                        # can be computed, this therefore creates an error
                        # in the right location.
                        class Broken(list):

                            def difference(self, other):
                                raise errors.LoweringError(BROKEN_MSG)

                        stmt.races = Broken()
            return True


    class BreakParforsCompiler(CompilerBase):
        # Default nopython pipeline with BreakParfors inserted after
        # IRLegalization.

        def define_pipelines(self):
            pm = DefaultPassBuilder.define_nopython_pipeline(self.state)
            pm.add_pass_after(BreakParfors, IRLegalization)
            pm.finalize()
            return [pm]


    @njit(parallel=True, pipeline_class=BreakParforsCompiler)
    def foo():
        x = 1
        for _ in prange(1):
            x += 1
        return x

    # assert default state for global
    self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)

    with self.assertRaises(errors.LoweringError) as raises:
        foo()

    self.assertIn(BROKEN_MSG, str(raises.exception))

    # assert state has not changed
    self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)
|
||
|
|
||
|
def test_issue_5098(self):
    # Registers a throwaway opaque type with a parallel overloaded method,
    # calls that method from a jitted function and checks that the
    # module-level sequential_parfor_lowering flag is still False
    # afterwards.
    class DummyType(types.Opaque):
        pass

    dummy_type = DummyType("my_dummy")
    register_model(DummyType)(models.OpaqueModel)

    class Dummy(object):
        pass

    @typeof_impl.register(Dummy)
    def typeof_Dummy(val, c):
        return dummy_type

    @unbox(DummyType)
    def unbox_index(typ, obj, c):
        return NativeValue(c.context.get_dummy_value())

    @overload_method(DummyType, "method1", jit_options={"parallel":True})
    def _get_method1(obj, arr, func):
        def _foo(obj, arr, func):
            def baz(a, f):
                c = a.copy()
                c[np.isinf(a)] = np.nan
                return f(c)

            length = len(arr)
            output_arr = np.empty(length, dtype=np.float64)
            for i in prange(length):
                output_arr[i] = baz(arr[i], func)
            for i in prange(length - 1):
                output_arr[i] += baz(arr[i], func)
            return output_arr
        return _foo

    @njit
    def bar(v):
        return v.mean()

    @njit
    def test1(d):
        return d.method1(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), bar)

    save_state = numba.parfors.parfor.sequential_parfor_lowering
    self.assertFalse(save_state)
    try:
        test1(Dummy())
        self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering)
    finally:
        # always set the sequential_parfor_lowering state back to the
        # original state
        numba.parfors.parfor.sequential_parfor_lowering = save_state
|
||
|
|
||
|
def test_oversized_tuple_as_arg_to_kernel(self):
    # A four-element tuple is used inside a prange loop while
    # NUMBA_PARFOR_MAX_TUPLE_SIZE is overridden to 3; an
    # UnsupportedParforsError is expected.

    @njit(parallel=True)
    def oversize_tuple(idx):
        big_tup = (1,2,3,4)
        z = 0
        for x in prange(10):
            z += big_tup[idx]
        return z

    with override_env_config('NUMBA_PARFOR_MAX_TUPLE_SIZE', '3'):
        with self.assertRaises(errors.UnsupportedParforsError) as raises:
            oversize_tuple(0)

    errstr = str(raises.exception)
    self.assertIn("Use of a tuple", errstr)
    self.assertIn("in a parallel region", errstr)
|
||
|
|
||
|
def test_issue5167(self):
    # Issue 5167 (per the test name): two nested prange loops write a
    # normalised difference of two 2D inputs into an array created with
    # np.full. Inputs are random 4x4 arrays drawn from [1, 10000).

    def ndvi_njit(img_nir, img_red):
        fillvalue = 0
        out_img = np.full(img_nir.shape, fillvalue, dtype=img_nir.dtype)
        dims = img_nir.shape
        for y in prange(dims[0]):
            for x in prange(dims[1]):
                out_img[y, x] = ((img_nir[y, x] - img_red[y, x]) /
                                 (img_nir[y, x] + img_red[y, x]))
        return out_img

    tile_shape = (4, 4)
    array1 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape)
    array2 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape)
    self.check(ndvi_njit, array1, array2)
|
||
|
|
||
|
def test_issue5065(self):
    # Issue 5065 (per the test name): a jitted function and an empty tuple
    # of extra arguments are passed into a prange loop and star-unpacked
    # at the call site. The parallel result is compared against running
    # the same Python function uncompiled.

    def reproducer(a, dist, dist_args):
        result = np.zeros((a.shape[0], a.shape[0]), dtype=np.float32)
        for i in prange(a.shape[0]):
            for j in range(i + 1, a.shape[0]):
                d = dist(a[i], a[j], *dist_args)
                result[i, j] = d
                result[j, i] = d
        return result

    @njit
    def euclidean(x, y):
        result = 0.0
        for i in range(x.shape[0]):
            result += (x[i] - y[i]) ** 2
        return np.sqrt(result)

    a = np.random.random(size=(5, 2))

    # compiled with parallel=True ...
    got = njit(parallel=True)(reproducer)(a.copy(), euclidean,())
    # ... versus the plain Python function
    expected = reproducer(a.copy(), euclidean,())

    np.testing.assert_allclose(got, expected)
|
||
|
|
||
|
def test_issue5001(self):
    # Issue 5001 (per the test name): a tuple of arrays with different
    # lengths is indexed by the prange loop variable and the lengths are
    # stored into a list.

    def test_numba_parallel(myarray):
        result = [0] * len(myarray)
        for i in prange(len(myarray)):
            result[i] = len(myarray[i])
        return result

    myarray = (np.empty(100),np.empty(50))
    self.check(test_numba_parallel, myarray)
|
||
|
|
||
|
def test_issue3169(self):
    # Issue 3169 (per the test name): a tuple of lists is forwarded from
    # inside a prange loop to another jitted function. No result is
    # asserted; the call only has to compile and run.

    @njit
    def foo(grids):
        pass

    @njit(parallel=True)
    def bar(grids):
        for x in prange(1):
            foo(grids)

    # returns nothing, just check it compiles
    bar(([1],) * 2)
|
||
|
|
||
|
@disabled_test
def test_issue4846(self):
    # Issue 4846 (per the test name): a namedtuple of strings is passed
    # from a prange loop into a nogil jitted helper that builds tuples
    # from its fields. Currently marked with @disabled_test.

    mytype = namedtuple("mytype", ("a", "b"))

    def outer(mydata):
        for k in prange(3):
            inner(k, mydata)
        return mydata.a

    # `inner` is defined after `outer` but before `outer` is compiled
    @njit(nogil=True)
    def inner(k, mydata):
        f = (k, mydata.a)
        g = (k, mydata.b)

    mydata = mytype(a="a", b="b")

    self.check(outer, mydata)
|
||
|
|
||
|
def test_issue3748(self):
    # Issue 3748 (per the test name): sum reduction over a constant tuple
    # indexed by the prange loop variable.

    def test1b():
        x = (1, 2, 3, 4, 5)
        a = 0
        for i in prange(len(x)):
            a += x[i]
        return a

    self.check(test1b,)
|
||
|
|
||
|
def test_issue5277(self):
    # Issue 5277 (per the test name): nested prange loops whose bounds come
    # from a tuple argument, writing through chained indexing arr[y][x].
    # The array has an integer dtype while the stored value is a float
    # expression.

    def parallel_test(size, arr):
        for x in prange(size[0]):
            for y in prange(size[1]):
                arr[y][x] = x * 4.5 + y
        return arr

    size = (10, 10)
    arr = np.zeros(size, dtype=int)

    self.check(parallel_test, size, arr)
|
||
|
|
||
|
def test_issue5570_ssa_races(self):
    # Issue 5570 (per the test name): `out` is written inside a prange loop
    # and then conditionally updated in place afterwards. The jitted
    # result is compared with the pure Python function (py_func).
    # NOTE(review): indentation was reconstructed; `if method:` is assumed
    # to sit after the prange loop, not inside it - confirm.
    @njit(parallel=True)
    def foo(src, method, out):
        for i in prange(1):
            for j in range(1):
                out[i, j] = 1
        if method:
            out += 1
        return out

    src = np.zeros((5,5))
    method = 57
    out = np.zeros((2, 2))

    # both calls receive (and mutate) the same `out` array
    self.assertPreciseEqual(
        foo(src, method, out),
        foo.py_func(src, method, out)
    )
|
||
|
|
||
|
def test_issue6095_numpy_max(self):
    # Issue 6095 (per the test name): np.max is applied to a slice that was
    # last assigned inside an ordinary range loop, in a function compiled
    # with parallel=True. The seed is fixed so the input is reproducible.
    @njit(parallel=True)
    def find_maxima_3D_jit(args):
        package = args
        for index in range(0, 10):
            z_stack = package[index, :, :]
        # z_stack holds the slice from the final iteration
        return np.max(z_stack)

    np.random.seed(0)
    args = np.random.random((10, 10, 10))
    self.assertPreciseEqual(
        find_maxima_3D_jit(args),
        find_maxima_3D_jit.py_func(args),
    )
|
||
|
|
||
|
def test_issue5942_1(self):
    # issue5942: tests statement reordering of
    # aliased arguments.
    # Each prange iteration copies a row from gg to gg_next and then
    # increments the first element of that row, so the two statements must
    # keep their order.
    def test_impl(gg, gg_next):
        gs = gg.shape
        d = gs[0]
        for i_gg in prange(d):
            gg_next[i_gg, :] = gg[i_gg, :]
            gg_next[i_gg, 0] += 1

        return gg_next

    d = 4
    k = 2

    gg = np.zeros((d, k), dtype = np.int32)
    gg_next = np.zeros((d, k), dtype = np.int32)
    self.check(test_impl, gg, gg_next)
|
||
|
|
||
|
def test_issue5942_2(self):
    # issue5942: tests statement reordering
    # Same pattern as the aliased-argument variant, but both arrays are
    # allocated inside the function and gg is filled by an inner range loop
    # before the row copy and increment.
    def test_impl(d, k):
        gg = np.zeros((d, k), dtype = np.int32)
        gg_next = np.zeros((d, k), dtype = np.int32)

        for i_gg in prange(d):
            for n in range(k):
                gg[i_gg, n] = i_gg
            gg_next[i_gg, :] = gg[i_gg, :]
            gg_next[i_gg, 0] += 1

        return gg_next

    d = 4
    k = 2

    self.check(test_impl, d, k)
|
||
|
|
||
|
@skip_unless_scipy
def test_issue6102(self):
    # The problem is originally observed on Python3.8 because of the
    # changes in how loops are represented in 3.8 bytecode.
    # The prange body loops over a list literal, conditionally rebinding
    # `dist` and `tr`, and then adds `tr` to the current row. The jitted
    # result is compared with py_func on the same array.
    @njit(parallel=True)
    def f(r):
        for ir in prange(r.shape[0]):
            dist = np.inf
            tr = np.array([0, 0, 0], dtype=np.float32)
            for i in [1, 0, -1]:
                dist_t = np.linalg.norm(r[ir, :] + i)
                if dist_t < dist:
                    dist = dist_t
                    tr = np.array([i, i, i], dtype=np.float32)
            r[ir, :] += tr
        return r

    r = np.array([[0., 0., 0.], [0., 0., 1.]])
    self.assertPreciseEqual(f(r), f.py_func(r))
|
||
|
|
||
|
def test_issue6774(self):
    # Issue 6774 (per the test name): np.sum over a one-element slice whose
    # bounds depend on the prange index, with the loop bound taken from
    # len() of the output array.

    def test_impl():
        n = 5
        na_mask = np.ones((n,))
        result = np.empty((n - 1,))
        for i in prange(len(result)):
            result[i] = np.sum(na_mask[i:i + 1])
        return result

    self.check(test_impl)
|
||
|
|
||
|
def test_issue4963_globals(self):
    # Issue 4963 (per the test name): the array shape is built from two
    # names that are not defined in this function, so they resolve as
    # globals.
    def test_impl():
        buf = np.zeros((_GLOBAL_INT_FOR_TESTING1, _GLOBAL_INT_FOR_TESTING2))
        return buf
    self.check(test_impl)
|
||
|
|
||
|
def test_issue4963_freevars(self):
    # Issue 4963 (per the test name): same as the globals variant, but the
    # two shape values are locals of this method and are captured by
    # test_impl as free variables.
    _FREEVAR_INT_FOR_TESTING1 = 17
    _FREEVAR_INT_FOR_TESTING2 = 5
    def test_impl():
        buf = np.zeros((_FREEVAR_INT_FOR_TESTING1, _FREEVAR_INT_FOR_TESTING2))
        return buf
    self.check(test_impl)
|
||
|
|
||
|
def test_issue_9182_recursion_error(self):
    # Issue 9182 (per the test name): only compilation is exercised, via
    # an explicit signature; the function is never called. The body
    # leaves clause_var_1 / clause_var_2 assigned on some branches only
    # before they are passed to _sink.
    from numba.types import ListType, Tuple, intp

    @numba.njit
    def _sink(x):
        pass


    @numba.njit(cache=False, parallel=True)
    def _ground_node_rule(
        clauses,
        nodes,
    ):
        for piter in prange(len(nodes)):
            for clause in clauses:
                clause_type = clause[0]
                clause_variables = clause[2]
                if clause_type == 0:
                    clause_var_1 = clause_variables[0]
                elif len(clause_variables) == 2:
                    clause_var_1, clause_var_2 = (
                        clause_variables[0],
                        clause_variables[1],
                    )

                elif len(clause_variables) == 4:
                    pass

                if clause_type == 1:
                    _sink(clause_var_1)
                    _sink(clause_var_2)

    # compile for (list of (int, int, list of int) tuples, list of int)
    _ground_node_rule.compile(
        (
            ListType(Tuple([intp, intp, ListType(intp)])),
            ListType(intp),
        )
    )
|
||
|
|
||
|
def test_lookup_cycle_detection(self):
    # This test is added due to a bug discovered in the PR 9244 patch.
    # The cyclic detection was incorrectly flagging cycles.
    # The reduction on `acc` sits two ordinary loops deep inside the prange
    # loop; the jitted result must equal the pure Python result.
    @njit(parallel=True)
    def foo():
        # The following `acc` variable is used in the `lookup()` function
        # in parfor's reduction code.
        acc = 0
        for n in prange(1):
            for i in range(1):
                for j in range(1):
                    acc += 1
        return acc

    self.assertEqual(foo(), foo.py_func())
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
class TestParforsDiagnostics(TestParforsBase):
|
||
|
|
||
|
def check(self, pyfunc, *args, **kwargs):
    """Compile `pyfunc` via `compile_all` and cross-check the results.

    `args` are used both to compile and to call the function; `kwargs`
    are handed on unchanged to `check_parfors_vs_others`.
    """
    compiled = self.compile_all(pyfunc, *args)
    # compile_all yields the non-parallel and the parallel compilation
    njit_version, parallel_version = compiled
    self.check_parfors_vs_others(
        pyfunc, njit_version, parallel_version, *args, **kwargs
    )
|
||
|
|
||
|
def assert_fusion_equivalence(self, got, expected):
    """Assert two fusion adjacency dicts agree up to a constant id offset.

    Both dicts are normalised with `_fusion_equivalent` before being
    compared with `assertEqual`.
    """
    normalised_got, normalised_expected = (
        self._fusion_equivalent(info) for info in (got, expected)
    )
    self.assertEqual(normalised_got, normalised_expected)
|
||
|
|
||
|
def _fusion_equivalent(self, thing):
|
||
|
# parfors indexes the Parfors class instance id's from wherever the
|
||
|
# internal state happens to be. To assert fusion equivalence we just
|
||
|
# check that the relative difference between fusion adjacency lists
|
||
|
# is the same. For example:
|
||
|
# {3: [2, 1]} is the same as {13: [12, 11]}
|
||
|
# this function strips the indexing etc out returning something suitable
|
||
|
# for checking equivalence
|
||
|
new = defaultdict(list)
|
||
|
min_key = min(thing.keys())
|
||
|
for k in sorted(thing.keys()):
|
||
|
new[k - min_key] = [x - min_key for x in thing[k]]
|
||
|
return new
|
||
|
|
||
|
def assert_diagnostics(self, diagnostics, parfors_count=None,
                       fusion_info=None, nested_fusion_info=None,
                       replaced_fns=None, hoisted_allocations=None):
    """Check selected properties of a parfor diagnostics object.

    Every keyword left as None is skipped.

    Arguments:
        diagnostics - object taken from a compile result's
                      'parfor_diagnostics' metadata entry.
        parfors_count - expected value of diagnostics.count_parfors().
        fusion_info - expected fusion adjacency, compared up to an id
                      offset.
        nested_fusion_info - as fusion_info, for nested fusion.
        replaced_fns - iterable of entries; each must equal the first
                       item of some value in diagnostics.replaced_fns.
        hoisted_allocations - expected number of hoisted allocations.

    Raises AssertionError if a requested check fails. diagnostics.dump()
    is always exercised at levels 1 to 4 with stdout captured.
    """
    if parfors_count is not None:
        self.assertEqual(parfors_count, diagnostics.count_parfors())

    if fusion_info is not None:
        self.assert_fusion_equivalence(fusion_info, diagnostics.fusion_info)

    if nested_fusion_info is not None:
        self.assert_fusion_equivalence(nested_fusion_info,
                                       diagnostics.nested_fusion_info)

    if replaced_fns is not None:
        repl = diagnostics.replaced_fns.values()
        for wanted in replaced_fns:
            found = any(entry[0] == wanted for entry in repl)
            if not found:
                msg = "Replacement for %s was not found. Had %s" % (wanted, repl)
                raise AssertionError(msg)

    if hoisted_allocations is not None:
        hoisted_allocs = diagnostics.hoisted_allocations()
        self.assertEqual(hoisted_allocations, len(hoisted_allocs))

    # just make sure that the dump() function doesn't have an issue!
    with captured_stdout():
        for level in (1, 2, 3, 4):
            diagnostics.dump(level)
|
||
|
|
||
|
def test_array_expr(self):
    # Array expression a + b on freshly allocated arrays: expects one
    # parfor and the given fusion adjacency (compared relative to the
    # smallest id by assert_diagnostics).
    def test_impl():
        n = 10
        a = np.ones(n)
        b = np.zeros(n)
        return a + b

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, parfors_count=1,
                            fusion_info = {3: [4, 5]})
|
||
|
|
||
|
def test_prange(self):
    # A single explicit prange loop filling an np.empty array: expects the
    # diagnostics to count exactly one parfor.
    def test_impl():
        n = 10
        a = np.empty(n)
        for i in prange(n):
            a[i] = i * 10
        return a

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, parfors_count=1)
|
||
|
|
||
|
def test_user_varname(self):
    """make sure original user variable name is used in fusion info
    """
    def test_impl():
        n = 10
        x = np.ones(n)
        a = np.sin(x)
        b = np.cos(a * a)
        acc = 0
        # the two prange bounds (n - 2, n - 1) differ from the array
        # length n
        for i in prange(n - 2):
            for j in prange(n - 1):
                acc += b[i] + b[j + 1]
        return acc

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    # make sure original 'n' variable name is used in fusion report for loop
    # dimension mismatch
    self.assertTrue(
        any("slice(0, n, 1)" in r.message for r in diagnostics.fusion_reports))
|
||
|
|
||
|
def test_nested_prange(self):
    # Two nested prange loops filling a 2D array: expects two parfors and
    # the given nested fusion adjacency (compared relative to the
    # smallest id by assert_diagnostics).
    def test_impl():
        n = 10
        a = np.empty((n, n))
        for i in prange(n):
            for j in prange(n):
                a[i, j] = i * 10 + j
        return a

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, parfors_count=2,
                            nested_fusion_info={2: [1]})
|
||
|
|
||
|
def test_function_replacement(self):
    # np.argmin on an np.ones array: expects one parfor, the given fusion
    # adjacency, and a replaced_fns entry whose first item is
    # ('argmin', 'numpy').
    def test_impl():
        n = 10
        a = np.ones(n)
        b = np.argmin(a)
        return b

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, parfors_count=1,
                            fusion_info={2: [3]},
                            replaced_fns = [('argmin', 'numpy'),])
|
||
|
|
||
|
def test_reduction(self):
    # Sum reduction in a prange loop over an array that is one element
    # longer than the loop bound: expects the diagnostics to count two
    # parfors.
    def test_impl():
        n = 10
        a = np.ones(n + 1) # prevent fusion
        acc = 0
        for i in prange(n):
            acc += a[i]
        return acc

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, parfors_count=2)
|
||
|
|
||
|
def test_setitem(self):
    # Whole-array slice assignment of a scalar after np.ones: expects the
    # diagnostics to count one parfor.
    def test_impl():
        n = 10
        a = np.ones(n)
        a[:] = 7
        return a

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, parfors_count=1)
|
||
|
|
||
|
def test_allocation_hoisting(self):
    # A temporary array is allocated on every prange iteration; the
    # diagnostics are expected to report exactly one hoisted allocation.
    def test_impl():
        n = 10
        m = 5
        acc = 0
        for i in prange(n):
            temp = np.zeros((m,)) # the np.empty call should get hoisted
            for j in range(m):
                temp[j] = i
            acc += temp[-1]
        return acc

    self.check(test_impl,)
    cpfunc = self.compile_parallel(test_impl, ())
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    self.assert_diagnostics(diagnostics, hoisted_allocations=1)
|
||
|
|
||
|
|
||
|
class TestPrangeBase(TestParforsBase):
|
||
|
|
||
|
def generate_prange_func(self, pyfunc, patch_instance):
    """
    This function does the actual code augmentation to enable the explicit
    testing of `prange` calls in place of `range`.

    The swap is made on a copy of the function's code object, not on
    source text.

    Arguments:
        pyfunc - python function whose bytecode refers to the global
                 name `range`.
        patch_instance - None to switch every `range` to `prange`;
                 otherwise an iterable of integer indices selecting which
                 `LOAD_GLOBAL range` instructions (in the order
                 `dis.Bytecode` yields them) are redirected to `prange`.

    Returns a new function built from the patched code object and this
    module's globals.

    Raises:
        AssertionError - `range` is not referenced (patch_instance is
                 None), or an index in patch_instance does not select an
                 existing `range` load.
        ValueError - `range` is not in co_names (selective patching).
        NotImplementedError - utils.PYVERSION is not one of the versions
                 handled below.
    """
    pyfunc_code = pyfunc.__code__

    if patch_instance is None:
        # patch all instances, cheat by just switching
        # range for prange in co_names; the bytecode is left untouched
        assert 'range' in pyfunc_code.co_names
        prange_names = tuple('prange' if name == 'range' else name
                             for name in pyfunc_code.co_names)
        new_code = bytes(pyfunc_code.co_code)
    else:
        # patch specified instances...
        # find where 'range' is in co_names
        range_idx = pyfunc_code.co_names.index('range')
        # look for LOAD_GLOBALs that point to 'range' and remember the
        # position of the byte following each opcode
        range_locations = [
            instr.offset + 1
            for instr in dis.Bytecode(pyfunc_code)
            if instr.opname == 'LOAD_GLOBAL'
            and _fix_LOAD_GLOBAL_arg(instr.arg) == range_idx
        ]
        # add in 'prange' ref as the last entry of co_names
        prange_names = tuple(pyfunc_code.co_names) + ('prange',)
        prange_idx = len(prange_names) - 1
        if utils.PYVERSION in ((3, 11), (3, 12)):
            # this is the inverse of _fix_LOAD_GLOBAL_arg
            prange_idx = 1 + (prange_idx << 1)
        elif utils.PYVERSION in ((3, 9), (3, 10)):
            pass
        else:
            raise NotImplementedError(utils.PYVERSION)
        new_code = bytearray(pyfunc_code.co_code)
        n_ranges = len(range_locations)
        # patch up the new byte code
        for i in patch_instance:
            # Check every index rather than only how many were given:
            # [1] with a single `range` would otherwise get past the check
            # and fail with a bare IndexError.
            assert -n_ranges <= i < n_ranges, (
                "patch_instance index %s does not select any of the %s "
                "`range` call(s) found" % (i, n_ranges))
            # NOTE: a single byte is written, so prange_idx must be below
            # 256 (bytearray item assignment rejects larger values).
            new_code[range_locations[i]] = prange_idx
        new_code = bytes(new_code)

    # create code object with prange mutation
    prange_code = pyfunc_code.replace(co_code=new_code,
                                      co_names=prange_names)

    # get function; only the code object and globals are supplied, so no
    # closure cells from pyfunc are carried over
    pfunc = pytypes.FunctionType(prange_code, globals())

    return pfunc
|
||
|
|
||
|
def prange_tester(self, pyfunc, *args, **kwargs):
    """
    The `prange` tester

    This is a hack. The function under test is written with plain `range`
    calls. A twin of it is produced by `generate_prange_func`, which copies
    the live code object and rewrites it so that `range` refers to `prange`
    instead. Three results are then produced and compared:
    1. The result of calling the original python function.
    2. The result of calling a njit compiled version of the original
       python function.
    3. The result of calling a njit(parallel=True) version of the mutated
       function containing `prange`.
    The `prange` based function's llvm_ir is also inspected to ensure the
    scheduler code is present.

    Arguments:
     pyfunc - the python function to test
     args - data arguments to pass to the pyfunc under test

    Keyword Arguments:
     patch_instance - iterable containing which instances of `range` to
                      replace. If not present all instances of `range` are
                      replaced.
     scheduler_type - 'signed', 'unsigned' or None, default is None.
                      Supply in cases where the presence of a specific
                      scheduler is to be asserted.
     check_fastmath - if True then a check will be performed to ensure the
                      IR contains instructions labelled with 'fast'
     check_fastmath_result - if True then a check will be performed to
                             ensure the result of running with fastmath
                             on matches that of the pyfunc
     Remaining kwargs are passed on to `check_parfors_vs_others`.

    Returns the list of warnings recorded while the `prange` version was
    being compiled.

    Example:
        def foo():
            acc = 0
            for x in range(5):
                for y in range(10):
                    acc +=1
            return acc

        # prange_tester(foo) tests code equivalent to foo with BOTH
        # loops switched to prange.

        # prange_tester(foo, patch_instance=[1]) tests code equivalent to
        # foo with only the inner loop (instance 1) switched to prange;
        # the outer loop (instance 0) stays a plain range.
    """
    patch_instance = kwargs.pop('patch_instance', None)
    check_fastmath = kwargs.pop('check_fastmath', False)
    check_fastmath_result = kwargs.pop('check_fastmath_result', False)

    prange_func = self.generate_prange_func(pyfunc, patch_instance)

    # Compile functions
    # the signature is derived from the runtime arguments
    sig = tuple(map(numba.typeof, args))

    # compile a standard njit of the original function
    njit_version = self.compile_njit(pyfunc, sig)

    # compile the prange injected function, recording any warnings
    with warnings.catch_warnings(record=True) as raised_warnings:
        warnings.simplefilter('always')
        parallel_version = self.compile_parallel(prange_func, sig)

    # if check_fastmath is True then check fast instructions
    if check_fastmath:
        self.assert_fastmath(prange_func, sig)

    # if check_fastmath_result is True then compile a function
    # so that the parfors checker can assert the result is ok.
    if check_fastmath_result:
        fastmath_version = self.compile_parallel_fastmath(prange_func, sig)
        # caller supplied kwargs take precedence over the default entry
        kwargs = {'fastmath_pcres': fastmath_version, **kwargs}

    self.check_parfors_vs_others(
        pyfunc, njit_version, parallel_version, *args, **kwargs
    )
    return raised_warnings
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
class TestPrangeBasic(TestPrangeBase):
|
||
|
""" Tests Prange """
|
||
|
|
||
|
def test_prange01(self):
    # Simplest case: range(n) writing one array element per iteration.
    # All `range` calls are switched to prange by prange_tester.
    def test_impl():
        n = 4
        A = np.zeros(n)
        for i in range(n):
            A[i] = 2.0 * i
        return A
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange02(self):
    # Two-argument range with a non-zero start; the array index is offset
    # by one from the loop variable.
    def test_impl():
        n = 4
        A = np.zeros(n - 1)
        for i in range(1, n):
            A[i - 1] = 2.0 * i
        return A
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange03(self):
    # Scalar `+=` reduction with a non-zero initial value.
    def test_impl():
        s = 10
        for i in range(10):
            s += 2
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange03mul(self):
    # Scalar `*=` reduction with a non-unit initial value.
    def test_impl():
        s = 3
        for i in range(10):
            s *= 2
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange03sub(self):
    # Scalar `-=` reduction.
    def test_impl():
        s = 100
        for i in range(10):
            s -= 2
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange03div(self):
    # Scalar `/=` reduction starting from an integer literal.
    def test_impl():
        s = 10
        for i in range(10):
            s /= 2
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange04(self):
    # Branch inside the loop body: every element of the np.empty array is
    # written on one of the two paths.
    def test_impl():
        a = 2
        b = 3
        A = np.empty(4)
        for i in range(4):
            if i == a:
                A[i] = b
            else:
                A[i] = 0
        return A
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange05(self):
    # Three-argument range (start, stop, step=1) with a sum reduction over
    # the interior elements of the array.
    def test_impl():
        n = 4
        A = np.ones((n), dtype=np.float64)
        s = 0
        for i in range(1, n - 1, 1):
            s += A[i]
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange06(self):
    # Empty iteration space: start equals stop, so the body never runs and
    # the reduction variable keeps its initial value.
    def test_impl():
        n = 4
        A = np.ones((n), dtype=np.float64)
        s = 0
        for i in range(1, 1, 1):
            s += A[i]
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange07(self):
    # Empty iteration space: start (n == 4) is greater than stop (1).
    def test_impl():
        n = 4
        A = np.ones((n), dtype=np.float64)
        s = 0
        for i in range(n, 1):
            s += A[i]
        return s
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange08(self):
    # Nested loops, both switched to prange, with bounds taken from
    # len(A) and a shared sum reduction.
    def test_impl():
        n = 4
        A = np.ones((n))
        acc = 0
        for i in range(len(A)):
            for j in range(len(A)):
                acc += A[i]
        return acc
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange08_1(self):
    # Variant of the nested reduction with literal loop bounds instead of
    # len(A).
    def test_impl():
        n = 4
        A = np.ones((n))
        acc = 0
        for i in range(4):
            for j in range(4):
                acc += A[i]
        return acc
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange09(self):
    # Nested counting loops where only the second `range` occurrence (the
    # inner loop) becomes prange.
    def test_impl():
        n = 4
        acc = 0
        for i in range(n):
            for j in range(n):
                acc += 1
        return acc
    # patch inner loop to 'prange'
    self.prange_tester(test_impl, patch_instance=[1],
                       scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange10(self):
    # Outer loop becomes prange; the inner plain loop accumulates into
    # acc1, which is reset each outer iteration and folded into acc2.
    def test_impl():
        n = 4
        acc2 = 0
        for j in range(n):
            acc1 = 0
            for i in range(n):
                acc1 += 1
            acc2 += acc1
        return acc2
    # patch outer loop to 'prange'
    self.prange_tester(test_impl, patch_instance=[0],
                       scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
@unittest.skip("list append is not thread-safe yet (#2391, #2408)")
def test_prange11(self):
    # List comprehension over range; skipped for the reason given in the
    # decorator.
    def test_impl():
        n = 4
        return [np.sin(j) for j in range(n)]
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange12(self):
    # Negative stop value: range(-len(X)) is an empty range in Python, so
    # the loop body never runs.
    def test_impl():
        acc = 0
        n = 4
        X = np.ones(n)
        for i in range(-len(X)):
            acc += X[i]
        return acc
    self.prange_tester(test_impl, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange13(self):
    # Loop bound supplied as an argument of type np.int32.
    def test_impl(n):
        acc = 0
        for i in range(n):
            acc += 1
        return acc
    self.prange_tester(test_impl, np.int32(4), scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange14(self):
    # Sum reduction over an array argument; the input is a random
    # 4-element array.
    def test_impl(A):
        s = 3
        for i in range(len(A)):
            s += A[i]*2
        return s
    # this tests reduction detection well since the accumulated variable
    # is initialized before the parfor and the value accessed from the array
    # is updated before accumulation
    self.prange_tester(test_impl, np.random.ranf(4),
                       scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange15(self):
    # from issue 2587
    # test parfor type inference when there is multi-dimensional indexing
    # A 1x1 array is allocated and indexed with a 2-tuple on every
    # iteration.
    def test_impl(N):
        acc = 0
        for i in range(N):
            x = np.ones((1, 1))
            acc += x[0, 0]
        return acc
    self.prange_tester(test_impl, 1024, scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
# Tests for negative ranges
|
||
|
def test_prange16(self):
    # Range with a negative start, range(-N, N); the signed scheduler is
    # requested.
    def test_impl(N):
        acc = 0
        for i in range(-N, N):
            acc += 2
        return acc
    self.prange_tester(test_impl, 1024, scheduler_type='signed',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange17(self):
    # Negative loop indices are used to index the array, so half of the
    # iterations index from the end of X.
    def test_impl(N):
        acc = 0
        X = np.ones(N)
        for i in range(-N, N):
            acc += X[i]
        return acc
    self.prange_tester(test_impl, 9, scheduler_type='signed',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange18(self):
    # Two loops with negative starts and differing bounds accumulate into
    # the same variable.
    # NOTE(review): indentation was reconstructed; the second loop is
    # assumed to be nested inside the first - confirm.
    def test_impl(N):
        acc = 0
        X = np.ones(N)
        for i in range(-N, 5):
            acc += X[i]
            for j in range(-4, N):
                acc += X[j]
        return acc
    self.prange_tester(test_impl, 9, scheduler_type='signed',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange19(self):
    # Nested loops with negative starts indexing a 2D (N, M) array with
    # both loop variables.
    def test_impl(N):
        acc = 0
        M = N + 4
        X = np.ones((N, M))
        for i in range(-N, N):
            for j in range(-M, M):
                acc += X[i, j]
        return acc
    self.prange_tester(test_impl, 9, scheduler_type='signed',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange20(self):
    # Range starting at -1: the first iteration indexes the last element
    # of X.
    def test_impl(N):
        acc = 0
        X = np.ones(N)
        for i in range(-1, N):
            acc += X[i]
        return acc
    self.prange_tester(test_impl, 9, scheduler_type='signed',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange21(self):
    # Both range bounds are negative literals; the argument N is accepted
    # but not used by the body.
    def test_impl(N):
        acc = 0
        for i in range(-3, -1):
            acc += 3
        return acc
    self.prange_tester(test_impl, 9, scheduler_type='signed',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange22(self):
    # Three-way branch on the loop variable over range(-2, 2); the
    # negative indices write to the end of the np.empty array. The
    # fastmath-compiled result is checked as well.
    def test_impl():
        a = 0
        b = 3
        A = np.empty(4)
        for i in range(-2, 2):
            if i == a:
                A[i] = b
            elif i < 1:
                A[i] = -1
            else:
                A[i] = 7
        return A
    self.prange_tester(test_impl, scheduler_type='signed',
                       check_fastmath=True, check_fastmath_result=True)
|
||
|
|
||
|
def test_prange23(self):
    # test non-contig input
    # A is every second element of a 32-element array (a strided view).
    def test_impl(A):
        for i in range(len(A)):
            A[i] = i
        return A
    A = np.zeros(32)[::2]
    self.prange_tester(test_impl, A, scheduler_type='unsigned',
                       check_fastmath=True, check_fastmath_result=True)
|
||
|
|
||
|
def test_prange24(self):
    # test non-contig input, signed range
    # The strided view is written through negative indices only.
    def test_impl(A):
        for i in range(-len(A), 0):
            A[i] = i
        return A
    A = np.zeros(32)[::2]
    self.prange_tester(test_impl, A, scheduler_type='signed',
                       check_fastmath=True, check_fastmath_result=True)
|
||
|
|
||
|
def test_prange25(self):
    # A list of arrays is built by a comprehension (first `range`) and
    # then each slot is rebound in a loop (second `range`); only the
    # second occurrence is switched to prange.
    def test_impl(A):
        n = len(A)
        buf = [np.zeros_like(A) for _ in range(n)]
        for i in range(n):
            buf[i] = A + i
        return buf
    A = np.ones((10,))
    self.prange_tester(test_impl, A, patch_instance=[1],
                       scheduler_type='unsigned', check_fastmath=True,
                       check_fastmath_result=True)

    # NOTE(review): this compiles the original test_impl (plain `range`),
    # not the prange-patched function - confirm that is intended.
    cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),))
    diagnostics = cpfunc.metadata['parfor_diagnostics']
    hoisted_allocs = diagnostics.hoisted_allocations()
    # no allocation may be reported as hoisted
    self.assertEqual(len(hoisted_allocs), 0)
|
||
|
|
||
|
def test_prange26(self):
    # Writes go through B, a strided slice taken inside the function from
    # an argument that is itself a strided view; the original argument A
    # is returned.
    def test_impl(A):
        B = A[::3]
        for i in range(len(B)):
            B[i] = i
        return A
    A = np.zeros(32)[::2]
    self.prange_tester(test_impl, A, scheduler_type='unsigned',
                       check_fastmath=True, check_fastmath_result=True)
|
||
|
|
||
|
def test_prange27(self):
    # issue5597: usedef error in parfor
    # `z` is assigned inside the inner loop but never read; the function
    # returns a constant.
    def test_impl(a, b, c):
        for j in range(b[0]-1):
            for k in range(2):
                z = np.abs(a[c-1:c+1])
        return 0

    # patch inner loop to 'prange'
    self.prange_tester(test_impl,
                       np.arange(20),
                       np.asarray([4,4,4,4,4,4,4,4,4,4]),
                       0,
                       patch_instance=[1],
                       scheduler_type='unsigned',
                       check_fastmath=True)
|
||
|
|
||
|
def test_prange28(self):
    # issue7105: label conflict in nested parfor
    # Each iteration reads two row indices from y, fetches those rows of
    # x, subtracts them and stores the sum of the first two components.
    def test_impl(x, y):
        out = np.zeros(len(y))
        for idx in range(0, len(y)):
            i0 = y[idx, 0]
            i1 = y[idx, 1]
            Pt1 = x[i0]
            Pt2 = x[i1]
            v = Pt1 - Pt2
            vl2 = v[0] + v[1]
            out[idx] = vl2
        return out

    # six 2D points
    X = np.array([[-1., -1.],
                  [-1., 1.],
                  [ 0., 0.],
                  [ 1., -1.],
                  [ 1., 0.],
                  [ 1., 1.]])

    # pairs of row indices into X
    Y = np.array([[0, 1],
                  [1, 2],
                  [2, 3],
                  [3, 4],
                  [4, 5]])

    self.prange_tester(test_impl, X, Y, scheduler_type='unsigned',
                       check_fastmath=True, check_fastmath_result=True)
|
||
|
|
||
|
def test_prange29(self):
    # issue7630: SSA renaming in prange header
    # The same reduction variable is updated by a different loop on each
    # side of the branch; both branches are exercised below.
    def test_impl(flag):
        result = 0
        if flag:
            for i in range(1):
                result += 1
        else:
            for i in range(1):
                result -= 3
        return result

    self.prange_tester(test_impl, True)
    self.prange_tester(test_impl, False)
|
||
|
|
||
|
def test_prange30(self):
    # issue7675: broadcast setitem
    # Each iteration assigns a 1D product into a 2D column block of
    # `result`, so the right-hand side has to be broadcast.
    def test_impl(x, par, numthreads):
        n_par = par.shape[0]
        n_x = len(x)
        result = np.zeros((n_par, n_x), dtype=np.float64)
        # ceiling division: columns handled per iteration
        chunklen = (len(x) + numthreads - 1) // numthreads

        for i in range(numthreads):
            start = i * chunklen
            stop = (i + 1) * chunklen
            result[:, start:stop] = x[start:stop] * par[:]

        return result

    x = np.array(np.arange(0, 6, 1.0))
    par = np.array([1.0, 2.0, 3.0])

    self.prange_tester(test_impl, x, par, 2)
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
class TestPrangeSpecific(TestPrangeBase):
|
||
|
""" Tests specific features/problems found under prange"""
|
||
|
|
||
|
def test_prange_two_instances_same_reduction_var(self):
    # issue4922 - multiple uses of same reduction variable
    # Both updates use `+=`; the second is conditional on the loop index.
    def test_impl(n):
        c = 0
        for i in range(n):
            c += 1
            if i > 10:
                c += 1
        return c
    self.prange_tester(test_impl, 9)
|
||
|
|
||
|
def test_prange_conflicting_reduction_ops(self):
    # The same variable is updated with `+=` and `*=` inside one prange
    # loop; this is expected to be rejected with UnsupportedError carrying
    # the message checked below.
    def test_impl(n):
        c = 0
        for i in range(n):
            c += 1
            if i > 10:
                c *= 1
        return c

    with self.assertRaises(errors.UnsupportedError) as raises:
        self.prange_tester(test_impl, 9)
    msg = ('Reduction variable c has multiple conflicting reduction '
           'operators.')
    self.assertIn(msg, str(raises.exception))
|
||
|
|
||
|
def test_prange_two_conditional_reductions(self):
    # issue6414
    # Two different reduction variables are each updated on a different
    # conditional path inside the loop; with these inputs neither update
    # actually executes (k is only 0 and x[0, 0] is 0).
    def test_impl():
        A = B = 0
        for k in range(1):
            if k == 2:
                A += 1
            else:
                x = np.zeros((1, 1))
                if x[0, 0]:
                    B += 1
        return A, B
    self.prange_tester(test_impl)
|
||
|
|
||
|
def test_prange_nested_reduction1(self):
|
||
|
def test_impl():
|
||
|
A = 0
|
||
|
for k in range(1):
|
||
|
for i in range(1):
|
||
|
if i == 0:
|
||
|
A += 1
|
||
|
return A
|
||
|
self.prange_tester(test_impl)
|
||
|
|
||
|
@disabled_test
|
||
|
def test_check_error_model(self):
|
||
|
def test_impl():
|
||
|
n = 32
|
||
|
A = np.zeros(n)
|
||
|
for i in range(n):
|
||
|
A[i] = 1 / i # div-by-zero when i = 0
|
||
|
return A
|
||
|
|
||
|
with self.assertRaises(ZeroDivisionError) as raises:
|
||
|
test_impl()
|
||
|
|
||
|
# compile parallel functions
|
||
|
pfunc = self.generate_prange_func(test_impl, None)
|
||
|
pcres = self.compile_parallel(pfunc, ())
|
||
|
pfcres = self.compile_parallel_fastmath(pfunc, ())
|
||
|
|
||
|
# should raise
|
||
|
with self.assertRaises(ZeroDivisionError) as raises:
|
||
|
pcres.entry_point()
|
||
|
|
||
|
# should not raise
|
||
|
result = pfcres.entry_point()
|
||
|
self.assertEqual(result[0], np.inf)
|
||
|
|
||
|
def test_check_alias_analysis(self):
|
||
|
# check alias analysis reports ok
|
||
|
def test_impl(A):
|
||
|
for i in range(len(A)):
|
||
|
B = A[i]
|
||
|
B[:] = 1
|
||
|
return A
|
||
|
A = np.zeros(32).reshape(4, 8)
|
||
|
self.prange_tester(test_impl, A, scheduler_type='unsigned',
|
||
|
check_fastmath=True, check_fastmath_result=True)
|
||
|
pfunc = self.generate_prange_func(test_impl, None)
|
||
|
sig = tuple([numba.typeof(A)])
|
||
|
cres = self.compile_parallel_fastmath(pfunc, sig)
|
||
|
_ir = self._get_gufunc_ir(cres)
|
||
|
for k, v in _ir.items():
|
||
|
for line in v.splitlines():
|
||
|
# get the fn definition line
|
||
|
if 'define' in line and k in line:
|
||
|
# there should only be 2x noalias, one on each of the first
|
||
|
# 2 args (retptr, excinfo).
|
||
|
# Note: used to be 3x no noalias, but env arg is dropped.
|
||
|
self.assertEqual(line.count('noalias'), 2)
|
||
|
break
|
||
|
|
||
|
def test_prange_raises_invalid_step_size(self):
|
||
|
def test_impl(N):
|
||
|
acc = 0
|
||
|
for i in range(0, N, 2):
|
||
|
acc += 2
|
||
|
return acc
|
||
|
|
||
|
with self.assertRaises(errors.UnsupportedRewriteError) as raises:
|
||
|
self.prange_tester(test_impl, 1024)
|
||
|
msg = 'Only constant step size of 1 is supported for prange'
|
||
|
self.assertIn(msg, str(raises.exception))
|
||
|
|
||
|
def test_prange_fastmath_check_works(self):
|
||
|
# this function will benefit from `fastmath`, the div will
|
||
|
# get optimised to a multiply by reciprocal and the accumulator
|
||
|
# then becomes an fmadd: A = A + i * 0.5
|
||
|
def test_impl():
|
||
|
n = 128
|
||
|
A = 0
|
||
|
for i in range(n):
|
||
|
A += i / 2.0
|
||
|
return A
|
||
|
self.prange_tester(test_impl, scheduler_type='unsigned',
|
||
|
check_fastmath=True)
|
||
|
pfunc = self.generate_prange_func(test_impl, None)
|
||
|
cres = self.compile_parallel_fastmath(pfunc, ())
|
||
|
ir = self._get_gufunc_ir(cres)
|
||
|
_id = '%[A-Z_0-9]?(.[0-9]+)+[.]?[i]?'
|
||
|
recipr_str = r'\s+%s = fmul fast double %s, 5.000000e-01'
|
||
|
reciprocal_inst = re.compile(recipr_str % (_id, _id))
|
||
|
fadd_inst = re.compile(r'\s+%s = fadd fast double %s, %s'
|
||
|
% (_id, _id, _id))
|
||
|
# check there is something like:
|
||
|
# %.329 = fmul fast double %.325, 5.000000e-01
|
||
|
# %.337 = fadd fast double %A.07, %.329
|
||
|
found = False
|
||
|
for name, kernel in ir.items():
|
||
|
# make sure to look at the kernel corresponding to the cres/pfunc
|
||
|
if name in cres.library.get_llvm_str():
|
||
|
splitted = kernel.splitlines()
|
||
|
for i, x in enumerate(splitted):
|
||
|
if reciprocal_inst.match(x):
|
||
|
self.assertTrue(fadd_inst.match(splitted[i + 1]))
|
||
|
found = True
|
||
|
break
|
||
|
|
||
|
self.assertTrue(found, "fast instruction pattern was not found.")
|
||
|
|
||
|
def test_parfor_alias1(self):
|
||
|
def test_impl(n):
|
||
|
b = np.zeros((n, n))
|
||
|
a = b[0]
|
||
|
for j in range(n):
|
||
|
a[j] = j + 1
|
||
|
return b.sum()
|
||
|
self.prange_tester(test_impl, 4)
|
||
|
|
||
|
def test_parfor_alias2(self):
|
||
|
def test_impl(n):
|
||
|
b = np.zeros((n, n))
|
||
|
for i in range(n):
|
||
|
a = b[i]
|
||
|
for j in range(n):
|
||
|
a[j] = i + j
|
||
|
return b.sum()
|
||
|
self.prange_tester(test_impl, 4)
|
||
|
|
||
|
def test_parfor_alias3(self):
|
||
|
def test_impl(n):
|
||
|
b = np.zeros((n, n, n))
|
||
|
for i in range(n):
|
||
|
a = b[i]
|
||
|
for j in range(n):
|
||
|
c = a[j]
|
||
|
for k in range(n):
|
||
|
c[k] = i + j + k
|
||
|
return b.sum()
|
||
|
self.prange_tester(test_impl, 4)
|
||
|
|
||
|
def test_parfor_race_1(self):
|
||
|
def test_impl(x, y):
|
||
|
for j in range(y):
|
||
|
k = x
|
||
|
return k
|
||
|
raised_warnings = self.prange_tester(test_impl, 10, 20)
|
||
|
warning_obj = raised_warnings[0]
|
||
|
expected_msg = ("Variable k used in parallel loop may be written to "
|
||
|
"simultaneously by multiple workers and may result "
|
||
|
"in non-deterministic or unintended results.")
|
||
|
self.assertIn(expected_msg, str(warning_obj.message))
|
||
|
|
||
|
def test_nested_parfor_push_call_vars(self):
|
||
|
""" issue 3686: if a prange has something inside it that causes
|
||
|
a nested parfor to be generated and both the inner and outer
|
||
|
parfor use the same call variable defined outside the parfors
|
||
|
then ensure that when that call variable is pushed into the
|
||
|
parfor that the call variable isn't duplicated with the same
|
||
|
name resulting in a redundant type lock.
|
||
|
"""
|
||
|
def test_impl():
|
||
|
B = 0
|
||
|
f = np.negative
|
||
|
for i in range(1):
|
||
|
this_matters = f(1.)
|
||
|
B += f(np.zeros(1,))[0]
|
||
|
for i in range(2):
|
||
|
this_matters = f(1.)
|
||
|
B += f(np.zeros(1,))[0]
|
||
|
|
||
|
return B
|
||
|
self.prange_tester(test_impl)
|
||
|
|
||
|
def test_copy_global_for_parfor(self):
|
||
|
""" issue4903: a global is copied next to a parfor so that
|
||
|
it can be inlined into the parfor and thus not have to be
|
||
|
passed to the parfor (i.e., an unsupported function type).
|
||
|
This global needs to be renamed in the block into which
|
||
|
it is copied.
|
||
|
"""
|
||
|
def test_impl(zz, tc):
|
||
|
lh = np.zeros(len(tc))
|
||
|
lc = np.zeros(len(tc))
|
||
|
for i in range(1):
|
||
|
nt = tc[i]
|
||
|
for t in range(nt):
|
||
|
lh += np.exp(zz[i, t])
|
||
|
for t in range(nt):
|
||
|
lc += np.exp(zz[i, t])
|
||
|
return lh, lc
|
||
|
|
||
|
m = 2
|
||
|
zz = np.ones((m, m, m))
|
||
|
tc = np.ones(m, dtype=np.int_)
|
||
|
self.prange_tester(test_impl, zz, tc, patch_instance=[0])
|
||
|
|
||
|
def test_multiple_call_getattr_object(self):
|
||
|
def test_impl(n):
|
||
|
B = 0
|
||
|
f = np.negative
|
||
|
for i in range(1):
|
||
|
this_matters = f(1.0)
|
||
|
B += f(n)
|
||
|
|
||
|
return B
|
||
|
self.prange_tester(test_impl, 1.0)
|
||
|
|
||
|
def test_argument_alias_recarray_field(self):
|
||
|
# Test for issue4007.
|
||
|
def test_impl(n):
|
||
|
for i in range(len(n)):
|
||
|
n.x[i] = 7.0
|
||
|
return n
|
||
|
X1 = np.zeros(10, dtype=[('x', float), ('y', int), ])
|
||
|
X2 = np.zeros(10, dtype=[('x', float), ('y', int), ])
|
||
|
X3 = np.zeros(10, dtype=[('x', float), ('y', int), ])
|
||
|
v1 = X1.view(np.recarray)
|
||
|
v2 = X2.view(np.recarray)
|
||
|
v3 = X3.view(np.recarray)
|
||
|
|
||
|
# Numpy doesn't seem to support almost equal on recarray.
|
||
|
# So, we convert to list and use assertEqual instead.
|
||
|
python_res = list(test_impl(v1))
|
||
|
njit_res = list(njit(test_impl)(v2))
|
||
|
pa_func = njit(test_impl, parallel=True)
|
||
|
pa_res = list(pa_func(v3))
|
||
|
self.assertEqual(python_res, njit_res)
|
||
|
self.assertEqual(python_res, pa_res)
|
||
|
|
||
|
def test_mutable_list_param(self):
|
||
|
""" issue3699: test that mutable variable to call in loop
|
||
|
is not hoisted. The call in test_impl forces a manual
|
||
|
check here rather than using prange_tester.
|
||
|
"""
|
||
|
@njit
|
||
|
def list_check(X):
|
||
|
""" If the variable X is hoisted in the test_impl prange
|
||
|
then subsequent list_check calls would return increasing
|
||
|
values.
|
||
|
"""
|
||
|
ret = X[-1]
|
||
|
a = X[-1] + 1
|
||
|
X.append(a)
|
||
|
return ret
|
||
|
def test_impl(n):
|
||
|
for i in prange(n):
|
||
|
X = [100]
|
||
|
a = list_check(X)
|
||
|
return a
|
||
|
python_res = test_impl(10)
|
||
|
njit_res = njit(test_impl)(10)
|
||
|
pa_func = njit(test_impl, parallel=True)
|
||
|
pa_res = pa_func(10)
|
||
|
self.assertEqual(python_res, njit_res)
|
||
|
self.assertEqual(python_res, pa_res)
|
||
|
|
||
|
def test_list_comprehension_prange(self):
|
||
|
# issue4569
|
||
|
def test_impl(x):
|
||
|
return np.array([len(x[i]) for i in range(len(x))])
|
||
|
x = [np.array([1,2,3], dtype=int),np.array([1,2], dtype=int)]
|
||
|
self.prange_tester(test_impl, x)
|
||
|
|
||
|
def test_ssa_false_reduction(self):
|
||
|
# issue5698
|
||
|
# SSA for h creates assignments to h that make it look like a
|
||
|
# reduction variable except that it lacks an associated
|
||
|
# reduction operator. Test here that h is excluded as a
|
||
|
# reduction variable.
|
||
|
def test_impl(image, a, b):
|
||
|
empty = np.zeros(image.shape)
|
||
|
for i in range(image.shape[0]):
|
||
|
r = image[i][0] / 255.0
|
||
|
if a == 0:
|
||
|
h = 0
|
||
|
if b == 0:
|
||
|
h = 0
|
||
|
empty[i] = [h, h, h]
|
||
|
return empty
|
||
|
|
||
|
image = np.zeros((3, 3), dtype=np.int32)
|
||
|
self.prange_tester(test_impl, image, 0, 0)
|
||
|
|
||
|
def test_list_setitem_hoisting(self):
|
||
|
# issue5979
|
||
|
# Don't hoist list initialization if list item set.
|
||
|
def test_impl():
|
||
|
n = 5
|
||
|
a = np.empty(n, dtype=np.int64)
|
||
|
for k in range(5):
|
||
|
X = [0]
|
||
|
X[0] = 1
|
||
|
a[k] = X[0]
|
||
|
return a
|
||
|
|
||
|
self.prange_tester(test_impl)
|
||
|
|
||
|
def test_record_array_setitem(self):
|
||
|
# issue6704
|
||
|
state_dtype = np.dtype([('var', np.int32)])
|
||
|
|
||
|
def test_impl(states):
|
||
|
for i in range(1):
|
||
|
states[i]['var'] = 1
|
||
|
|
||
|
def comparer(a, b):
|
||
|
assert(a[0]['var'] == b[0]['var'])
|
||
|
|
||
|
self.prange_tester(test_impl,
|
||
|
np.zeros(shape=1, dtype=state_dtype),
|
||
|
check_arg_equality=[comparer])
|
||
|
|
||
|
def test_record_array_setitem_yield_array(self):
|
||
|
state_dtype = np.dtype([('x', np.intp)])
|
||
|
|
||
|
def test_impl(states):
|
||
|
n = states.size
|
||
|
for i in range(states.size):
|
||
|
states["x"][i] = 7 + i
|
||
|
return states
|
||
|
|
||
|
states = np.zeros(10, dtype=state_dtype)
|
||
|
|
||
|
def comparer(a, b):
|
||
|
np.testing.assert_equal(a, b)
|
||
|
|
||
|
self.prange_tester(test_impl,
|
||
|
states,
|
||
|
check_arg_equality=[comparer])
|
||
|
|
||
|
def test_issue7501(self):
|
||
|
def test_impl(size, case):
|
||
|
result = np.zeros((size,))
|
||
|
if case == 1:
|
||
|
for i in range(size):
|
||
|
result[i] += 1
|
||
|
else:
|
||
|
for i in range(size):
|
||
|
result[i] += 2
|
||
|
return result[0]
|
||
|
|
||
|
self.prange_tester(test_impl, 3, 1)
|
||
|
|
||
|
def test_kde_example(self):
|
||
|
def test_impl(X):
|
||
|
# KDE example
|
||
|
b = 0.5
|
||
|
points = np.array([-1.0, 2.0, 5.0])
|
||
|
N = points.shape[0]
|
||
|
n = X.shape[0]
|
||
|
exps = 0
|
||
|
for i in range(n):
|
||
|
p = X[i]
|
||
|
d = (-(p - points)**2) / (2 * b**2)
|
||
|
m = np.min(d)
|
||
|
exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
|
||
|
return exps
|
||
|
|
||
|
n = 128
|
||
|
X = np.random.ranf(n)
|
||
|
self.prange_tester(test_impl, X)
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
def test_issue_due_to_max_label(self):
|
||
|
# Run the actual test in a new process since it can only reproduce in
|
||
|
# a fresh state.
|
||
|
out = subp.check_output(
|
||
|
[sys.executable, '-m', 'numba.tests.parfors_max_label_error'],
|
||
|
timeout=30,
|
||
|
stderr=subp.STDOUT, # redirect stderr to stdout
|
||
|
)
|
||
|
self.assertIn("TEST PASSED", out.decode())
|
||
|
|
||
|
@skip_parfors_unsupported
|
||
|
def test_issue7578(self):
|
||
|
def test_impl(x):
|
||
|
A = np.zeros_like(x)
|
||
|
tmp = np.cos(x) # this can be any 1-arity ufunc
|
||
|
|
||
|
for i in range(len(x)):
|
||
|
A[i] = tmp.sum()
|
||
|
|
||
|
return A
|
||
|
|
||
|
x = np.arange(10.)
|
||
|
self.prange_tester(test_impl, x)
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforChunksizing(TestCase):
    """
    Exercises the parallel chunksize API of ParallelAccelerator, both from
    the interpreter and from inside jitted code.
    """
    _numba_parallel_test_ = False

    def setUp(self):
        # every test starts from a chunksize of 0
        set_parallel_chunksize(0)

    def tearDown(self):
        # ...and leaves it at 0 for whatever runs next
        set_parallel_chunksize(0)

    def test_python_parallel_chunksize_basic(self):
        # Interpreter-level set/get round trip: 0 -> 8 -> 0.
        self.assertEqual(get_parallel_chunksize(), 0)
        for requested in (8, 0):
            set_parallel_chunksize(requested)
            self.assertEqual(get_parallel_chunksize(), requested)

    def test_python_with_chunksize(self):
        # Interpreter-level context manager: the value applies inside the
        # `with` block and is restored afterwards.
        self.assertEqual(get_parallel_chunksize(), 0)
        with parallel_chunksize(8):
            inside = get_parallel_chunksize()
            self.assertEqual(inside, 8)
        after = get_parallel_chunksize()
        self.assertEqual(after, 0)

    def test_njit_parallel_chunksize_basic(self):
        # Same round trip as the interpreter-level test, but the getter and
        # setter are called from jitted functions.
        @njit
        def read_chunksize():
            return get_parallel_chunksize()

        @njit
        def write_chunksize(value):
            return set_parallel_chunksize(value)

        self.assertEqual(read_chunksize(), 0)
        for requested in (8, 0):
            write_chunksize(requested)
            self.assertEqual(read_chunksize(), requested)

    def test_njit_with_chunksize(self):
        # The context manager used from inside a jitted function.
        @njit
        def test_impl(x):
            before = get_parallel_chunksize()
            with parallel_chunksize(8):
                inside = get_parallel_chunksize()
            after = get_parallel_chunksize()
            return before, inside, after

        before, inside, after = test_impl(8)

        self.assertEqual(before, 0)
        self.assertEqual(inside, 8)
        self.assertEqual(after, 0)

    def test_all_iterations_reset_chunksize(self):
        """ Every iteration must execute when a chunksize is in effect,
            and the chunksize observed from within the loop body must be
            0 for every iteration. """

        @njit(parallel=True)
        def test_impl(cs, n):
            res = np.zeros(n)
            inner_cs = np.full(n, -13)
            with numba.parallel_chunksize(cs):
                for i in numba.prange(n):
                    inner_cs[i] = numba.get_parallel_chunksize()
                    res[i] = 13
            return res, inner_cs

        # Array lengths: 1000 is round, 997 is prime, 943 is a product of
        # two primes and 961 is a prime squared.  Chunk sizes: 1..15.
        for length in (1000, 997, 943, 961):
            for chunk in range(1, 16):
                filled, seen = test_impl(chunk, length)
                self.assertTrue((filled == 13).all())
                self.assertTrue((seen == 0).all())

    def test_njit_parallel_chunksize_negative(self):
        # A negative chunksize set from jitted code raises ValueError.
        expected = "chunksize must be greater than or equal to zero"
        with self.assertRaises(ValueError) as ctx:
            @njit
            def neg_test():
                set_parallel_chunksize(-1)

            neg_test()

        self.assertIn(expected, str(ctx.exception))

    def test_python_parallel_chunksize_negative(self):
        # A negative chunksize set from the interpreter raises ValueError.
        expected = "chunksize must be greater than or equal to zero"
        with self.assertRaises(ValueError) as ctx:
            set_parallel_chunksize(-1)

        self.assertIn(expected, str(ctx.exception))

    def test_njit_parallel_chunksize_invalid_type(self):
        # A non-integer chunksize in jitted code is a typing error.
        expected = "The parallel chunksize must be an integer"
        with self.assertRaises(errors.TypingError) as ctx:
            @njit
            def impl():
                set_parallel_chunksize('invalid_type')

            impl()

        self.assertIn(expected, str(ctx.exception))

    def test_python_parallel_chunksize_invalid_type(self):
        # A non-integer chunksize from the interpreter is a TypeError.
        expected = "The parallel chunksize must be an integer"
        with self.assertRaises(TypeError) as ctx:
            set_parallel_chunksize('invalid_type')

        self.assertIn(expected, str(ctx.exception))
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
@x86_only
class TestParforsVectorizer(TestPrangeBase):
    """Inspects the assembly generated for parfor gufuncs to check when the
    vectorizer does and does not kick in.
    """

    # env mutating test
    _numba_parallel_test_ = False

    def get_gufunc_asm(self, func, schedule_type, *args, **kwargs):
        """Compile `func` as a prange function and return its gufunc asm.

        Parameters
        ----------
        func : function
            Kernel written with plain `range`; handed to
            `self.generate_prange_func`.
        schedule_type : str
            Expected suffix of the `do_scheduling_*` call found in the
            compiled LLVM IR (e.g. 'signed' or 'unsigned').
        *args
            Example arguments; only used to derive the type signature.
        **kwargs
            fastmath : compile with the fastmath variant if truthy
                (default False).
            cpu_name : value for NUMBA_CPU_NAME (default 'skylake-avx512').
            cpu_features : value for NUMBA_CPU_FEATURES
                (default '-prefer-256-bit').
            assertions : if true (default), check the scheduler type and
                that some asm was produced.

        Returns
        -------
        The object returned by `self._get_gufunc_asm`; callers in this
        class iterate it as a mapping whose values are asm text.
        """
        # local import: keeps this block self-contained
        from contextlib import ExitStack

        fastmath = kwargs.pop('fastmath', False)
        cpu_name = kwargs.pop('cpu_name', 'skylake-avx512')
        assertions = kwargs.pop('assertions', True)
        # force LLVM to use zmm registers for vectorization
        # https://reviews.llvm.org/D67259
        cpu_features = kwargs.pop('cpu_features', '-prefer-256-bit')

        env_opts = {'NUMBA_CPU_NAME': cpu_name,
                    'NUMBA_CPU_FEATURES': cpu_features,
                    }

        # Enter one override per entry in `env_opts` (in insertion order),
        # rather than hard-coding the number of overrides.
        with ExitStack() as stack:
            for k, v in env_opts.items():
                stack.enter_context(override_env_config(k, v))

            sig = tuple([numba.typeof(x) for x in args])
            pfunc_vectorizable = self.generate_prange_func(func, None)
            if fastmath:
                cres = self.compile_parallel_fastmath(pfunc_vectorizable, sig)
            else:
                cres = self.compile_parallel(pfunc_vectorizable, sig)

            # get the gufunc asm
            asm = self._get_gufunc_asm(cres)

            if assertions:
                schedty = re.compile(r'call\s+\w+\*\s+@do_scheduling_(\w+)\(')
                matches = schedty.findall(cres.library.get_llvm_str())
                self.assertGreaterEqual(len(matches), 1) # at least 1 parfor call
                self.assertEqual(matches[0], schedule_type)
                self.assertNotEqual(asm, {})

            return asm

    @linux_only
    @TestCase.run_test_in_subprocess
    def test_vectorizer_fastmath_asm(self):
        """ This checks that if fastmath is set and the underlying hardware
        is suitable, and the function supplied is amenable to fastmath based
        vectorization, that the vectorizer actually runs.
        """

        # This function will benefit from `fastmath` if run on a suitable
        # target. The vectorizer should unwind the loop and generate
        # packed dtype=double add and sqrt instructions.
        def will_vectorize(A):
            n = len(A)
            acc = 0
            for i in range(n):
                acc += np.sqrt(i)
            return acc

        arg = np.zeros(10)

        fast_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                       fastmath=True)
        slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                       fastmath=False)
        for v in fast_asm.values():
            # should unwind and call vector sqrt then vector add
            # all on packed doubles using zmm's
            self.assertTrue('vaddpd' in v)
            self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
            self.assertTrue('zmm' in v)

        for v in slow_asm.values():
            # vector variants should not be present
            self.assertTrue('vaddpd' not in v)
            self.assertTrue('vsqrtpd' not in v)
            # check scalar variant is present
            self.assertTrue('vsqrtsd' in v and '__svml_sqrt' not in v)
            self.assertTrue('vaddsd' in v)
            # check no zmm addressing is present
            self.assertTrue('zmm' not in v)

    @linux_only
    @TestCase.run_test_in_subprocess(envvars={'NUMBA_BOUNDSCHECK': '0'})
    def test_unsigned_refusal_to_vectorize(self):
        """ This checks that, with fastmath set, a loop over a negative
        range (`range(-n, 0)`, 'signed' scheduling) is not vectorized:
        only the scalar sqrt is emitted and no zmm registers are used.
        The equivalent loop over `range(n)` ('unsigned' scheduling) is
        expected to vectorize.
        """

        def will_not_vectorize(A):
            n = len(A)
            for i in range(-n, 0):
                A[i] = np.sqrt(A[i])
            return A

        def will_vectorize(A):
            n = len(A)
            for i in range(n):
                A[i] = np.sqrt(A[i])
            return A

        arg = np.zeros(10)

        # Boundschecking breaks vectorization
        self.assertFalse(config.BOUNDSCHECK)
        novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg,
                                        fastmath=True)

        vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
                                      fastmath=True)

        for v in novec_asm.values():
            # vector variant should not be present
            self.assertTrue('vsqrtpd' not in v)
            # check scalar variant is present
            self.assertTrue('vsqrtsd' in v)
            # check no zmm addressing is present
            self.assertTrue('zmm' not in v)

        for v in vec_asm.values():
            # should unwind and call vector sqrt then vector mov
            # all on packed doubles using zmm's
            self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
            self.assertTrue('vmovupd' in v)
            self.assertTrue('zmm' in v)

    @linux_only
    # needed as 32bit doesn't have equivalent signed/unsigned instruction
    # generation for this function
    @TestCase.run_test_in_subprocess(envvars={'NUMBA_BOUNDSCHECK': '0'})
    def test_signed_vs_unsigned_vec_asm(self):
        """ This checks vectorization for signed vs unsigned variants of a
        trivial accumulator, the only meaningful difference should be the
        presence of signed vs. unsigned unpack instructions (for the
        induction var).
        """
        def signed_variant():
            n = 4096
            A = 0.
            for i in range(-n, 0):
                A += i
            return A

        def unsigned_variant():
            n = 4096
            A = 0.
            for i in range(n):
                A += i
            return A

        # Boundschecking breaks the diff check below because of the pickled exception
        self.assertFalse(config.BOUNDSCHECK)
        signed_asm = self.get_gufunc_asm(signed_variant, 'signed',
                                         fastmath=True)
        unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned',
                                           fastmath=True)

        def strip_instrs(asm):
            """Return the instruction lines of `asm` with tabs removed."""
            acc = []
            for x in asm.splitlines():
                spd = x.strip()
                # filter out anything that isn't a trivial instruction
                # and anything with the gufunc id as it contains an address
                if spd != '' and not (spd.startswith(('.', '_', '"'))
                                      or '__numba_parfor_gufunc' in spd):
                    acc.append(re.sub('[\t]', '', spd))
            return acc

        # Only the first gufunc of each variant is compared.
        # get_gufunc_asm has already asserted that the asm is non-empty.
        signed_instr = strip_instrs(next(iter(signed_asm.values())))
        unsigned_instr = strip_instrs(next(iter(unsigned_asm.values())))

        from difflib import SequenceMatcher as sm
        # make sure that the only difference in instruction (if there is a
        # difference) is the char 'u'. For example:
        # vcvtsi2sdq vs. vcvtusi2sdq
        self.assertEqual(len(signed_instr), len(unsigned_instr))
        for a, b in zip(signed_instr, unsigned_instr):
            if a == b:
                continue
            s = sm(lambda x: x == '\t', a, b)
            for op in s.get_opcodes():
                # an 'insert' opcode is (tag, i1, i2, j1, j2); b[j1:j2] is
                # the text present only in the unsigned variant
                if op[0] == 'insert':
                    self.assertEqual(b[op[-2]:op[-1]], 'u')
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestParforReductionSetNumThreads(TestCase):
    """Test execution correctness on reductions with set_num_threads.

    Each test changes the thread count from inside a prange body, reduces
    the value reported by get_num_threads(), and compares the jitted
    result with the pure Python (`py_func`) result.
    """
    def test_add(self):
        N = config.NUMBA_NUM_THREADS
        M = 2 * N
        # Clamp to 1: with a single configured thread `N - 1` is 0, which
        # would make `i % mask` a modulo by zero and request 0 threads.
        # For N >= 2 this is identical to `N - 1`.
        mask = max(1, N - 1)

        @njit(parallel=True)
        def udt(nthreads):
            acc = 0
            set_num_threads(nthreads)
            for i in prange(M):
                # always in the range 1..mask
                local_mask = 1 + i % mask
                set_num_threads(local_mask)
                gnt = get_num_threads()
                acc += gnt
            return acc

        expect = udt.py_func(mask)
        got = udt(mask)
        self.assertPreciseEqual(expect, got)

    def test_mul(self):
        # This min will prevent larger thread counts from generating
        # overflow in the loop below.
        N = min(4, config.NUMBA_NUM_THREADS)
        M = 2 * N
        # Clamp to 1 so a single-thread configuration does not produce a
        # zero mask (modulo by zero / zero threads requested).
        mask = max(1, N - 1)

        @njit(parallel=True)
        def udt(nthreads):
            acc = 1
            set_num_threads(nthreads)
            for i in prange(M):
                # always in the range 1..mask
                local_mask = 1 + i % mask
                set_num_threads(local_mask)
                gnt = get_num_threads()
                acc *= gnt
            return acc

        expect = udt.py_func(mask)
        got = udt(mask)
        self.assertPreciseEqual(expect, got)

    def test_max(self):
        N = config.NUMBA_NUM_THREADS
        M = 2 * N
        # Clamp to 1 so a single-thread configuration does not produce a
        # zero mask (modulo by zero / zero threads requested).
        mask = max(1, N - 1)

        @njit(parallel=True)
        def udt(nthreads):
            acc = 1
            set_num_threads(nthreads)
            for i in prange(M):
                # always in the range 1..mask
                local_mask = 1 + i % mask
                set_num_threads(local_mask)
                gnt = get_num_threads()
                acc = max(acc, gnt)
            return acc

        expect = udt.py_func(mask)
        got = udt(mask)
        self.assertPreciseEqual(expect, got)
|
||
|
|
||
|
|
||
|
@skip_parfors_unsupported
class TestDiagnosticEnvVar(TestCase):
    """Checks that NUMBA_PARALLEL_DIAGNOSTICS produces diagnostic output."""

    @TestCase.run_test_in_subprocess()
    def test_diagnostics_env_var1(self):
        # Runs in a subprocess (see decorator); the variable is set before
        # the parallel function below is defined and called.
        os.environ['NUMBA_PARALLEL_DIAGNOSTICS'] = '4'
        with captured_stdout() as stdout:
            @njit(parallel=True)
            def impl():
                n = 100
                b = np.zeros((n), dtype=np.float64)
                for i in prange(n):
                    b[i] = 1
                return b

            impl()
        # the diagnostics banner must have been written to stdout
        self.assertIn("Parallel Accelerator Optimizing", stdout.getvalue())
|
||
|
|
||
|
|
||
|
# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()
|