# # Copyright (c) 2017 Intel Corporation # SPDX-License-Identifier: BSD-2-Clause # import math import os import re import dis import numbers import platform import sys import subprocess import types as pytypes import warnings from functools import reduce import numpy as np from numpy.random import randn import operator from collections import defaultdict, namedtuple import copy from itertools import cycle, chain import subprocess as subp import numba.parfors.parfor from numba import (njit, prange, parallel_chunksize, get_parallel_chunksize, set_parallel_chunksize, set_num_threads, get_num_threads, typeof) from numba.core import (types, errors, ir, rewrites, typed_passes, inline_closurecall, config, compiler, cpu) from numba.extending import (overload_method, register_model, typeof_impl, unbox, NativeValue, models) from numba.core.registry import cpu_target from numba.core.annotations import type_annotations from numba.core.ir_utils import (find_callname, guard, build_definitions, get_definition, is_getitem, is_setitem, index_var_of_get_setitem) from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty from numba.core.compiler import (CompilerBase, DefaultPassBuilder) from numba.core.compiler_machinery import register_pass, AnalysisPass from numba.core.typed_passes import IRLegalization from numba.tests.support import (TestCase, captured_stdout, MemoryLeakMixin, override_env_config, linux_only, tag, skip_parfors_unsupported, _32bit, needs_blas, needs_lapack, disabled_test, skip_unless_scipy, needs_subprocess, skip_ppc64le_invalid_ctr_loop) from numba.core.extending import register_jitable from numba.core.bytecode import _fix_LOAD_GLOBAL_arg from numba.core import utils import cmath import unittest # NOTE: Each parfors test class is run in separate subprocess, this is to reduce # memory pressure in CI settings. 
# The environment variable "SUBPROC_TEST" is used to determine whether a test
# is skipped or not, such that if you want to run any parfors test directly
# this environment variable can be set. The subprocesses running the test
# classes set this environment variable as the new process starts which
# enables the tests within the process. The decorator @needs_subprocess is
# used to ensure the appropriate test skips are made.


@skip_parfors_unsupported
class TestParforsRunner(TestCase):

    _numba_parallel_test_ = False

    # Each test class can run for 30 minutes before time out. Extend this to an
    # hour on aarch64 (some public CI systems were timing out).
    _TIMEOUT = 1800 if platform.machine() != 'aarch64' else 3600

    """This is the test runner for all the parfors tests, it runs them in
    subprocesses as described above. The convention for the test method naming
    is: `test_<TestClass>` where `<TestClass>` is the name of the test class
    in this module.
    """

    def runner(self):
        # Recover the target test class name from this test method's id,
        # which is of the form `module.Class.test_<TestClass>`.
        themod = self.__module__
        test_clazz_name = self.id().split('.')[-1].split('_')[-1]
        # don't specify a given test, it's an entire class that needs running
        self.subprocess_test_runner(test_module=themod,
                                    test_class=test_clazz_name,
                                    timeout=self._TIMEOUT)

    def test_TestParforBasic(self):
        self.runner()

    def test_TestParforNumericalMisc(self):
        self.runner()

    def test_TestParforNumPy(self):
        self.runner()

    def test_TestParfors(self):
        self.runner()

    def test_TestParforsBitMask(self):
        self.runner()

    def test_TestParforsDiagnostics(self):
        self.runner()

    def test_TestParforsLeaks(self):
        self.runner()

    def test_TestParforsMisc(self):
        self.runner()

    def test_TestParforsOptions(self):
        self.runner()

    def test_TestParforsSlice(self):
        self.runner()

    def test_TestParforsVectorizer(self):
        self.runner()

    def test_TestPrangeBasic(self):
        self.runner()

    def test_TestPrangeSpecific(self):
        self.runner()


# Decorator restricting a test to x86 hosts.
x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'),
                           'x86 only test')

# Module-level constants referenced from inside jitted test functions.
_GLOBAL_INT_FOR_TESTING1 = 17
_GLOBAL_INT_FOR_TESTING2 = 5

TestNamedTuple = namedtuple('TestNamedTuple', ('part0', 'part1'))
def null_comparer(a, b):
    """
    Used with check_arg_equality to indicate that we do not care
    whether the value of the parameter at the end of the function
    has a particular value.
    """
    pass


@needs_subprocess
class TestParforsBase(TestCase):
    """
    Base class for testing parfors.
    Provides functions for compilation and three way comparison between
    python functions, njit'd functions and parfor njit'd functions.
    """

    _numba_parallel_test_ = False

    def _compile_this(self, func, sig, **flags):
        # This method originally used `compile_isolated` which returns a
        # "CompileResult", hence this does the same.
        return njit(sig, **flags)(func).overloads[sig]

    def compile_parallel(self, func, sig):
        # njit with parfors enabled.
        return self._compile_this(func, sig, parallel=True)

    def compile_parallel_fastmath(self, func, sig):
        # njit with parfors and fastmath enabled.
        return self._compile_this(func, sig, parallel=True, fastmath=True)

    def compile_njit(self, func, sig):
        # plain njit, no parfors.
        return self._compile_this(func, sig)

    def compile_all(self, pyfunc, *args, **kwargs):
        # Infer the signature from the concrete arguments.
        sig = tuple([numba.typeof(x) for x in args])

        # compile the prange injected function
        cpfunc = self.compile_parallel(pyfunc, sig)

        # compile a standard njit of the original function
        cfunc = self.compile_njit(pyfunc, sig)

        return cfunc, cpfunc

    def check_parfors_vs_others(self, pyfunc, cfunc, cpfunc, *args, **kwargs):
        """
        Checks python, njit and parfor impls produce the same result.

        Arguments:
            pyfunc - the python function to test
            cfunc - CompilerResult from njit of pyfunc
            cpfunc - CompilerResult from njit(parallel=True) of pyfunc
            args - arguments for the function being tested

        Keyword Arguments:
            scheduler_type - 'signed', 'unsigned' or None, default is None.
                             Supply in cases where the presence of a specific
                             scheduler is to be asserted.
            fastmath_pcres - a fastmath parallel compile result, if supplied
                             will be run to make sure the result is correct
            check_arg_equality - some functions need to check that a
                                 parameter is modified rather than a certain
                                 value returned. If this keyword argument is
                                 supplied, it should be a list of comparison
                                 functions such that the i'th function in the
                                 list is used to compare the i'th parameter
                                 of the njit and parallel=True functions
                                 against the i'th parameter of the standard
                                 Python function, asserting if they differ.
                                 The length of this list must be equal to the
                                 number of parameters to the function. The
                                 null comparator is available for use when
                                 you do not desire to test if some particular
                                 parameter is changed.
            Remaining kwargs are passed to np.testing.assert_almost_equal
        """
        scheduler_type = kwargs.pop('scheduler_type', None)
        check_fastmath = kwargs.pop('check_fastmath', None)
        fastmath_pcres = kwargs.pop('fastmath_pcres', None)
        check_scheduling = kwargs.pop('check_scheduling', True)
        check_args_for_equality = kwargs.pop('check_arg_equality', None)

        def copy_args(*args):
            # Copy arguments so each implementation runs against pristine
            # inputs (some tests mutate their arguments in place).
            if not args:
                return tuple()
            new_args = []
            for x in args:
                if isinstance(x, np.ndarray):
                    new_args.append(x.copy('k'))
                elif isinstance(x, np.number):
                    new_args.append(x.copy())
                elif isinstance(x, numbers.Number):
                    new_args.append(x)
                elif x is None:
                    new_args.append(x)
                elif isinstance(x, tuple):
                    new_args.append(copy.deepcopy(x))
                elif isinstance(x, list):
                    new_args.append(x[:])
                else:
                    raise ValueError('Unsupported argument type encountered')
            return tuple(new_args)

        # python result
        py_args = copy_args(*args)
        py_expected = pyfunc(*py_args)

        # njit result
        njit_args = copy_args(*args)
        njit_output = cfunc.entry_point(*njit_args)

        # parfor result
        parfor_args = copy_args(*args)
        parfor_output = cpfunc.entry_point(*parfor_args)

        if check_args_for_equality is None:
            np.testing.assert_almost_equal(njit_output, py_expected, **kwargs)
            np.testing.assert_almost_equal(parfor_output, py_expected,
                                           **kwargs)
            self.assertEqual(type(njit_output), type(parfor_output))
        else:
            assert(len(py_args) == len(check_args_for_equality))
            # Compare each (possibly mutated) argument against the pure
            # Python run using the caller supplied comparators.
            for pyarg, njitarg, parforarg, argcomp in zip(
                    py_args, njit_args, parfor_args,
                    check_args_for_equality):
                argcomp(njitarg, pyarg, **kwargs)
                argcomp(parforarg, pyarg, **kwargs)

        if check_scheduling:
            self.check_scheduling(cpfunc, scheduler_type)

        # if requested check fastmath variant
        if fastmath_pcres is not None:
            parfor_fastmath_output = fastmath_pcres.entry_point(
                *copy_args(*args))
            np.testing.assert_almost_equal(parfor_fastmath_output,
                                           py_expected, **kwargs)

    def check(self, pyfunc, *args, **kwargs):
        """Checks that pyfunc compiles for *args under parallel=True and njit
        and asserts that all version execute and produce the same result
        """
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def check_variants(self, impl, arg_gen, **kwargs):
        """Run self.check(impl, ...) on array data generated from arg_gen.
        """
        for args in arg_gen():
            with self.subTest(list(map(typeof, args))):
                self.check(impl, *args, **kwargs)

    def count_parfors_variants(self, impl, arg_gen, **kwargs):
        """Run self.countParfors(impl, ...) on array types generated from
        arg_gen.
        """
        for args in arg_gen():
            with self.subTest(list(map(typeof, args))):
                argtys = tuple(map(typeof, args))
                # At least one parfors
                self.assertGreaterEqual(countParfors(impl, argtys), 1)

    def check_scheduling(self, cres, scheduler_type):
        # make sure parfor set up scheduling
        scheduler_str = '@do_scheduling'
        if scheduler_type is not None:
            if scheduler_type in ['signed', 'unsigned']:
                scheduler_str += '_' + scheduler_type
            else:
                msg = "Unknown scheduler_type specified: %s"
                raise ValueError(msg % scheduler_type)

        self.assertIn(scheduler_str, cres.library.get_llvm_str())

    def gen_linspace(self, n, ct):
        """Make *ct* sample 1D arrays of length *n* using np.linspace().
        """
        def gen():
            yield np.linspace(0, 1, n)
            yield np.linspace(2, 1, n)
            yield np.linspace(1, 2, n)

        src = cycle(gen())
        return [next(src) for i in range(ct)]
""" def gen(): yield np.linspace(0, 1, n) yield np.linspace(2, 1, n) yield np.linspace(1, 2, n) src = cycle(gen()) return [next(src) for i in range(ct)] def gen_linspace_variants(self, ct): """Make 1D, 2D, 3D variants of the data in C and F orders """ # 1D yield self.gen_linspace(10, ct=ct) # 2D arr2ds = [x.reshape((2, 3)) for x in self.gen_linspace(n=2 * 3, ct=ct)] yield arr2ds # Fortran order yield [np.asfortranarray(x) for x in arr2ds] # 3D arr3ds = [x.reshape((2, 3, 4)) for x in self.gen_linspace(n=2 * 3 * 4, ct=ct)] yield arr3ds # Fortran order yield [np.asfortranarray(x) for x in arr3ds] def _filter_mod(self, mod, magicstr, checkstr=None): """ helper function to filter out modules by name""" filt = [x for x in mod if magicstr in x.name] if checkstr is not None: for x in filt: assert checkstr in str(x) return filt def _get_gufunc_modules(self, cres, magicstr, checkstr=None): """ gets the gufunc LLVM Modules""" _modules = [x for x in cres.library._codegen._engine._ee._modules] # make sure to only use modules that are actually used by cres and # aren't just in the EE by virtue of shared compilation context. potential_matches = self._filter_mod(_modules, magicstr, checkstr=checkstr) lib_asm = cres.library.get_asm_str() ret = [] for mod in potential_matches: if mod.name in lib_asm: ret.append(mod) return ret def _get_gufunc_info(self, cres, fn): """ helper for gufunc IR/asm generation""" # get the gufunc modules magicstr = '__numba_parfor_gufunc' gufunc_mods = self._get_gufunc_modules(cres, magicstr) x = dict() for mod in gufunc_mods: x[mod.name] = fn(mod) return x def _get_gufunc_ir(self, cres): """ Returns the IR of the gufuncs used as parfor kernels as a dict mapping the gufunc name to its IR. Arguments: cres - a CompileResult from `njit(parallel=True, ...)` """ return self._get_gufunc_info(cres, str) def _get_gufunc_asm(self, cres): """ Returns the assembly of the gufuncs used as parfor kernels as a dict mapping the gufunc name to its assembly. 
Arguments: cres - a CompileResult from `njit(parallel=True, ...)` """ tm = cres.library._codegen._tm def emit_asm(mod): return str(tm.emit_assembly(mod)) return self._get_gufunc_info(cres, emit_asm) def assert_fastmath(self, pyfunc, sig): """ Asserts that the fastmath flag has some effect in that suitable instructions are now labelled as `fast`. Whether LLVM can actually do anything to optimise better now the derestrictions are supplied is another matter! Arguments: pyfunc - a function that contains operations with parallel semantics sig - the type signature of pyfunc """ cres = self.compile_parallel_fastmath(pyfunc, sig) _ir = self._get_gufunc_ir(cres) def _get_fast_instructions(ir): splitted = ir.splitlines() fast_inst = [] for x in splitted: m = re.search(r'\bfast\b', x) # \b for wholeword if m is not None: fast_inst.append(x) return fast_inst def _assert_fast(instrs): ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp', 'call') for inst in instrs: count = 0 for op in ops: match = op + ' fast' if match in inst: count += 1 self.assertTrue(count > 0) for name, guir in _ir.items(): inst = _get_fast_instructions(guir) _assert_fast(inst) def blackscholes_impl(sptprice, strike, rate, volatility, timev): # blackscholes example logterm = np.log(sptprice / strike) powterm = 0.5 * volatility * volatility den = volatility * np.sqrt(timev) d1 = (((rate + powterm) * timev) + logterm) / den d2 = d1 - den NofXd1 = 0.5 + 0.5 * 2.0 * d1 NofXd2 = 0.5 + 0.5 * 2.0 * d2 futureValue = strike * np.exp(- rate * timev) c1 = futureValue * NofXd2 call = sptprice * NofXd1 - c1 put = call - futureValue + sptprice return put def lr_impl(Y, X, w, iterations): # logistic regression example for i in range(iterations): w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) return w def example_kmeans_test(A, numCenter, numIter, init_centroids): centroids = init_centroids N, D = A.shape for l in range(numIter): dist = np.array([[math.sqrt(np.sum((A[i,:]-centroids[j,:])**2)) for j 
def get_optimized_numba_ir(test_func, args, **kws):
    """Run test_func through the front end and the parfor compiler passes,
    returning the optimized Numba IR and the TestPipeline holding the state.
    """
    typingctx = cpu_target.typing_context
    targetctx = cpu_target.target_context
    test_ir = compiler.run_frontend(test_func)
    if kws:
        options = cpu.ParallelOptions(kws)
    else:
        options = cpu.ParallelOptions(True)

    tp = TestPipeline(typingctx, targetctx, args, test_ir)

    typingctx.refresh()
    targetctx.refresh()

    inline_pass = inline_closurecall.InlineClosureCallPass(tp.state.func_ir,
                                                           options,
                                                           typed=True)
    inline_pass.run()

    rewrites.rewrite_registry.apply('before-inference', tp.state)

    tp.state.typemap, tp.state.return_type, tp.state.calltypes, _ = \
        typed_passes.type_inference_stage(tp.state.typingctx,
                                          tp.state.targetctx,
                                          tp.state.func_ir,
                                          tp.state.args, None)

    type_annotations.TypeAnnotation(
        func_ir=tp.state.func_ir,
        typemap=tp.state.typemap,
        calltypes=tp.state.calltypes,
        lifted=(),
        lifted_from=None,
        args=tp.state.args,
        return_type=tp.state.return_type,
        html_output=config.HTML)

    diagnostics = numba.parfors.parfor.ParforDiagnostics()

    preparfor_pass = numba.parfors.parfor.PreParforPass(
        tp.state.func_ir, tp.state.typemap,
        tp.state.calltypes, tp.state.typingctx,
        tp.state.targetctx, options,
        swapped=diagnostics.replaced_fns)
    preparfor_pass.run()

    rewrites.rewrite_registry.apply('after-inference', tp.state)

    flags = compiler.Flags()
    # Run the main parfor pass, then fusion, then pre-lowering, in the same
    # order as the real compiler pipeline.
    parfor_pass = numba.parfors.parfor.ParforPass(
        tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
        tp.state.return_type, tp.state.typingctx, tp.state.targetctx,
        options, flags, tp.state.metadata, diagnostics=diagnostics)
    parfor_pass.run()
    parfor_pass = numba.parfors.parfor.ParforFusionPass(
        tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
        tp.state.return_type, tp.state.typingctx, tp.state.targetctx,
        options, flags, tp.state.metadata, diagnostics=diagnostics)
    parfor_pass.run()
    parfor_pass = numba.parfors.parfor.ParforPreLoweringPass(
        tp.state.func_ir, tp.state.typemap, tp.state.calltypes,
        tp.state.return_type, tp.state.typingctx, tp.state.targetctx,
        options, flags, tp.state.metadata, diagnostics=diagnostics)
    parfor_pass.run()
    test_ir._definitions = build_definitions(test_ir.blocks)

    return test_ir, tp


def countParfors(test_func, args, **kws):
    """Return the number of top-level Parfor nodes in the optimized IR."""
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    ret_count = 0

    for label, block in test_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(inst, numba.parfors.parfor.Parfor):
                ret_count += 1

    return ret_count


def countArrays(test_func, args, **kws):
    """Return the number of distinct array-typed assignment targets in the
    optimized IR (including inside parfor bodies)."""
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    return _count_arrays_inner(test_ir.blocks, tp.state.typemap)


def get_init_block_size(test_func, args, **kws):
    """Return the total statement count of all parfor init blocks."""
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    blocks = test_ir.blocks

    ret_count = 0

    for label, block in blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(inst, numba.parfors.parfor.Parfor):
                ret_count += len(inst.init_block.body)

    return ret_count


def _count_arrays_inner(blocks, typemap):
    # Recursive helper for countArrays: descends into parfor loop bodies
    # and init blocks, de-duplicating targets by name per level.
    ret_count = 0
    arr_set = set()

    for label, block in blocks.items():
        for i, inst in enumerate(block.body):
            if isinstance(inst, numba.parfors.parfor.Parfor):
                parfor_blocks = inst.loop_body.copy()
                parfor_blocks[0] = inst.init_block
                ret_count += _count_arrays_inner(parfor_blocks, typemap)
            if (isinstance(inst, ir.Assign)
                    and isinstance(typemap[inst.target.name],
                                   types.ArrayCompatible)):
                arr_set.add(inst.target.name)

    ret_count += len(arr_set)
    return ret_count


def countArrayAllocs(test_func, args, **kws):
    """Return the number of array allocation calls in the optimized IR."""
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    ret_count = 0

    for block in test_ir.blocks.values():
        ret_count += _count_array_allocs_inner(test_ir, block)

    return ret_count


def _count_array_allocs_inner(func_ir, block):
    # Count calls to numpy.empty and numba's empty_inferred, recursing into
    # parfor init blocks and loop bodies.
    ret_count = 0
    for inst in block.body:
        if isinstance(inst, numba.parfors.parfor.Parfor):
            ret_count += _count_array_allocs_inner(func_ir, inst.init_block)
            for b in inst.loop_body.values():
                ret_count += _count_array_allocs_inner(func_ir, b)

        if (isinstance(inst, ir.Assign) and isinstance(inst.value, ir.Expr)
                and inst.value.op == 'call'
                and (guard(find_callname, func_ir, inst.value)
                     == ('empty', 'numpy')
                     or guard(find_callname, func_ir, inst.value)
                     == ('empty_inferred', 'numba.np.unsafe.ndarray'))):
            ret_count += 1

    return ret_count


def countNonParforArrayAccesses(test_func, args, **kws):
    """Return the number of array getitem/setitem whose index is not derived
    from a parfor index variable."""
    test_ir, tp = get_optimized_numba_ir(test_func, args, **kws)
    return _count_non_parfor_array_accesses_inner(test_ir, test_ir.blocks,
                                                  tp.state.typemap)


def _count_non_parfor_array_accesses_inner(f_ir, blocks, typemap,
                                           parfor_indices=None):
    # Recursive helper: tracks the set of known parfor index variable names
    # (including aliases created by simple assignments) while walking blocks.
    ret_count = 0
    if parfor_indices is None:
        parfor_indices = set()

    for label, block in blocks.items():
        for stmt in block.body:
            if isinstance(stmt, numba.parfors.parfor.Parfor):
                parfor_indices.add(stmt.index_var.name)
                parfor_blocks = stmt.loop_body.copy()
                parfor_blocks[0] = stmt.init_block
                ret_count += _count_non_parfor_array_accesses_inner(
                    f_ir, parfor_blocks, typemap, parfor_indices)

            # getitem
            elif (is_getitem(stmt) and isinstance(
                    typemap[stmt.value.value.name], types.ArrayCompatible)
                    and not _uses_indices(
                        f_ir, index_var_of_get_setitem(stmt),
                        parfor_indices)):
                ret_count += 1

            # setitem
            elif (is_setitem(stmt) and isinstance(
                    typemap[stmt.target.name], types.ArrayCompatible)
                    and not _uses_indices(
                        f_ir, index_var_of_get_setitem(stmt),
                        parfor_indices)):
                ret_count += 1

            # find parfor_index aliases
            elif (isinstance(stmt, ir.Assign)
                    and isinstance(stmt.value, ir.Var)
                    and stmt.value.name in parfor_indices):
                parfor_indices.add(stmt.target.name)

    return ret_count


def _uses_indices(f_ir, index, index_set):
    # True if `index` is itself a parfor index, or is a build_tuple that
    # includes one (multi-dimensional access).
    if index.name in index_set:
        return True

    ind_def = guard(get_definition, f_ir, index)
    if isinstance(ind_def, ir.Expr) and ind_def.op == 'build_tuple':
        varnames = set(v.name for v in ind_def.items)
        return len(varnames & index_set) != 0

    return False


class TestPipeline(object):
    """Minimal compiler-state holder used by get_optimized_numba_ir."""
    def __init__(self, typingctx, targetctx, args, test_ir):
        self.state = compiler.StateDict()
        self.state.typingctx = typingctx
        self.state.targetctx = targetctx
        self.state.args = args
        self.state.func_ir = test_ir
        self.state.typemap = None
        self.state.return_type = None
        self.state.calltypes = None
        self.state.metadata = {}
@skip_parfors_unsupported
class TestParforBasic(TestParforsBase):
    """Smoke tests for the parfors transforms. These tests check the most
    basic functionality"""

    def __init__(self, *args):
        TestParforsBase.__init__(self, *args)
        # these are used in the mass of simple tests
        m = np.reshape(np.arange(12.), (3, 4))
        self.simple_args = [np.arange(3.), np.arange(4.), m, m.T]

    def test_simple01(self):
        def test_impl():
            return np.ones(())
        # 0d result: nothing parallelises, so no scheduler is expected
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_simple02(self):
        def test_impl():
            return np.ones((1,))
        self.check(test_impl)

    def test_simple03(self):
        def test_impl():
            return np.ones((1, 2))
        self.check(test_impl)

    def test_simple04(self):
        def test_impl():
            return np.ones(1)
        self.check(test_impl)

    def test_simple07(self):
        def test_impl():
            return np.ones((1, 2), dtype=np.complex128)
        self.check(test_impl)

    def test_simple08(self):
        def test_impl():
            return np.ones((1, 2)) + np.ones((1, 2))
        self.check(test_impl)

    def test_simple09(self):
        def test_impl():
            return np.ones((1, 1))
        self.check(test_impl)

    def test_simple10(self):
        def test_impl():
            return np.ones((0, 0))
        self.check(test_impl)

    def test_simple11(self):
        def test_impl():
            return np.ones((10, 10)) + 1.
        self.check(test_impl)

    def test_simple12(self):
        def test_impl():
            return np.ones((10, 10)) + np.complex128(1.)
        self.check(test_impl)

    def test_simple13(self):
        def test_impl():
            return np.complex128(1.)
        # scalar-only function: no parallel region, so no scheduler
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    def test_simple14(self):
        def test_impl():
            return np.ones((10, 10))[0::20]
        self.check(test_impl)

    def test_simple15(self):
        def test_impl(v1, v2, m1, m2):
            return v1 + v1
        self.check(test_impl, *self.simple_args)

    def test_simple16(self):
        def test_impl(v1, v2, m1, m2):
            return m1 + m1
        self.check(test_impl, *self.simple_args)

    def test_simple17(self):
        def test_impl(v1, v2, m1, m2):
            return m2 + v1
        self.check(test_impl, *self.simple_args)

    @needs_lapack
    def test_simple18(self):
        def test_impl(v1, v2, m1, m2):
            return m1.T + np.linalg.svd(m2)[1]
        self.check(test_impl, *self.simple_args)

    @needs_blas
    def test_simple19(self):
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, v2)
        self.check(test_impl, *self.simple_args)

    @needs_blas
    def test_simple20(self):
        def test_impl(v1, v2, m1, m2):
            return np.dot(m1, m2)
        # gemm is left to BLAS
        with self.assertRaises(AssertionError) as raises:
            self.check(test_impl, *self.simple_args)
        self.assertIn("\'@do_scheduling\' not found", str(raises.exception))

    @needs_blas
    def test_simple21(self):
        def test_impl(v1, v2, m1, m2):
            return np.dot(v1, v1)
        self.check(test_impl, *self.simple_args)

    def test_simple22(self):
        def test_impl(v1, v2, m1, m2):
            return np.sum(v1 + v1)
        self.check(test_impl, *self.simple_args)

    def test_simple23(self):
        def test_impl(v1, v2, m1, m2):
            x = 2 * v1
            y = 2 * v1
            return 4 * np.sum(x**2 + y**2 < 1) / 10
        self.check(test_impl, *self.simple_args)

    def test_simple24(self):
        def test_impl():
            n = 20
            A = np.ones((n, n))
            b = np.arange(n)
            return np.sum(A[:, b])
        self.check(test_impl)

    @disabled_test
    def test_simple_operator_15(self):
        """same as corresponding test_simple_ case but using operator.add"""
        def test_impl(v1, v2, m1, m2):
            return operator.add(v1, v1)
        self.check(test_impl, *self.simple_args)

    @disabled_test
    def test_simple_operator_16(self):
        def test_impl(v1, v2, m1, m2):
            return operator.add(m1, m1)
        self.check(test_impl, *self.simple_args)

    @disabled_test
    def test_simple_operator_17(self):
        def test_impl(v1, v2, m1, m2):
            return operator.add(m2, v1)
        self.check(test_impl, *self.simple_args)

    def test_inplace_alias(self):
        # issue7201
        def test_impl(a):
            a += 1
            a[:] = 3

        def comparer(a, b):
            np.testing.assert_equal(a, b)

        x = np.ones(1)
        self.check(test_impl, x, check_arg_equality=[comparer])


@skip_parfors_unsupported
class TestParforNumericalMisc(TestParforsBase):
    """ Miscellaneous 'classical' numerical tests """

    def test_pi(self):
        # Monte-Carlo estimate of pi.
        def test_impl(n):
            x = 2 * np.random.ranf(n) - 1
            y = 2 * np.random.ranf(n) - 1
            return 4 * np.sum(x**2 + y**2 < 1) / n

        self.check(test_impl, 100000, decimal=1)
        self.assertEqual(countParfors(test_impl, (types.int64, )), 1)
        self.assertEqual(countArrays(test_impl, (types.intp,)), 0)

    def test_blackscholes(self):
        # blackscholes takes 5 1D float array args
        args = (numba.float64[:], ) * 5
        self.assertEqual(countParfors(blackscholes_impl, args), 1)

    @needs_blas
    def test_logistic_regression(self):
        args = (numba.float64[:], numba.float64[:,:],
                numba.float64[:], numba.int64)
        self.assertEqual(countParfors(lr_impl, args), 2)
        self.assertEqual(countArrayAllocs(lr_impl, args), 1)

    def test_kmeans(self):
        np.random.seed(0)
        N = 1024
        D = 10
        centers = 3
        A = np.random.ranf((N, D))
        init_centroids = np.random.ranf((centers, D))
        self.check(example_kmeans_test, A, centers, 3, init_centroids,
                   decimal=1)
        # TODO: count parfors after k-means fusion is working
        # requires recursive parfor counting
        arg_typs = (types.Array(types.float64, 2, 'C'), types.intp,
                    types.intp, types.Array(types.float64, 2, 'C'))
        self.assertEqual(
            countNonParforArrayAccesses(example_kmeans_test, arg_typs), 0)


@skip_parfors_unsupported
class TestParforNumPy(TestParforsBase):
    """Tests NumPy functionality under parfors"""

    @needs_blas
    def test_mvdot(self):
        def test_impl(a, v):
            return np.dot(a, v)

        A = np.linspace(0, 1, 20).reshape(2, 10)
        v = np.linspace(2, 1, 10)

        self.check(test_impl, A, v)

    def test_fuse_argmin_argmax_max_min(self):
        # The reduction and the sum over the same array should fuse into a
        # single parfor for each of the four ops.
        for op in [np.argmin, np.argmax, np.min, np.max]:
            def test_impl(n):
                A = np.ones(n)
                C = op(A)
                B = A.sum()
                return B + C

            self.check(test_impl, 256)
            self.assertEqual(countParfors(test_impl, (types.int64, )), 1)
            self.assertEqual(countArrays(test_impl, (types.intp,)), 0)
test_fuse_argmin_argmax_max_min(self): for op in [np.argmin, np.argmax, np.min, np.max]: def test_impl(n): A = np.ones(n) C = op(A) B = A.sum() return B + C self.check(test_impl, 256) self.assertEqual(countParfors(test_impl, (types.int64, )), 1) self.assertEqual(countArrays(test_impl, (types.intp,)), 0) def test_np_random_func_direct_import(self): def test_impl(n): A = randn(n) return A[0] self.assertEqual(countParfors(test_impl, (types.int64, )), 1) def test_arange(self): # test with stop only def test_impl1(n): return np.arange(n) # start and stop def test_impl2(s, n): return np.arange(s, n) # start, step, stop def test_impl3(s, n, t): return np.arange(s, n, t) for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]: self.check(test_impl1, arg) self.check(test_impl2, 2, arg) self.check(test_impl3, 2, arg, 2) def test_arange_dtype(self): # test with stop only def test_impl1(n): return np.arange(n, dtype=np.float32) # start and stop def test_impl2(s, n): return np.arange(s, n, dtype=np.float32) # start, step, stop def test_impl3(s, n, t): return np.arange(s, n, t, dtype=np.float32) for arg in [11, 128, 30.0]: self.check(test_impl1, arg) self.check(test_impl2, 2, arg) self.check(test_impl3, 2, arg, 2) def test_linspace(self): # without num def test_impl1(start, stop): return np.linspace(start, stop) # with num def test_impl2(start, stop, num): return np.linspace(start, stop, num) for arg in [11, 128, 30.0, complex(4,5), complex(5,4)]: self.check(test_impl1, 2, arg) self.check(test_impl2, 2, arg, 30) def test_mean(self): def test_impl(A): return A.mean() N = 100 A = np.random.ranf(N) B = np.random.randint(10, size=(N, 3)) self.check(test_impl, A) self.check(test_impl, B) self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )), 1) self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )), 1) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl, data_gen) 
self.count_parfors_variants(test_impl, data_gen) def test_var(self): def test_impl(A): return A.var() N = 100 A = np.random.ranf(N) B = np.random.randint(10, size=(N, 3)) C = A + 1j * A self.check(test_impl, A) self.check(test_impl, B) self.check(test_impl, C) self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), )), 2) self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 2, 'C'), )), 2) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl, data_gen) self.count_parfors_variants(test_impl, data_gen) def test_std(self): def test_impl(A): return A.std() N = 100 A = np.random.ranf(N) B = np.random.randint(10, size=(N, 3)) C = A + 1j * A self.check(test_impl, A) self.check(test_impl, B) self.check(test_impl, C) argty = (types.Array(types.float64, 1, 'C'),) self.assertEqual(countParfors(test_impl, argty), 2) self.assertEqual(countParfors(test_impl, argty), 2) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl, data_gen) self.count_parfors_variants(test_impl, data_gen) def test_random_parfor(self): """ Test function with only a random call to make sure a random function like ranf is actually translated to a parfor. 
""" def test_impl(n): A = np.random.ranf((n, n)) return A self.assertEqual(countParfors(test_impl, (types.int64, )), 1) def test_randoms(self): def test_impl(n): A = np.random.standard_normal(size=(n, n)) B = np.random.randn(n, n) C = np.random.normal(0.0, 1.0, (n, n)) D = np.random.chisquare(1.0, (n, n)) E = np.random.randint(1, high=3, size=(n, n)) F = np.random.triangular(1, 2, 3, (n, n)) return np.sum(A+B+C+D+E+F) n = 128 cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),)) parfor_output = cpfunc.entry_point(n) py_output = test_impl(n) # check results within 5% since random numbers generated in parallel np.testing.assert_allclose(parfor_output, py_output, rtol=0.05) self.assertEqual(countParfors(test_impl, (types.int64, )), 1) def test_dead_randoms(self): def test_impl(n): A = np.random.standard_normal(size=(n, n)) B = np.random.randn(n, n) C = np.random.normal(0.0, 1.0, (n, n)) D = np.random.chisquare(1.0, (n, n)) E = np.random.randint(1, high=3, size=(n, n)) F = np.random.triangular(1, 2, 3, (n, n)) return 3 n = 128 cpfunc = self.compile_parallel(test_impl, (numba.typeof(n),)) parfor_output = cpfunc.entry_point(n) py_output = test_impl(n) self.assertEqual(parfor_output, py_output) self.assertEqual(countParfors(test_impl, (types.int64, )), 0) def test_min(self): def test_impl1(A): return A.min() def test_impl2(A): return np.min(A) n = 211 A = np.random.ranf(n) B = np.random.randint(10, size=n).astype(np.int32) C = np.random.ranf((n, n)) # test multi-dimensional array D = np.array([np.inf, np.inf]) self.check(test_impl1, A) self.check(test_impl1, B) self.check(test_impl1, C) self.check(test_impl1, D) self.check(test_impl2, A) self.check(test_impl2, B) self.check(test_impl2, C) self.check(test_impl2, D) # checks that 0d array input raises msg = ("zero-size array to reduction operation " "minimum which has no identity") for impl in (test_impl1, test_impl2): pcfunc = self.compile_parallel(impl, (types.int64[:],)) with self.assertRaises(ValueError) as e: 
pcfunc.entry_point(np.array([], dtype=np.int64)) self.assertIn(msg, str(e.exception)) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl1, data_gen) self.count_parfors_variants(test_impl1, data_gen) self.check_variants(test_impl2, data_gen) self.count_parfors_variants(test_impl2, data_gen) def test_max(self): def test_impl1(A): return A.max() def test_impl2(A): return np.max(A) n = 211 A = np.random.ranf(n) B = np.random.randint(10, size=n).astype(np.int32) C = np.random.ranf((n, n)) # test multi-dimensional array D = np.array([-np.inf, -np.inf]) self.check(test_impl1, A) self.check(test_impl1, B) self.check(test_impl1, C) self.check(test_impl1, D) self.check(test_impl2, A) self.check(test_impl2, B) self.check(test_impl2, C) self.check(test_impl2, D) # checks that 0d array input raises msg = ("zero-size array to reduction operation " "maximum which has no identity") for impl in (test_impl1, test_impl2): pcfunc = self.compile_parallel(impl, (types.int64[:],)) with self.assertRaises(ValueError) as e: pcfunc.entry_point(np.array([], dtype=np.int64)) self.assertIn(msg, str(e.exception)) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl1, data_gen) self.count_parfors_variants(test_impl1, data_gen) self.check_variants(test_impl2, data_gen) self.count_parfors_variants(test_impl2, data_gen) def test_argmax(self): def test_impl1(A): return A.argmax() def test_impl2(A): return np.argmax(A) n = 211 A = np.array([1., 0., 3., 2., 3.]) B = np.random.randint(10, size=n).astype(np.int32) C = np.random.ranf((n, n)) # test multi-dimensional array D = np.array([1., 0., np.nan, 2., 3.]) self.check(test_impl1, A) self.check(test_impl1, B) self.check(test_impl1, C) self.check(test_impl1, D) self.check(test_impl2, A) self.check(test_impl2, B) self.check(test_impl2, C) self.check(test_impl2, D) # checks that 0d array input raises msg = 'attempt to get argmax of an empty sequence' for impl in 
(test_impl1, test_impl2): pcfunc = self.compile_parallel(impl, (types.int64[:],)) with self.assertRaises(ValueError) as e: pcfunc.entry_point(np.array([], dtype=np.int64)) self.assertIn(msg, str(e.exception)) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl1, data_gen) self.count_parfors_variants(test_impl1, data_gen) self.check_variants(test_impl2, data_gen) self.count_parfors_variants(test_impl2, data_gen) def test_argmin(self): def test_impl1(A): return A.argmin() def test_impl2(A): return np.argmin(A) n = 211 A = np.array([1., 0., 2., 0., 3.]) B = np.random.randint(10, size=n).astype(np.int32) C = np.random.ranf((n, n)) # test multi-dimensional array D = np.array([1., 0., np.nan, 0., 3.]) self.check(test_impl1, A) self.check(test_impl1, B) self.check(test_impl1, C) self.check(test_impl1, D) self.check(test_impl2, A) self.check(test_impl2, B) self.check(test_impl2, C) self.check(test_impl2, D) # checks that 0d array input raises msg = 'attempt to get argmin of an empty sequence' for impl in (test_impl1, test_impl2): pcfunc = self.compile_parallel(impl, (types.int64[:],)) with self.assertRaises(ValueError) as e: pcfunc.entry_point(np.array([], dtype=np.int64)) self.assertIn(msg, str(e.exception)) # Test variants data_gen = lambda: self.gen_linspace_variants(1) self.check_variants(test_impl1, data_gen) self.count_parfors_variants(test_impl1, data_gen) self.check_variants(test_impl2, data_gen) self.count_parfors_variants(test_impl2, data_gen) def test_ndarray_fill(self): def test_impl(x): x.fill(7.0) return x x = np.zeros(10) self.check(test_impl, x) argty = (types.Array(types.float64, 1, 'C'),) self.assertEqual(countParfors(test_impl, argty), 1) def test_ndarray_fill2d(self): def test_impl(x): x.fill(7.0) return x x = np.zeros((2,2)) self.check(test_impl, x) argty = (types.Array(types.float64, 2, 'C'),) self.assertEqual(countParfors(test_impl, argty), 1) def test_reshape_with_neg_one(self): # issue3314 def test_impl(a, 
b): result_matrix = np.zeros((b, b, 1), dtype=np.float64) sub_a = a[0:b] a = sub_a.size b = a / 1 z = sub_a.reshape(-1, 1) result_data = sub_a / z result_matrix[:,:,0] = result_data return result_matrix a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]) b = 3 self.check(test_impl, a, b) def test_reshape_with_large_neg(self): # issue3314 def test_impl(a, b): result_matrix = np.zeros((b, b, 1), dtype=np.float64) sub_a = a[0:b] a = sub_a.size b = a / 1 z = sub_a.reshape(-1307, 1) result_data = sub_a / z result_matrix[:,:,0] = result_data return result_matrix a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]) b = 3 self.check(test_impl, a, b) def test_reshape_with_too_many_neg_one(self): # issue3314 with self.assertRaises(errors.UnsupportedRewriteError) as raised: @njit(parallel=True) def test_impl(a, b): rm = np.zeros((b, b, 1), dtype=np.float64) sub_a = a[0:b] a = sub_a.size b = a / 1 z = sub_a.reshape(-1, -1) result_data = sub_a / z rm[:,:,0] = result_data return rm a = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0]) b = 3 test_impl(a, b) msg = ("The reshape API may only include one negative argument.") self.assertIn(msg, str(raised.exception)) def test_0d_array(self): def test_impl(n): return np.sum(n) + np.prod(n) + np.min(n) + np.max(n) + np.var(n) self.check(test_impl, np.array(7), check_scheduling=False) def test_real_imag_attr(self): # See issue 8012 def test_impl(z): return np.sum(z.real ** 2 + z.imag ** 2) z = np.arange(5) * (1 + 1j) self.check(test_impl, z) self.assertEqual(countParfors(test_impl, (types.complex128[::1],)), 1) class TestParforsUnsupported(TestCase): """Tests for unsupported use of parfors""" @unittest.skipIf(not _32bit, "Only impacts 32 bit hardware") @needs_blas def test_unsupported_combination_raises(self): """ This test is in place until issues with the 'parallel' target on 32 bit hardware are fixed. 
""" with self.assertRaises(errors.UnsupportedParforsError) as raised: @njit(parallel=True) def ddot(a, v): return np.dot(a, v) A = np.linspace(0, 1, 20).reshape(2, 10) v = np.linspace(2, 1, 10) ddot(A, v) msg = ("The 'parallel' target is not currently supported on 32 bit " "hardware") self.assertIn(msg, str(raised.exception)) @skip_parfors_unsupported class TestParfors(TestParforsBase): """ Tests cpython, reduction and various parfors features""" def test_arraymap(self): def test_impl(a, x, y): return a * x + y self.check_variants(test_impl, lambda: self.gen_linspace_variants(3)) def test_0d_broadcast(self): def test_impl(): X = np.array(1) Y = np.ones((10, 12)) return np.sum(X + Y) self.check(test_impl) self.assertEqual(countParfors(test_impl, ()), 1) def test_2d_parfor(self): def test_impl(): X = np.ones((10, 12)) Y = np.zeros((10, 12)) return np.sum(X + Y) self.check(test_impl) self.assertEqual(countParfors(test_impl, ()), 1) def test_nd_parfor(self): def case1(): X = np.ones((10, 12)) Y = np.zeros((10, 12)) yield (X, Y) data_gen = lambda: chain(case1(), self.gen_linspace_variants(2)) def test_impl(X, Y): return np.sum(X + Y) self.check_variants(test_impl, data_gen) self.count_parfors_variants(test_impl, data_gen) def test_np_func_direct_import(self): from numpy import ones # import here becomes FreeVar def test_impl(n): A = ones(n) return A[0] n = 111 self.check(test_impl, n) def test_size_assertion(self): def test_impl(m, n): A = np.ones(m) B = np.ones(n) return np.sum(A + B) self.check(test_impl, 10, 10) with self.assertRaises(AssertionError) as raises: cfunc = njit(parallel=True)(test_impl) cfunc(10, 9) msg = "Sizes of A, B do not match" self.assertIn(msg, str(raises.exception)) def test_cfg(self): # from issue #2477 def test_impl(x, is_positive, N): for i in numba.prange(2): for j in range( i*N//2, (i+1)*N//2 ): is_positive[j] = 0 if x[j] > 0: is_positive[j] = 1 return is_positive N = 100 x = np.random.rand(N) is_positive = np.zeros(N) self.check(test_impl, 
x, is_positive, N) def test_reduce(self): def test_impl(A): init_val = 10 return reduce(lambda a,b: min(a, b), A, init_val) n = 211 A = np.random.ranf(n) self.check(test_impl, A) A = np.random.randint(10, size=n).astype(np.int32) self.check(test_impl, A) # test checking the number of arguments for the reduce function def test_impl(): g = lambda x: x ** 2 return reduce(g, np.array([1, 2, 3, 4, 5]), 2) with self.assertTypingError(): self.check(test_impl) # test checking reduction over bitarray masked arrays n = 160 A = np.random.randint(10, size=n).astype(np.int32) def test_impl(A): return np.sum(A[A>=3]) self.check(test_impl, A) # TODO: this should fuse # self.assertTrue(countParfors(test_impl, (numba.float64[:],)) == 1) def test_impl(A): B = A[:,0] return np.sum(A[B>=3,1]) self.check(test_impl, A.reshape((16,10))) # TODO: this should also fuse #self.assertTrue(countParfors(test_impl, (numba.float64[:,:],)) == 1) def test_impl(A): B = A[:,0] return np.sum(A[B>=3,1:2]) self.check(test_impl, A.reshape((16,10))) # this doesn't fuse due to mixed indices self.assertEqual(countParfors(test_impl, (numba.float64[:,:],)), 2) def test_impl(A): min_val = np.amin(A) return A - min_val self.check(test_impl, A) # this doesn't fuse due to use of reduction variable self.assertEqual(countParfors(test_impl, (numba.float64[:],)), 2) def test_use_of_reduction_var1(self): def test_impl(): acc = 0 for i in prange(1): acc = cmath.sqrt(acc) return acc # checks that invalid use of reduction variable is detected msg = ("Use of reduction variable acc in an unsupported reduction function.") with self.assertRaises(ValueError) as e: pcfunc = self.compile_parallel(test_impl, ()) self.assertIn(msg, str(e.exception)) def test_unsupported_floordiv1(self): def test_impl(): acc = 100 for i in prange(2): acc //= 2 return acc # checks that invalid use of ifloordiv reduction operator is detected msg = ("Parallel floordiv reductions are not supported. 
" "If all divisors are integers then a floordiv " "reduction can in some cases be parallelized as " "a multiply reduction followed by a floordiv of " "the resulting product.") with self.assertRaises(errors.NumbaValueError) as e: pcfunc = self.compile_parallel(test_impl, ()) self.assertIn(msg, str(e.exception)) def test_unsupported_xor1(self): def test_impl(): acc = 100 for i in prange(2): acc ^= i + 2 return acc msg = ("Use of reduction variable acc in an unsupported reduction function.") with self.assertRaises(ValueError) as e: pcfunc = self.compile_parallel(test_impl, ()) self.assertIn(msg, str(e.exception)) def test_parfor_array_access1(self): # signed index of the prange generated by sum() should be replaced # resulting in array A to be eliminated (see issue #2846) def test_impl(n): A = np.ones(n) return A.sum() n = 211 self.check(test_impl, n) self.assertEqual(countArrays(test_impl, (types.intp,)), 0) def test_parfor_array_access2(self): # in this test, the prange index has the same name (i) in two loops # thus, i has multiple definitions and is harder to replace def test_impl(n): A = np.ones(n) m = 0 n = 0 for i in numba.prange(len(A)): m += A[i] for i in numba.prange(len(A)): if m == n: # access in another block n += A[i] return m + n n = 211 self.check(test_impl, n) self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0) def test_parfor_array_access3(self): def test_impl(n): A = np.ones(n, np.int64) m = 0 for i in numba.prange(len(A)): m += A[i] if m==2: i = m n = 211 with self.assertRaises(errors.UnsupportedRewriteError) as raises: self.check(test_impl, n) self.assertIn("Overwrite of parallel loop index", str(raises.exception)) @needs_blas def test_parfor_array_access4(self): # in this test, one index of a multi-dim access should be replaced # np.dot parallel implementation produces this case def test_impl(A, b): return np.dot(A, b) n = 211 d = 4 A = np.random.ranf((n, d)) b = np.random.ranf(d) self.check(test_impl, A, b) # make sure the 
parfor index is replaced in build_tuple of access to A test_ir, tp = get_optimized_numba_ir( test_impl, (types.Array(types.float64, 2, 'C'), types.Array(types.float64, 1, 'C'))) # this code should have one basic block after optimization self.assertTrue(len(test_ir.blocks) == 1 and 0 in test_ir.blocks) block = test_ir.blocks[0] parfor_found = False parfor = None for stmt in block.body: if isinstance(stmt, numba.parfors.parfor.Parfor): parfor_found = True parfor = stmt self.assertTrue(parfor_found) build_tuple_found = False # there should be only one build_tuple for bl in parfor.loop_body.values(): for stmt in bl.body: if (isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op == 'build_tuple'): build_tuple_found = True self.assertTrue(parfor.index_var in stmt.value.items) self.assertTrue(build_tuple_found) def test_parfor_dtype_type(self): # test array type replacement creates proper type def test_impl(a): for i in numba.prange(len(a)): a[i] = a.dtype.type(0) return a[4] a = np.ones(10) self.check(test_impl, a) def test_parfor_array_access5(self): # one dim is slice in multi-dim access def test_impl(n): X = np.ones((n, 3)) y = 0 for i in numba.prange(n): y += X[i,:].sum() return y n = 211 self.check(test_impl, n) self.assertEqual(countNonParforArrayAccesses(test_impl, (types.intp,)), 0) @disabled_test # Test itself is problematic, see #3155 def test_parfor_hoist_setitem(self): # Make sure that read of out is not hoisted. 
def test_impl(out): for i in prange(10): out[0] = 2 * out[0] return out[0] out = np.ones(1) self.check(test_impl, out) @needs_blas def test_parfor_generate_fuse(self): # issue #2857 def test_impl(N, D): w = np.ones(D) X = np.ones((N, D)) Y = np.ones(N) for i in range(3): B = (-Y * np.dot(X, w)) return B n = 211 d = 3 self.check(test_impl, n, d) self.assertEqual(countArrayAllocs(test_impl, (types.intp, types.intp)), 4) self.assertEqual(countParfors(test_impl, (types.intp, types.intp)), 4) def test_ufunc_expr(self): # issue #2885 def test_impl(A, B): return np.bitwise_and(A, B) A = np.ones(3, np.uint8) B = np.ones(3, np.uint8) B[1] = 0 self.check(test_impl, A, B) def test_find_callname_intrinsic(self): def test_impl(n): A = unsafe_empty((n,)) for i in range(n): A[i] = i + 2.0 return A # the unsafe allocation should be found even though it is imported # as a different name self.assertEqual(countArrayAllocs(test_impl, (types.intp,)), 1) def test_reduction_var_reuse(self): # issue #3139 def test_impl(n): acc = 0 for i in prange(n): acc += 1 for i in prange(n): acc += 2 return acc self.check(test_impl, 16) def test_non_identity_initial(self): # issue #7344 def test_impl(A, cond): s = 1 for i in prange(A.shape[0]): if cond[i]: s += 1 return s self.check(test_impl, np.ones(10), np.ones(10).astype('bool')) def test_if_not_else_reduction(self): # issue #7344 def test_impl(A, cond): s = 1 t = 10 for i in prange(A.shape[0]): if cond[i]: s += 1 t += 1 else: s += 2 return s + t self.check(test_impl, np.ones(10), np.ones(10).astype('bool')) def test_two_d_array_reduction_reuse(self): def test_impl(n): shp = (13, 17) size = shp[0] * shp[1] result1 = np.zeros(shp, np.int_) tmp = np.arange(size).reshape(shp) for i in numba.prange(n): result1 += tmp for i in numba.prange(n): result1 += tmp return result1 self.check(test_impl, 100) def test_one_d_array_reduction(self): def test_impl(n): result = np.zeros(1, np.int_) for i in numba.prange(n): result += np.array([i], np.int_) return 
result self.check(test_impl, 100) def test_two_d_array_reduction(self): def test_impl(n): shp = (13, 17) size = shp[0] * shp[1] result1 = np.zeros(shp, np.int_) tmp = np.arange(size).reshape(shp) for i in numba.prange(n): result1 += tmp return result1 self.check(test_impl, 100) def test_two_d_array_reduction_with_float_sizes(self): # result1 is float32 and tmp is float64. # Tests reduction with differing dtypes. def test_impl(n): shp = (2, 3) result1 = np.zeros(shp, np.float32) tmp = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(shp) for i in numba.prange(n): result1 += tmp return result1 self.check(test_impl, 100) def test_two_d_array_reduction_prod(self): def test_impl(n): shp = (13, 17) result1 = 2 * np.ones(shp, np.int_) tmp = 2 * np.ones_like(result1) for i in numba.prange(n): result1 *= tmp return result1 self.check(test_impl, 100) def test_three_d_array_reduction(self): def test_impl(n): shp = (3, 2, 7) result1 = np.zeros(shp, np.int_) for i in numba.prange(n): result1 += np.ones(shp, np.int_) return result1 self.check(test_impl, 100) def test_preparfor_canonicalize_kws(self): # test canonicalize_array_math typing for calls with kw args def test_impl(A): return A.argsort() + 1 n = 211 A = np.arange(n) self.check(test_impl, A) def test_preparfor_datetime64(self): # test array.dtype transformation for datetime64 def test_impl(A): return A.dtype A = np.empty(1, np.dtype('datetime64[ns]')) cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),)) self.assertEqual(cpfunc.entry_point(A), test_impl(A)) def test_no_hoisting_with_member_function_call(self): def test_impl(X): n = X.shape[0] acc = 0 for i in prange(n): R = {1, 2, 3} R.add(i) tmp = 0 for x in R: tmp += x acc += tmp return acc self.check(test_impl, np.random.ranf(128)) def test_array_compare_scalar(self): """ issue3671: X != 0 becomes an arrayexpr with operator.ne. That is turned into a parfor by devectorizing. 
Make sure the return type of the devectorized operator.ne on integer types works properly. """ def test_impl(): X = np.zeros(10, dtype=np.int_) return X != 0 self.check(test_impl) def test_array_analysis_optional_def(self): def test_impl(x, half): size = len(x) parr = x[0:size] if half: parr = x[0:size//2] return parr.sum() x = np.ones(20) self.check(test_impl, x, True, check_scheduling=False) def test_prange_side_effects(self): def test_impl(a, b): data = np.empty(len(a), dtype=np.float64) size = len(data) for i in numba.prange(size): data[i] = a[i] for i in numba.prange(size): data[i] = data[i] + b[i] return data x = np.arange(10 ** 2, dtype=float) y = np.arange(10 ** 2, dtype=float) self.check(test_impl, x, y) self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), types.Array(types.float64, 1, 'C'))), 1) def test_tuple1(self): def test_impl(a): atup = (3, 4) b = 7 for i in numba.prange(len(a)): a[i] += atup[0] + atup[1] + b return a x = np.arange(10) self.check(test_impl, x) def test_tuple2(self): def test_impl(a): atup = a.shape b = 7 for i in numba.prange(len(a)): a[i] += atup[0] + b return a x = np.arange(10) self.check(test_impl, x) def test_tuple3(self): def test_impl(a): atup = (np.arange(10), 4) b = 7 for i in numba.prange(len(a)): a[i] += atup[0][5] + atup[1] + b return a x = np.arange(10) self.check(test_impl, x) def test_namedtuple1(self): def test_impl(a): antup = TestNamedTuple(part0=3, part1=4) b = 7 for i in numba.prange(len(a)): a[i] += antup.part0 + antup.part1 + b return a x = np.arange(10) self.check(test_impl, x) def test_namedtuple2(self): TestNamedTuple2 = namedtuple('TestNamedTuple2', ('part0', 'part1')) def test_impl(a): antup = TestNamedTuple2(part0=3, part1=4) b = 7 for i in numba.prange(len(a)): a[i] += antup.part0 + antup.part1 + b return a x = np.arange(10) self.check(test_impl, x) def test_namedtuple3(self): # issue5872: test that a.y[:] = 5 is not removed as # deadcode. 
TestNamedTuple3 = namedtuple(f'TestNamedTuple3',['y']) def test_impl(a): a.y[:] = 5 def comparer(a, b): np.testing.assert_almost_equal(a.y, b.y) x = TestNamedTuple3(y=np.zeros(10)) self.check(test_impl, x, check_arg_equality=[comparer]) def test_inplace_binop(self): def test_impl(a, b): b += a return b X = np.arange(10) + 10 Y = np.arange(10) + 100 self.check(test_impl, X, Y) self.assertEqual(countParfors(test_impl, (types.Array(types.float64, 1, 'C'), types.Array(types.float64, 1, 'C'))), 1) def test_tuple_concat(self): # issue5383 def test_impl(a): n = len(a) array_shape = n, n indices = np.zeros(((1,) + array_shape + (1,)), dtype=np.uint64) k_list = indices[0, :] for i, g in enumerate(a): k_list[i, i] = i return k_list x = np.array([1, 1]) self.check(test_impl, x) def test_tuple_concat_with_reverse_slice(self): # issue5383 def test_impl(a): n = len(a) array_shape = n, n indices = np.zeros(((1,) + array_shape + (1,))[:-1], dtype=np.uint64) k_list = indices[0, :] for i, g in enumerate(a): k_list[i, i] = i return k_list x = np.array([1, 1]) self.check(test_impl, x) def test_array_tuple_concat(self): # issue6399 def test_impl(a): S = (a,) + (a, a) return S[0].sum() x = np.ones((3,3)) self.check(test_impl, x) def test_high_dimension1(self): # issue6749 def test_impl(x): return x * 5.0 x = np.ones((2, 2, 2, 2, 2, 15)) self.check(test_impl, x) def test_tuple_arg(self): def test_impl(x, sz): for i in numba.pndindex(sz): x[i] = 1 return x sz = (10, 5) self.check(test_impl, np.empty(sz), sz) def test_tuple_arg_not_whole_array(self): def test_impl(x, sz): for i in numba.pndindex(sz): x[i] = 1 return x sz = (10, 5) self.check(test_impl, np.zeros(sz), (10, 3)) def test_tuple_for_pndindex(self): def test_impl(x): sz = (10, 5) for i in numba.pndindex(sz): x[i] = 1 return x sz = (10, 5) self.check(test_impl, np.zeros(sz)) def test_tuple_arg_literal(self): def test_impl(x, first): sz = (first, 5) for i in numba.pndindex(sz): x[i] = 1 return x sz = (10, 5) self.check(test_impl, 
np.zeros(sz), 10) def test_tuple_of_literal_nonliteral(self): # This test has to be done manually as the self.check uses # compile_isolated and one function cannot "see" the other def test_impl(x, sz): for i in numba.pndindex(sz): x[i] = 1 return x def call(x, fn): return fn(x, (10, 3)) # Only want to iterate to the 3rd get_input = lambda: np.zeros((10, 10)) expected = call(get_input(), test_impl) def check(dec): f1 = dec(test_impl) f2 = njit(call) # no parallel semantics in the caller got = f2(get_input(), f1) self.assertPreciseEqual(expected, got) for d in (njit, njit(parallel=True)): check(d) def test_tuple_arg_1d(self): def test_impl(x, sz): for i in numba.pndindex(sz): x[i] = 1 return x sz = (10,) self.check(test_impl, np.zeros(sz), sz) def test_tuple_arg_1d_literal(self): def test_impl(x): sz = (10,) for i in numba.pndindex(sz): x[i] = 1 return x sz = (10,) self.check(test_impl, np.zeros(sz)) def test_int_arg_pndindex(self): def test_impl(x, sz): for i in numba.pndindex(sz): x[i] = 1 return x self.check(test_impl, np.zeros((10, 10)), 3) def test_prange_unknown_call1(self): @register_jitable def issue7854_proc(u, i, even, size): for j in range((even + i + 1) % 2 + 1, size - 1, 2): u[i, j] = u[i + 1, j] + 1 # issue7854 # Forbid fusion in unanalyzable call inside prange. def test_impl(u, size): for i in numba.prange(1, size - 1): issue7854_proc(u, i, 0, size) for i in numba.prange(1, size - 1): issue7854_proc(u, i, 1, size) return u size = 4 u = np.zeros((size, size)) cptypes = (numba.float64[:, ::1], types.int64) self.assertEqual(countParfors(test_impl, cptypes), 2) self.check(test_impl, u, size) def test_prange_index_calc1(self): # Should forbid fusion due to cross-iteration dependency as # detected by loop index calcuation (i+1) as array index. 
def test_impl(u, size): for i in numba.prange(1, size - 1): for j in range((i + 1) % 2 + 1, size - 1, 2): u[i, j] = u[i + 1, j] + 1 for i in numba.prange(1, size - 1): for j in range(i % 2 + 1, size - 1, 2): u[i, j] = u[i + 1, j] + 1 return u size = 4 u = np.zeros((size, size)) cptypes = (numba.float64[:, ::1], types.int64) self.assertEqual(countParfors(test_impl, cptypes), 2) self.check(test_impl, u, size) def test_prange_reverse_order1(self): # Testing if reversed loop index usage as array index # prevents fusion. def test_impl(a, b, size): for i in numba.prange(size): for j in range(size): a[i, j] = b[i, j] + 1 for i in numba.prange(size): for j in range(size): b[j, i] = 3 return a[0, 0] + b[0, 0] size = 10 a = np.zeros((size, size)) b = np.zeros((size, size)) cptypes = (numba.float64[:, ::1], numba.float64[:, ::1], types.int64) self.assertEqual(countParfors(test_impl, cptypes), 2) self.check(test_impl, a, b, size) def test_prange_parfor_index_then_not(self): # Testing if accessing an array first with a parfor index then # without will prevent fusion. def test_impl(a, size): b = 0 for i in numba.prange(size): a[i] = i for i in numba.prange(size): b += a[5] return b size = 10 a = np.zeros(size) cptypes = (numba.float64[:], types.int64) self.assertEqual(countParfors(test_impl, cptypes), 2) self.check(test_impl, a, size) def test_prange_parfor_index_const_tuple_fusion(self): # Testing if accessing a tuple with prange index # and later with a constant will not prevent fusion. 
def test_impl(a, tup, size): acc = 0 for i in numba.prange(size): a[i] = i + tup[i] for i in numba.prange(size): acc += a[i] + tup[1] return acc size = 10 a = np.zeros(size) b = tuple(a) cptypes = (numba.float64[:], types.containers.UniTuple(types.float64, size), types.intp) self.assertEqual(countParfors(test_impl, cptypes), 1) self.check(test_impl, a, b, size) def test_prange_non_parfor_index_then_opposite(self): # Testing if accessing an array first without a parfor index then # with will prevent fusion. def test_impl(a, b, size): for i in numba.prange(size): b[i] = a[5] for i in numba.prange(size): a[i] = i # Need this to stop previous prange from being optimized away. b[0] += a[0] return b size = 10 a = np.zeros(size) b = np.zeros(size) cptypes = (numba.float64[:], numba.float64[:], types.int64) self.assertEqual(countParfors(test_impl, cptypes), 2) self.check(test_impl, a, b, size) def test_prange_optional(self): def test_impl(arr, pred=None): for i in prange(1): if pred is not None: arr[i] = 0.0 arr = np.ones(10) self.check(test_impl, arr, None, check_arg_equality=[np.testing.assert_almost_equal, lambda x, y: x == y]) self.assertEqual(arr.sum(), 10.0) def test_untraced_value_tuple(self): # This is a test for issue #6478. def test_impl(): a = (1.2, 1.3) return a[0] with self.assertRaises(AssertionError) as raises: self.check(test_impl) self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) def test_recursive_untraced_value_tuple(self): # This is a test for issue #6478. def test_impl(): a = ((1.2, 1.3),) return a[0][0] with self.assertRaises(AssertionError) as raises: self.check(test_impl) self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) def test_untraced_value_parfor(self): # This is a test for issue #6478. 
def test_impl(arr): a = (1.2, 1.3) n1 = len(arr) arr2 = np.empty(n1, np.float64) for i in prange(n1): arr2[i] = arr[i] * a[0] n2 = len(arr2) arr3 = np.empty(n2, np.float64) for j in prange(n2): arr3[j] = arr2[j] - a[1] total = 0.0 n3 = len(arr3) for k in prange(n3): total += arr3[k] return total + a[0] arg = (types.Array(types.int64, 1, 'C'), ) self.assertEqual(countParfors(test_impl, arg), 1) arr = np.arange(10, dtype=np.int64) self.check(test_impl, arr) def test_setitem_2d_one_replaced(self): # issue7843 def test_impl(x): count = 0 for n in range(x.shape[0]): # Useless "if" necessary to trigger bug. if n: n x[count, :] = 1 count += 1 return x self.check(test_impl, np.zeros((3, 1))) def test_1array_control_flow(self): # issue8146 def test_impl(arr, flag1, flag2): inv = np.arange(arr.size) if flag1: return inv.astype(np.float64) if flag2: ret = inv[inv] else: ret = inv[inv - 1] return ret / arr.size arr = np.arange(100) self.check(test_impl, arr, True, False) self.check(test_impl, arr, True, True) self.check(test_impl, arr, False, False) def test_2array_1_control_flow(self): # issue8146 def test_impl(arr, l, flag): inv1 = np.arange(arr.size) inv2 = np.arange(l, arr.size + l) if flag: ret = inv1[inv1] else: ret = inv1[inv1 - 1] return ret / inv2 arr = np.arange(100) self.check(test_impl, arr, 10, True) self.check(test_impl, arr, 10, False) def test_2array_2_control_flow(self): # issue8146 def test_impl(arr, l, flag): inv1 = np.arange(arr.size) inv2 = np.arange(l, arr.size + l) if flag: ret1 = inv1[inv1] ret2 = inv2[inv1] else: ret1 = inv1[inv1 - 1] ret2 = inv2[inv1 - 1] return ret1 / ret2 arr = np.arange(100) self.check(test_impl, arr, 10, True) self.check(test_impl, arr, 10, False) def test_issue8515(self): # issue8515: an array is filled in the first prange and # then accessed with c[i - 1] in the next prange which # should prevent fusion with the previous prange. 
        def test_impl(n):
            r = np.zeros(n, dtype=np.intp)
            c = np.zeros(n, dtype=np.intp)
            for i in prange(n):
                for j in range(i):
                    c[i] += 1
            # the c[i - 1] read below is a cross-iteration dependence on the
            # loop above, so these two pranges must not fuse
            for i in prange(n):
                if i == 0:
                    continue
                r[i] = c[i] - c[i - 1]
            return r[1:]

        self.check(test_impl, 15)
        # two parfors expected: fusion of the pranges must be prevented
        self.assertEqual(countParfors(test_impl, (types.int64, )), 2)

    def test_issue9029(self):
        # issue9029: too many parfors executed in one function
        # overflowed the stack.
        def test_impl(i1, i2):
            N = 30
            S = 3
            a = np.empty((N,N))
            # The stack should overflow if there are 30*30*2 (# of parfors)
            # iterations.
            for y in range(N):
                for x in range(N):
                    values = np.ones(S)
                    v = values[0]
                    p2 = np.empty(S)
                    for i in prange(i1, i2):
                        p2[i] = 1
                    j = p2[0]
                    a[y,x] = v + j
            return a

        # We pass in 0 and 3 so that the function can't analyze the loop
        # bounds on the prange to generate a signed loop whereas the
        # np.ones will be an unsigned loop.
        self.check(test_impl, 0, 3)

    def test_fusion_no_side_effects(self):
        def test_impl(a, b):
            # the scalar math.ceil/int/max calls between the array
            # expressions have no side effects, so the array ops can fuse
            X = np.ones(100)
            b = math.ceil(b)
            Y = np.ones(100)
            c = int(max(a, b))
            return X + Y + c

        self.check(test_impl, 3.7, 4.3)
        self.assertEqual(countParfors(test_impl,
                                      (types.float64, types.float64)), 1)

    def test_issue9256_lower_sroa_conflict(self):
        @njit(parallel=True)
        def def_in_loop(x):
            # 'c' is defined before the loop and redefined in every
            # iteration; lowering must keep the SSA versions distinct
            c = 0
            set_num_threads(1)
            for i in prange(x):
                c = i
            return c
        self.assertEqual(def_in_loop(10), def_in_loop.py_func(10))

    def test_issue9256_lower_sroa_conflict_variant1(self):
        def def_in_loop(x):
            c = x
            set_num_threads(1)
            for _i in prange(x):
                if c:
                    # forces 3 SSA versions
                    d = x + 4
            return c, d > 0
        expected = def_in_loop(4)
        self.assertEqual(expected,
                         njit(parallel=False)(def_in_loop)(4))
        self.assertEqual(expected,
                         njit(parallel=True)(def_in_loop)(4))

    def test_issue9256_lower_sroa_conflict_variant2(self):
        def def_in_loop(x):
            c = x
            set_num_threads(1)
            for _i in prange(x):
                if c:
                    for _j in range(x):
                        # forces 4 SSA versions
                        d = x + 4
            return c, d > 0
        expected = def_in_loop(4)
        self.assertEqual(expected,
                         njit(parallel=False)(def_in_loop)(4))
        self.assertEqual(expected,
                         njit(parallel=True)(def_in_loop)(4))

    @needs_lapack  # use of np.linalg.solve
    @skip_ppc64le_invalid_ctr_loop
    def test_issue9490_non_det_ssa_problem(self):
        # runs the reproducer in a subprocess so that PYTHONHASHSEED and the
        # thread count can be pinned for deterministic SSA ordering
        cmd = [
            sys.executable,
            "-m",
            "numba.tests.parfor_iss9490_usecase",
        ]
        try:
            subp.check_output(cmd,
                              env={**os.environ,
                                   "PYTHONHASHSEED": "1",
                                   "NUMBA_NUM_THREADS": "1"},
                              stderr=subp.STDOUT,
                              encoding='utf-8')
        except subp.CalledProcessError as e:
            msg = f"subprocess failed with output:\n{e.output}"
            self.fail(msg=msg)


@skip_parfors_unsupported
class TestParforsLeaks(MemoryLeakMixin, TestParforsBase):
    # MemoryLeakMixin makes each test assert that no references leak across
    # the compiled-call boundary.
    def check(self, pyfunc, *args, **kwargs):
        cfunc, cpfunc = self.compile_all(pyfunc, *args)
        self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs)

    def test_reduction(self):
        # issue4299
        def test_impl(arr):
            return arr.sum()

        arr = np.arange(10).astype(np.float64)
        self.check(test_impl, arr)

    def test_multiple_reduction_vars(self):
        def test_impl(arr):
            # two independent reduction variables in one prange
            a = 0.
            b = 1.
            for i in prange(arr.size):
                a += arr[i]
                b += 1. / (arr[i] + 1)
            return a * b
        arr = np.arange(10).astype(np.float64)
        self.check(test_impl, arr)


@skip_parfors_unsupported
class TestParforsSlice(TestParforsBase):

    def test_parfor_slice1(self):
        def test_impl(a):
            (n,) = a.shape
            b = a[0:n-2] + a[1:n-1]
            return b

        self.check(test_impl, np.ones(10))

    def test_parfor_slice2(self):
        def test_impl(a, m):
            (n,) = a.shape
            b = a[0:n-2] + a[1:m]
            return b

        # runtime assertion should succeed
        self.check(test_impl, np.ones(10), 9)
        # next we expect failure
        with self.assertRaises(AssertionError) as raises:
            njit(parallel=True)(test_impl)(np.ones(10),10)
        self.assertIn("do not match", str(raises.exception))

    def test_parfor_slice3(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,0:n-1] + a[1:m,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice4(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a[:,0:n-1] + a[:,1:n]
            return b

        self.check(test_impl, np.ones((4,3)))

    def test_parfor_slice5(self):
        def test_impl(a):
            (m,n) = a.shape
            b = a[0:m-1,:] + a[1:m,:]
            return b
self.check(test_impl, np.ones((4,3))) def test_parfor_slice6(self): def test_impl(a): b = a.transpose() c = a[1,:] + b[:,1] return c self.check(test_impl, np.ones((4,3))) def test_parfor_slice7(self): def test_impl(a): b = a.transpose() c = a[1,:] + b[1,:] return c # runtime check should succeed self.check(test_impl, np.ones((3,3))) # next we expect failure with self.assertRaises(AssertionError) as raises: njit(parallel=True)(test_impl)(np.ones((3,4))) self.assertIn("do not match", str(raises.exception)) @disabled_test def test_parfor_slice8(self): def test_impl(a): (m,n) = a.shape b = a.transpose() b[1:m,1:n] = a[1:m,1:n] return b self.check(test_impl, np.arange(9).reshape((3,3))) @disabled_test def test_parfor_slice9(self): def test_impl(a): (m,n) = a.shape b = a.transpose() b[1:n,1:m] = a[:,1:m] return b self.check(test_impl, np.arange(12).reshape((3,4))) @disabled_test def test_parfor_slice10(self): def test_impl(a): (m,n) = a.shape b = a.transpose() b[2,1:m] = a[2,1:m] return b self.check(test_impl, np.arange(9).reshape((3,3))) def test_parfor_slice11(self): def test_impl(a): (m,n,l) = a.shape b = a.copy() b[:,1,1:l] = a[:,2,1:l] return b self.check(test_impl, np.arange(27).reshape((3,3,3))) def test_parfor_slice12(self): def test_impl(a): (m,n) = a.shape b = a.copy() b[1,1:-1] = a[0,:-2] return b self.check(test_impl, np.arange(12).reshape((3,4))) def test_parfor_slice13(self): def test_impl(a): (m,n) = a.shape b = a.copy() c = -1 b[1,1:c] = a[0,-n:c-1] return b self.check(test_impl, np.arange(12).reshape((3,4))) def test_parfor_slice14(self): def test_impl(a): (m,n) = a.shape b = a.copy() b[1,:-1] = a[0,-3:4] return b self.check(test_impl, np.arange(12).reshape((3,4))) def test_parfor_slice15(self): def test_impl(a): (m,n) = a.shape b = a.copy() b[1,-(n-1):] = a[0,-3:4] return b self.check(test_impl, np.arange(12).reshape((3,4))) @disabled_test def test_parfor_slice16(self): """ This test is disabled because if n is larger than the array size then n and n-1 
will both be the end of the array and thus the slices will in fact be of different sizes and unable to fuse. """ def test_impl(a, b, n): assert(a.shape == b.shape) a[1:n] = 10 b[0:(n-1)] = 10 return a * b self.check(test_impl, np.ones(10), np.zeros(10), 8) args = (numba.float64[:], numba.float64[:], numba.int64) self.assertEqual(countParfors(test_impl, args), 2) def test_parfor_slice17(self): def test_impl(m, A): B = np.zeros(m) n = len(A) B[-n:] = A return B self.check(test_impl, 10, np.ones(10)) def test_parfor_slice18(self): # issue 3534 def test_impl(): a = np.zeros(10) a[1:8] = np.arange(0, 7) y = a[3] return y self.check(test_impl) def test_parfor_slice19(self): # issues #3561 and #3554, empty slice binop def test_impl(X): X[:0] += 1 return X self.check(test_impl, np.ones(10)) def test_parfor_slice20(self): # issue #4075, slice size def test_impl(): a = np.ones(10) c = a[1:] s = len(c) return s self.check(test_impl, check_scheduling=False) def test_parfor_slice21(self): def test_impl(x1, x2): x1 = x1.reshape(x1.size, 1) x2 = x2.reshape(x2.size, 1) return x1 >= x2[:-1, :] x1 = np.random.rand(5) x2 = np.random.rand(6) self.check(test_impl, x1, x2) def test_parfor_slice22(self): def test_impl(x1, x2): b = np.zeros((10,)) for i in prange(1): b += x1[:, x2] return b x1 = np.zeros((10,7)) x2 = np.array(4) self.check(test_impl, x1, x2) def test_parfor_slice23(self): # issue #4630 def test_impl(x): x[:0] = 2 return x self.check(test_impl, np.ones(10)) def test_parfor_slice24(self): def test_impl(m, A, n): B = np.zeros(m) C = B[n:] C = A[:len(C)] return B for i in range(-15, 15): self.check(test_impl, 10, np.ones(10), i) def test_parfor_slice25(self): def test_impl(m, A, n): B = np.zeros(m) C = B[:n] C = A[:len(C)] return B for i in range(-15, 15): self.check(test_impl, 10, np.ones(10), i) def test_parfor_slice26(self): def test_impl(a): (n,) = a.shape b = a.copy() b[-(n-1):] = a[-3:4] return b self.check(test_impl, np.arange(4)) def test_parfor_slice27(self): # 
issue5601: tests array analysis of the slice with # n_valid_vals of unknown size. def test_impl(a): n_valid_vals = 0 for i in prange(a.shape[0]): if a[i] != 0: n_valid_vals += 1 if n_valid_vals: unused = a[:n_valid_vals] return 0 self.check(test_impl, np.arange(3)) def test_parfor_array_access_lower_slice(self): for ts in [slice(1, 3, None), slice(2, None, None), slice(None, 2, -1), slice(None, None, None), slice(None, None, -2)]: def test_impl(n): X = np.arange(n * 4).reshape((n, 4)) y = 0 for i in numba.prange(n): y += X[i, ts].sum() return y n = 10 self.check(test_impl, n) X = np.arange(n * 4).reshape((n, 4)) def test_impl(X): y = 0 for i in numba.prange(X.shape[0]): y += X[i, ts].sum() return y self.check(test_impl, X) @skip_parfors_unsupported class TestParforsOptions(TestParforsBase): def test_parfor_options(self): def test_impl(a): n = a.shape[0] b = np.ones(n) c = np.array([ i for i in range(n) ]) b[:n] = a + b * c for i in prange(n): c[i] = b[i] * a[i] return reduce(lambda x,y:x+y, c, 0) self.check(test_impl, np.ones(10)) args = (numba.float64[:],) # everything should fuse with default option self.assertEqual(countParfors(test_impl, args), 1) # with no fusion self.assertEqual(countParfors(test_impl, args, fusion=False), 6) # with no fusion, comprehension self.assertEqual(countParfors(test_impl, args, fusion=False, comprehension=False), 5) #with no fusion, comprehension, setitem self.assertEqual(countParfors(test_impl, args, fusion=False, comprehension=False, setitem=False), 4) # with no fusion, comprehension, prange self.assertEqual(countParfors(test_impl, args, fusion=False, comprehension=False, setitem=False, prange=False), 3) # with no fusion, comprehension, prange, reduction self.assertEqual(countParfors(test_impl, args, fusion=False, comprehension=False, setitem=False, prange=False, reduction=False), 2) # with no fusion, comprehension, prange, reduction, numpy self.assertEqual(countParfors(test_impl, args, fusion=False, comprehension=False, 
setitem=False, prange=False, reduction=False, numpy=False), 0) @skip_parfors_unsupported class TestParforsBitMask(TestParforsBase): def test_parfor_bitmask1(self): def test_impl(a, n): b = a > n a[b] = 0 return a self.check(test_impl, np.arange(10), 5) def test_parfor_bitmask2(self): def test_impl(a, b): a[b] = 0 return a a = np.arange(10) b = a > 5 self.check(test_impl, a, b) def test_parfor_bitmask3(self): def test_impl(a, b): a[b] = a[b] return a a = np.arange(10) b = a > 5 self.check(test_impl, a, b) def test_parfor_bitmask4(self): def test_impl(a, b): a[b] = (2 * a)[b] return a a = np.arange(10) b = a > 5 self.check(test_impl, a, b) def test_parfor_bitmask5(self): def test_impl(a, b): a[b] = a[b] * a[b] return a a = np.arange(10) b = a > 5 self.check(test_impl, a, b) def test_parfor_bitmask6(self): def test_impl(a, b, c): a[b] = c return a a = np.arange(10) b = a > 5 c = np.zeros(sum(b)) # expect failure due to lack of parallelism with self.assertRaises(AssertionError) as raises: self.check(test_impl, a, b, c) self.assertIn("\'@do_scheduling\' not found", str(raises.exception)) @skip_parfors_unsupported class TestParforsMisc(TestParforsBase): """ Tests miscellaneous parts of ParallelAccelerator use. 
""" def test_no_warn_if_cache_set(self): def pyfunc(): arr = np.ones(100) for i in prange(arr.size): arr[i] += i return arr cfunc = njit(parallel=True, cache=True)(pyfunc) with warnings.catch_warnings(record=True) as raised_warnings: warnings.simplefilter('always') warnings.filterwarnings(action="ignore", module="typeguard") # Filter out warnings about TBB interface mismatch warnings.filterwarnings(action='ignore', message=r".*TBB_INTERFACE_VERSION.*", category=numba.errors.NumbaWarning, module=r'numba\.np\.ufunc\.parallel.*') cfunc() self.assertEqual(len(raised_warnings), 0) # Make sure the dynamic globals flag is set has_dynamic_globals = [cres.library.has_dynamic_globals for cres in cfunc.overloads.values()] self.assertEqual(has_dynamic_globals, [False]) def test_statement_reordering_respects_aliasing(self): def impl(): a = np.zeros(10) a[1:8] = np.arange(0, 7) print('a[3]:', a[3]) print('a[3]:', a[3]) return a cres = self.compile_parallel(impl, ()) with captured_stdout() as stdout: cres.entry_point() for line in stdout.getvalue().splitlines(): self.assertEqual('a[3]: 2.0', line) def test_parfor_ufunc_typing(self): def test_impl(A): return np.isinf(A) A = np.array([np.inf, 0.0]) cfunc = njit(parallel=True)(test_impl) # save global state old_seq_flag = numba.parfors.parfor.sequential_parfor_lowering try: numba.parfors.parfor.sequential_parfor_lowering = True np.testing.assert_array_equal(test_impl(A), cfunc(A)) finally: # recover global state numba.parfors.parfor.sequential_parfor_lowering = old_seq_flag def test_init_block_dce(self): # issue4690 def test_impl(): res = 0 arr = [1,2,3,4,5] numba.parfors.parfor.init_prange() dummy = arr for i in numba.prange(5): res += arr[i] return res + dummy[2] self.assertEqual(get_init_block_size(test_impl, ()), 0) def test_alias_analysis_for_parfor1(self): def test_impl(): acc = 0 for _ in range(4): acc += 1 data = np.zeros((acc,)) return data self.check(test_impl) def test_no_state_change_in_gufunc_lowering_on_error(self): # 
tests #5098, if there's an exception arising in gufunc lowering the # sequential_parfor_lowering global variable should remain as False on # stack unwind. BROKEN_MSG = 'BROKEN_MSG' @register_pass(mutates_CFG=True, analysis_only=False) class BreakParfors(AnalysisPass): _name = "break_parfors" def __init__(self): AnalysisPass.__init__(self) def run_pass(self, state): for blk in state.func_ir.blocks.values(): for stmt in blk.body: if isinstance(stmt, numba.parfors.parfor.Parfor): # races should be a set(), that list is iterable # permits it to get through to the # _create_gufunc_for_parfor_body routine at which # point it needs to be a set so e.g. set.difference # can be computed, this therefore creates an error # in the right location. class Broken(list): def difference(self, other): raise errors.LoweringError(BROKEN_MSG) stmt.races = Broken() return True class BreakParforsCompiler(CompilerBase): def define_pipelines(self): pm = DefaultPassBuilder.define_nopython_pipeline(self.state) pm.add_pass_after(BreakParfors, IRLegalization) pm.finalize() return [pm] @njit(parallel=True, pipeline_class=BreakParforsCompiler) def foo(): x = 1 for _ in prange(1): x += 1 return x # assert default state for global self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering) with self.assertRaises(errors.LoweringError) as raises: foo() self.assertIn(BROKEN_MSG, str(raises.exception)) # assert state has not changed self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering) def test_issue_5098(self): class DummyType(types.Opaque): pass dummy_type = DummyType("my_dummy") register_model(DummyType)(models.OpaqueModel) class Dummy(object): pass @typeof_impl.register(Dummy) def typeof_Dummy(val, c): return dummy_type @unbox(DummyType) def unbox_index(typ, obj, c): return NativeValue(c.context.get_dummy_value()) @overload_method(DummyType, "method1", jit_options={"parallel":True}) def _get_method1(obj, arr, func): def _foo(obj, arr, func): def baz(a, f): c = a.copy() 
c[np.isinf(a)] = np.nan return f(c) length = len(arr) output_arr = np.empty(length, dtype=np.float64) for i in prange(length): output_arr[i] = baz(arr[i], func) for i in prange(length - 1): output_arr[i] += baz(arr[i], func) return output_arr return _foo @njit def bar(v): return v.mean() @njit def test1(d): return d.method1(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), bar) save_state = numba.parfors.parfor.sequential_parfor_lowering self.assertFalse(save_state) try: test1(Dummy()) self.assertFalse(numba.parfors.parfor.sequential_parfor_lowering) finally: # always set the sequential_parfor_lowering state back to the # original state numba.parfors.parfor.sequential_parfor_lowering = save_state def test_oversized_tuple_as_arg_to_kernel(self): @njit(parallel=True) def oversize_tuple(idx): big_tup = (1,2,3,4) z = 0 for x in prange(10): z += big_tup[idx] return z with override_env_config('NUMBA_PARFOR_MAX_TUPLE_SIZE', '3'): with self.assertRaises(errors.UnsupportedParforsError) as raises: oversize_tuple(0) errstr = str(raises.exception) self.assertIn("Use of a tuple", errstr) self.assertIn("in a parallel region", errstr) def test_issue5167(self): def ndvi_njit(img_nir, img_red): fillvalue = 0 out_img = np.full(img_nir.shape, fillvalue, dtype=img_nir.dtype) dims = img_nir.shape for y in prange(dims[0]): for x in prange(dims[1]): out_img[y, x] = ((img_nir[y, x] - img_red[y, x]) / (img_nir[y, x] + img_red[y, x])) return out_img tile_shape = (4, 4) array1 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape) array2 = np.random.uniform(low=1.0, high=10000.0, size=tile_shape) self.check(ndvi_njit, array1, array2) def test_issue5065(self): def reproducer(a, dist, dist_args): result = np.zeros((a.shape[0], a.shape[0]), dtype=np.float32) for i in prange(a.shape[0]): for j in range(i + 1, a.shape[0]): d = dist(a[i], a[j], *dist_args) result[i, j] = d result[j, i] = d return result @njit def euclidean(x, y): result = 0.0 for i in range(x.shape[0]): result += (x[i] - y[i]) ** 
2 return np.sqrt(result) a = np.random.random(size=(5, 2)) got = njit(parallel=True)(reproducer)(a.copy(), euclidean,()) expected = reproducer(a.copy(), euclidean,()) np.testing.assert_allclose(got, expected) def test_issue5001(self): def test_numba_parallel(myarray): result = [0] * len(myarray) for i in prange(len(myarray)): result[i] = len(myarray[i]) return result myarray = (np.empty(100),np.empty(50)) self.check(test_numba_parallel, myarray) def test_issue3169(self): @njit def foo(grids): pass @njit(parallel=True) def bar(grids): for x in prange(1): foo(grids) # returns nothing, just check it compiles bar(([1],) * 2) @disabled_test def test_issue4846(self): mytype = namedtuple("mytype", ("a", "b")) def outer(mydata): for k in prange(3): inner(k, mydata) return mydata.a @njit(nogil=True) def inner(k, mydata): f = (k, mydata.a) g = (k, mydata.b) mydata = mytype(a="a", b="b") self.check(outer, mydata) def test_issue3748(self): def test1b(): x = (1, 2, 3, 4, 5) a = 0 for i in prange(len(x)): a += x[i] return a self.check(test1b,) def test_issue5277(self): def parallel_test(size, arr): for x in prange(size[0]): for y in prange(size[1]): arr[y][x] = x * 4.5 + y return arr size = (10, 10) arr = np.zeros(size, dtype=int) self.check(parallel_test, size, arr) def test_issue5570_ssa_races(self): @njit(parallel=True) def foo(src, method, out): for i in prange(1): for j in range(1): out[i, j] = 1 if method: out += 1 return out src = np.zeros((5,5)) method = 57 out = np.zeros((2, 2)) self.assertPreciseEqual( foo(src, method, out), foo.py_func(src, method, out) ) def test_issue6095_numpy_max(self): @njit(parallel=True) def find_maxima_3D_jit(args): package = args for index in range(0, 10): z_stack = package[index, :, :] return np.max(z_stack) np.random.seed(0) args = np.random.random((10, 10, 10)) self.assertPreciseEqual( find_maxima_3D_jit(args), find_maxima_3D_jit.py_func(args), ) def test_issue5942_1(self): # issue5942: tests statement reordering of # aliased arguments. 
def test_impl(gg, gg_next): gs = gg.shape d = gs[0] for i_gg in prange(d): gg_next[i_gg, :] = gg[i_gg, :] gg_next[i_gg, 0] += 1 return gg_next d = 4 k = 2 gg = np.zeros((d, k), dtype = np.int32) gg_next = np.zeros((d, k), dtype = np.int32) self.check(test_impl, gg, gg_next) def test_issue5942_2(self): # issue5942: tests statement reordering def test_impl(d, k): gg = np.zeros((d, k), dtype = np.int32) gg_next = np.zeros((d, k), dtype = np.int32) for i_gg in prange(d): for n in range(k): gg[i_gg, n] = i_gg gg_next[i_gg, :] = gg[i_gg, :] gg_next[i_gg, 0] += 1 return gg_next d = 4 k = 2 self.check(test_impl, d, k) @skip_unless_scipy def test_issue6102(self): # The problem is originally observed on Python3.8 because of the # changes in how loops are represented in 3.8 bytecode. @njit(parallel=True) def f(r): for ir in prange(r.shape[0]): dist = np.inf tr = np.array([0, 0, 0], dtype=np.float32) for i in [1, 0, -1]: dist_t = np.linalg.norm(r[ir, :] + i) if dist_t < dist: dist = dist_t tr = np.array([i, i, i], dtype=np.float32) r[ir, :] += tr return r r = np.array([[0., 0., 0.], [0., 0., 1.]]) self.assertPreciseEqual(f(r), f.py_func(r)) def test_issue6774(self): def test_impl(): n = 5 na_mask = np.ones((n,)) result = np.empty((n - 1,)) for i in prange(len(result)): result[i] = np.sum(na_mask[i:i + 1]) return result self.check(test_impl) def test_issue4963_globals(self): def test_impl(): buf = np.zeros((_GLOBAL_INT_FOR_TESTING1, _GLOBAL_INT_FOR_TESTING2)) return buf self.check(test_impl) def test_issue4963_freevars(self): _FREEVAR_INT_FOR_TESTING1 = 17 _FREEVAR_INT_FOR_TESTING2 = 5 def test_impl(): buf = np.zeros((_FREEVAR_INT_FOR_TESTING1, _FREEVAR_INT_FOR_TESTING2)) return buf self.check(test_impl) def test_issue_9182_recursion_error(self): from numba.types import ListType, Tuple, intp @numba.njit def _sink(x): pass @numba.njit(cache=False, parallel=True) def _ground_node_rule( clauses, nodes, ): for piter in prange(len(nodes)): for clause in clauses: clause_type = 
clause[0] clause_variables = clause[2] if clause_type == 0: clause_var_1 = clause_variables[0] elif len(clause_variables) == 2: clause_var_1, clause_var_2 = ( clause_variables[0], clause_variables[1], ) elif len(clause_variables) == 4: pass if clause_type == 1: _sink(clause_var_1) _sink(clause_var_2) _ground_node_rule.compile( ( ListType(Tuple([intp, intp, ListType(intp)])), ListType(intp), ) ) def test_lookup_cycle_detection(self): # This test is added due to a bug discovered in the PR 9244 patch. # The cyclic detection was incorrectly flagging cycles. @njit(parallel=True) def foo(): # The following `acc` variable is used in the `lookup()` function # in parfor's reduction code. acc = 0 for n in prange(1): for i in range(1): for j in range(1): acc += 1 return acc self.assertEqual(foo(), foo.py_func()) @skip_parfors_unsupported class TestParforsDiagnostics(TestParforsBase): def check(self, pyfunc, *args, **kwargs): cfunc, cpfunc = self.compile_all(pyfunc, *args) self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) def assert_fusion_equivalence(self, got, expected): a = self._fusion_equivalent(got) b = self._fusion_equivalent(expected) self.assertEqual(a, b) def _fusion_equivalent(self, thing): # parfors indexes the Parfors class instance id's from wherever the # internal state happens to be. To assert fusion equivalence we just # check that the relative difference between fusion adjacency lists # is the same. 
For example: # {3: [2, 1]} is the same as {13: [12, 11]} # this function strips the indexing etc out returning something suitable # for checking equivalence new = defaultdict(list) min_key = min(thing.keys()) for k in sorted(thing.keys()): new[k - min_key] = [x - min_key for x in thing[k]] return new def assert_diagnostics(self, diagnostics, parfors_count=None, fusion_info=None, nested_fusion_info=None, replaced_fns=None, hoisted_allocations=None): if parfors_count is not None: self.assertEqual(parfors_count, diagnostics.count_parfors()) if fusion_info is not None: self.assert_fusion_equivalence(fusion_info, diagnostics.fusion_info) if nested_fusion_info is not None: self.assert_fusion_equivalence(nested_fusion_info, diagnostics.nested_fusion_info) if replaced_fns is not None: repl = diagnostics.replaced_fns.values() for x in replaced_fns: for replaced in repl: if replaced[0] == x: break else: msg = "Replacement for %s was not found. Had %s" % (x, repl) raise AssertionError(msg) if hoisted_allocations is not None: hoisted_allocs = diagnostics.hoisted_allocations() self.assertEqual(hoisted_allocations, len(hoisted_allocs)) # just make sure that the dump() function doesn't have an issue! 
with captured_stdout(): for x in range(1, 5): diagnostics.dump(x) def test_array_expr(self): def test_impl(): n = 10 a = np.ones(n) b = np.zeros(n) return a + b self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, parfors_count=1, fusion_info = {3: [4, 5]}) def test_prange(self): def test_impl(): n = 10 a = np.empty(n) for i in prange(n): a[i] = i * 10 return a self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, parfors_count=1) def test_user_varname(self): """make sure original user variable name is used in fusion info """ def test_impl(): n = 10 x = np.ones(n) a = np.sin(x) b = np.cos(a * a) acc = 0 for i in prange(n - 2): for j in prange(n - 1): acc += b[i] + b[j + 1] return acc self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] # make sure original 'n' variable name is used in fusion report for loop # dimension mismatch self.assertTrue( any("slice(0, n, 1)" in r.message for r in diagnostics.fusion_reports)) def test_nested_prange(self): def test_impl(): n = 10 a = np.empty((n, n)) for i in prange(n): for j in prange(n): a[i, j] = i * 10 + j return a self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, parfors_count=2, nested_fusion_info={2: [1]}) def test_function_replacement(self): def test_impl(): n = 10 a = np.ones(n) b = np.argmin(a) return b self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, parfors_count=1, fusion_info={2: [3]}, replaced_fns = [('argmin', 'numpy'),]) def test_reduction(self): def test_impl(): n = 10 a = np.ones(n + 1) # prevent fusion acc = 0 for i in 
prange(n): acc += a[i] return acc self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, parfors_count=2) def test_setitem(self): def test_impl(): n = 10 a = np.ones(n) a[:] = 7 return a self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, parfors_count=1) def test_allocation_hoisting(self): def test_impl(): n = 10 m = 5 acc = 0 for i in prange(n): temp = np.zeros((m,)) # the np.empty call should get hoisted for j in range(m): temp[j] = i acc += temp[-1] return acc self.check(test_impl,) cpfunc = self.compile_parallel(test_impl, ()) diagnostics = cpfunc.metadata['parfor_diagnostics'] self.assert_diagnostics(diagnostics, hoisted_allocations=1) class TestPrangeBase(TestParforsBase): def generate_prange_func(self, pyfunc, patch_instance): """ This function does the actual code augmentation to enable the explicit testing of `prange` calls in place of `range`. """ pyfunc_code = pyfunc.__code__ prange_names = list(pyfunc_code.co_names) if patch_instance is None: # patch all instances, cheat by just switching # range for prange assert 'range' in pyfunc_code.co_names prange_names = tuple([x if x != 'range' else 'prange' for x in pyfunc_code.co_names]) new_code = bytes(pyfunc_code.co_code) else: # patch specified instances... 
# find where 'range' is in co_names range_idx = pyfunc_code.co_names.index('range') range_locations = [] # look for LOAD_GLOBALs that point to 'range' for instr in dis.Bytecode(pyfunc_code): if instr.opname == 'LOAD_GLOBAL': if _fix_LOAD_GLOBAL_arg(instr.arg) == range_idx: range_locations.append(instr.offset + 1) # add in 'prange' ref prange_names.append('prange') prange_names = tuple(prange_names) prange_idx = len(prange_names) - 1 if utils.PYVERSION in ((3, 11), (3, 12)): # this is the inverse of _fix_LOAD_GLOBAL_arg prange_idx = 1 + (prange_idx << 1) elif utils.PYVERSION in ((3, 9), (3, 10)): pass else: raise NotImplementedError(utils.PYVERSION) new_code = bytearray(pyfunc_code.co_code) assert len(patch_instance) <= len(range_locations) # patch up the new byte code for i in patch_instance: idx = range_locations[i] new_code[idx] = prange_idx new_code = bytes(new_code) # create code object with prange mutation prange_code = pyfunc_code.replace(co_code=new_code, co_names=prange_names) # get function pfunc = pytypes.FunctionType(prange_code, globals()) return pfunc def prange_tester(self, pyfunc, *args, **kwargs): """ The `prange` tester This is a hack. It basically switches out range calls for prange. It does this by copying the live code object of a function containing 'range' then copying the .co_names and mutating it so that 'range' is replaced with 'prange'. It then creates a new code object containing the mutation and instantiates a function to contain it. At this point three results are created: 1. The result of calling the original python function. 2. The result of calling a njit compiled version of the original python function. 3. The result of calling a njit(parallel=True) version of the mutated function containing `prange`. The three results are then compared and the `prange` based function's llvm_ir is inspected to ensure the scheduler code is present. 
Arguments: pyfunc - the python function to test args - data arguments to pass to the pyfunc under test Keyword Arguments: patch_instance - iterable containing which instances of `range` to replace. If not present all instance of `range` are replaced. scheduler_type - 'signed', 'unsigned' or None, default is None. Supply in cases where the presence of a specific scheduler is to be asserted. check_fastmath - if True then a check will be performed to ensure the IR contains instructions labelled with 'fast' check_fastmath_result - if True then a check will be performed to ensure the result of running with fastmath on matches that of the pyfunc Remaining kwargs are passed to np.testing.assert_almost_equal Example: def foo(): acc = 0 for x in range(5): for y in range(10): acc +=1 return acc # calling as prange_tester(foo) # will test code equivalent to # def foo(): # acc = 0 # for x in prange(5): # <- changed # for y in prange(10): # <- changed # acc +=1 # return acc # calling as prange_tester(foo, patch_instance=[1]) # will test code equivalent to # def foo(): # acc = 0 # for x in range(5): # <- outer loop (0) unchanged # for y in prange(10): # <- inner loop (1) changed # acc +=1 # return acc """ patch_instance = kwargs.pop('patch_instance', None) check_fastmath = kwargs.pop('check_fastmath', False) check_fastmath_result = kwargs.pop('check_fastmath_result', False) pfunc = self.generate_prange_func(pyfunc, patch_instance) # Compile functions # compile a standard njit of the original function sig = tuple([numba.typeof(x) for x in args]) cfunc = self.compile_njit(pyfunc, sig) # compile the prange injected function with warnings.catch_warnings(record=True) as raised_warnings: warnings.simplefilter('always') cpfunc = self.compile_parallel(pfunc, sig) # if check_fastmath is True then check fast instructions if check_fastmath: self.assert_fastmath(pfunc, sig) # if check_fastmath_result is True then compile a function # so that the parfors checker can assert the result is ok. 
if check_fastmath_result: fastcpfunc = self.compile_parallel_fastmath(pfunc, sig) kwargs = dict({'fastmath_pcres': fastcpfunc}, **kwargs) self.check_parfors_vs_others(pyfunc, cfunc, cpfunc, *args, **kwargs) return raised_warnings @skip_parfors_unsupported class TestPrangeBasic(TestPrangeBase): """ Tests Prange """ def test_prange01(self): def test_impl(): n = 4 A = np.zeros(n) for i in range(n): A[i] = 2.0 * i return A self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange02(self): def test_impl(): n = 4 A = np.zeros(n - 1) for i in range(1, n): A[i - 1] = 2.0 * i return A self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange03(self): def test_impl(): s = 10 for i in range(10): s += 2 return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange03mul(self): def test_impl(): s = 3 for i in range(10): s *= 2 return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange03sub(self): def test_impl(): s = 100 for i in range(10): s -= 2 return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange03div(self): def test_impl(): s = 10 for i in range(10): s /= 2 return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange04(self): def test_impl(): a = 2 b = 3 A = np.empty(4) for i in range(4): if i == a: A[i] = b else: A[i] = 0 return A self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange05(self): def test_impl(): n = 4 A = np.ones((n), dtype=np.float64) s = 0 for i in range(1, n - 1, 1): s += A[i] return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange06(self): def test_impl(): n = 4 A = np.ones((n), dtype=np.float64) s = 0 for i in range(1, 1, 1): s += A[i] return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def 
test_prange07(self): def test_impl(): n = 4 A = np.ones((n), dtype=np.float64) s = 0 for i in range(n, 1): s += A[i] return s self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange08(self): def test_impl(): n = 4 A = np.ones((n)) acc = 0 for i in range(len(A)): for j in range(len(A)): acc += A[i] return acc self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange08_1(self): def test_impl(): n = 4 A = np.ones((n)) acc = 0 for i in range(4): for j in range(4): acc += A[i] return acc self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange09(self): def test_impl(): n = 4 acc = 0 for i in range(n): for j in range(n): acc += 1 return acc # patch inner loop to 'prange' self.prange_tester(test_impl, patch_instance=[1], scheduler_type='unsigned', check_fastmath=True) def test_prange10(self): def test_impl(): n = 4 acc2 = 0 for j in range(n): acc1 = 0 for i in range(n): acc1 += 1 acc2 += acc1 return acc2 # patch outer loop to 'prange' self.prange_tester(test_impl, patch_instance=[0], scheduler_type='unsigned', check_fastmath=True) @unittest.skip("list append is not thread-safe yet (#2391, #2408)") def test_prange11(self): def test_impl(): n = 4 return [np.sin(j) for j in range(n)] self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange12(self): def test_impl(): acc = 0 n = 4 X = np.ones(n) for i in range(-len(X)): acc += X[i] return acc self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) def test_prange13(self): def test_impl(n): acc = 0 for i in range(n): acc += 1 return acc self.prange_tester(test_impl, np.int32(4), scheduler_type='unsigned', check_fastmath=True) def test_prange14(self): def test_impl(A): s = 3 for i in range(len(A)): s += A[i]*2 return s # this tests reduction detection well since the accumulated variable # is initialized before the parfor and the value accessed from the array # 
is updated before accumulation self.prange_tester(test_impl, np.random.ranf(4), scheduler_type='unsigned', check_fastmath=True) def test_prange15(self): # from issue 2587 # test parfor type inference when there is multi-dimensional indexing def test_impl(N): acc = 0 for i in range(N): x = np.ones((1, 1)) acc += x[0, 0] return acc self.prange_tester(test_impl, 1024, scheduler_type='unsigned', check_fastmath=True) # Tests for negative ranges def test_prange16(self): def test_impl(N): acc = 0 for i in range(-N, N): acc += 2 return acc self.prange_tester(test_impl, 1024, scheduler_type='signed', check_fastmath=True) def test_prange17(self): def test_impl(N): acc = 0 X = np.ones(N) for i in range(-N, N): acc += X[i] return acc self.prange_tester(test_impl, 9, scheduler_type='signed', check_fastmath=True) def test_prange18(self): def test_impl(N): acc = 0 X = np.ones(N) for i in range(-N, 5): acc += X[i] for j in range(-4, N): acc += X[j] return acc self.prange_tester(test_impl, 9, scheduler_type='signed', check_fastmath=True) def test_prange19(self): def test_impl(N): acc = 0 M = N + 4 X = np.ones((N, M)) for i in range(-N, N): for j in range(-M, M): acc += X[i, j] return acc self.prange_tester(test_impl, 9, scheduler_type='signed', check_fastmath=True) def test_prange20(self): def test_impl(N): acc = 0 X = np.ones(N) for i in range(-1, N): acc += X[i] return acc self.prange_tester(test_impl, 9, scheduler_type='signed', check_fastmath=True) def test_prange21(self): def test_impl(N): acc = 0 for i in range(-3, -1): acc += 3 return acc self.prange_tester(test_impl, 9, scheduler_type='signed', check_fastmath=True) def test_prange22(self): def test_impl(): a = 0 b = 3 A = np.empty(4) for i in range(-2, 2): if i == a: A[i] = b elif i < 1: A[i] = -1 else: A[i] = 7 return A self.prange_tester(test_impl, scheduler_type='signed', check_fastmath=True, check_fastmath_result=True) def test_prange23(self): # test non-contig input def test_impl(A): for i in range(len(A)): A[i] = i 
return A A = np.zeros(32)[::2] self.prange_tester(test_impl, A, scheduler_type='unsigned', check_fastmath=True, check_fastmath_result=True) def test_prange24(self): # test non-contig input, signed range def test_impl(A): for i in range(-len(A), 0): A[i] = i return A A = np.zeros(32)[::2] self.prange_tester(test_impl, A, scheduler_type='signed', check_fastmath=True, check_fastmath_result=True) def test_prange25(self): def test_impl(A): n = len(A) buf = [np.zeros_like(A) for _ in range(n)] for i in range(n): buf[i] = A + i return buf A = np.ones((10,)) self.prange_tester(test_impl, A, patch_instance=[1], scheduler_type='unsigned', check_fastmath=True, check_fastmath_result=True) cpfunc = self.compile_parallel(test_impl, (numba.typeof(A),)) diagnostics = cpfunc.metadata['parfor_diagnostics'] hoisted_allocs = diagnostics.hoisted_allocations() self.assertEqual(len(hoisted_allocs), 0) def test_prange26(self): def test_impl(A): B = A[::3] for i in range(len(B)): B[i] = i return A A = np.zeros(32)[::2] self.prange_tester(test_impl, A, scheduler_type='unsigned', check_fastmath=True, check_fastmath_result=True) def test_prange27(self): # issue5597: usedef error in parfor def test_impl(a, b, c): for j in range(b[0]-1): for k in range(2): z = np.abs(a[c-1:c+1]) return 0 # patch inner loop to 'prange' self.prange_tester(test_impl, np.arange(20), np.asarray([4,4,4,4,4,4,4,4,4,4]), 0, patch_instance=[1], scheduler_type='unsigned', check_fastmath=True) def test_prange28(self): # issue7105: label conflict in nested parfor def test_impl(x, y): out = np.zeros(len(y)) for idx in range(0, len(y)): i0 = y[idx, 0] i1 = y[idx, 1] Pt1 = x[i0] Pt2 = x[i1] v = Pt1 - Pt2 vl2 = v[0] + v[1] out[idx] = vl2 return out X = np.array([[-1., -1.], [-1., 1.], [ 0., 0.], [ 1., -1.], [ 1., 0.], [ 1., 1.]]) Y = np.array([[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]]) self.prange_tester(test_impl, X, Y, scheduler_type='unsigned', check_fastmath=True, check_fastmath_result=True) def test_prange29(self): # 
issue7630: SSA renaming in prange header def test_impl(flag): result = 0 if flag: for i in range(1): result += 1 else: for i in range(1): result -= 3 return result self.prange_tester(test_impl, True) self.prange_tester(test_impl, False) def test_prange30(self): # issue7675: broadcast setitem def test_impl(x, par, numthreads): n_par = par.shape[0] n_x = len(x) result = np.zeros((n_par, n_x), dtype=np.float64) chunklen = (len(x) + numthreads - 1) // numthreads for i in range(numthreads): start = i * chunklen stop = (i + 1) * chunklen result[:, start:stop] = x[start:stop] * par[:] return result x = np.array(np.arange(0, 6, 1.0)) par = np.array([1.0, 2.0, 3.0]) self.prange_tester(test_impl, x, par, 2) @skip_parfors_unsupported class TestPrangeSpecific(TestPrangeBase): """ Tests specific features/problems found under prange""" def test_prange_two_instances_same_reduction_var(self): # issue4922 - multiple uses of same reduction variable def test_impl(n): c = 0 for i in range(n): c += 1 if i > 10: c += 1 return c self.prange_tester(test_impl, 9) def test_prange_conflicting_reduction_ops(self): def test_impl(n): c = 0 for i in range(n): c += 1 if i > 10: c *= 1 return c with self.assertRaises(errors.UnsupportedError) as raises: self.prange_tester(test_impl, 9) msg = ('Reduction variable c has multiple conflicting reduction ' 'operators.') self.assertIn(msg, str(raises.exception)) def test_prange_two_conditional_reductions(self): # issue6414 def test_impl(): A = B = 0 for k in range(1): if k == 2: A += 1 else: x = np.zeros((1, 1)) if x[0, 0]: B += 1 return A, B self.prange_tester(test_impl) def test_prange_nested_reduction1(self): def test_impl(): A = 0 for k in range(1): for i in range(1): if i == 0: A += 1 return A self.prange_tester(test_impl) @disabled_test def test_check_error_model(self): def test_impl(): n = 32 A = np.zeros(n) for i in range(n): A[i] = 1 / i # div-by-zero when i = 0 return A with self.assertRaises(ZeroDivisionError) as raises: test_impl() # compile 
parallel functions pfunc = self.generate_prange_func(test_impl, None) pcres = self.compile_parallel(pfunc, ()) pfcres = self.compile_parallel_fastmath(pfunc, ()) # should raise with self.assertRaises(ZeroDivisionError) as raises: pcres.entry_point() # should not raise result = pfcres.entry_point() self.assertEqual(result[0], np.inf) def test_check_alias_analysis(self): # check alias analysis reports ok def test_impl(A): for i in range(len(A)): B = A[i] B[:] = 1 return A A = np.zeros(32).reshape(4, 8) self.prange_tester(test_impl, A, scheduler_type='unsigned', check_fastmath=True, check_fastmath_result=True) pfunc = self.generate_prange_func(test_impl, None) sig = tuple([numba.typeof(A)]) cres = self.compile_parallel_fastmath(pfunc, sig) _ir = self._get_gufunc_ir(cres) for k, v in _ir.items(): for line in v.splitlines(): # get the fn definition line if 'define' in line and k in line: # there should only be 2x noalias, one on each of the first # 2 args (retptr, excinfo). # Note: used to be 3x no noalias, but env arg is dropped. self.assertEqual(line.count('noalias'), 2) break def test_prange_raises_invalid_step_size(self): def test_impl(N): acc = 0 for i in range(0, N, 2): acc += 2 return acc with self.assertRaises(errors.UnsupportedRewriteError) as raises: self.prange_tester(test_impl, 1024) msg = 'Only constant step size of 1 is supported for prange' self.assertIn(msg, str(raises.exception)) def test_prange_fastmath_check_works(self): # this function will benefit from `fastmath`, the div will # get optimised to a multiply by reciprocal and the accumulator # then becomes an fmadd: A = A + i * 0.5 def test_impl(): n = 128 A = 0 for i in range(n): A += i / 2.0 return A self.prange_tester(test_impl, scheduler_type='unsigned', check_fastmath=True) pfunc = self.generate_prange_func(test_impl, None) cres = self.compile_parallel_fastmath(pfunc, ()) ir = self._get_gufunc_ir(cres) _id = '%[A-Z_0-9]?(.[0-9]+)+[.]?[i]?' 
recipr_str = r'\s+%s = fmul fast double %s, 5.000000e-01' reciprocal_inst = re.compile(recipr_str % (_id, _id)) fadd_inst = re.compile(r'\s+%s = fadd fast double %s, %s' % (_id, _id, _id)) # check there is something like: # %.329 = fmul fast double %.325, 5.000000e-01 # %.337 = fadd fast double %A.07, %.329 found = False for name, kernel in ir.items(): # make sure to look at the kernel corresponding to the cres/pfunc if name in cres.library.get_llvm_str(): splitted = kernel.splitlines() for i, x in enumerate(splitted): if reciprocal_inst.match(x): self.assertTrue(fadd_inst.match(splitted[i + 1])) found = True break self.assertTrue(found, "fast instruction pattern was not found.") def test_parfor_alias1(self): def test_impl(n): b = np.zeros((n, n)) a = b[0] for j in range(n): a[j] = j + 1 return b.sum() self.prange_tester(test_impl, 4) def test_parfor_alias2(self): def test_impl(n): b = np.zeros((n, n)) for i in range(n): a = b[i] for j in range(n): a[j] = i + j return b.sum() self.prange_tester(test_impl, 4) def test_parfor_alias3(self): def test_impl(n): b = np.zeros((n, n, n)) for i in range(n): a = b[i] for j in range(n): c = a[j] for k in range(n): c[k] = i + j + k return b.sum() self.prange_tester(test_impl, 4) def test_parfor_race_1(self): def test_impl(x, y): for j in range(y): k = x return k raised_warnings = self.prange_tester(test_impl, 10, 20) warning_obj = raised_warnings[0] expected_msg = ("Variable k used in parallel loop may be written to " "simultaneously by multiple workers and may result " "in non-deterministic or unintended results.") self.assertIn(expected_msg, str(warning_obj.message)) def test_nested_parfor_push_call_vars(self): """ issue 3686: if a prange has something inside it that causes a nested parfor to be generated and both the inner and outer parfor use the same call variable defined outside the parfors then ensure that when that call variable is pushed into the parfor that the call variable isn't duplicated with the same name 
resulting in a redundant type lock. """ def test_impl(): B = 0 f = np.negative for i in range(1): this_matters = f(1.) B += f(np.zeros(1,))[0] for i in range(2): this_matters = f(1.) B += f(np.zeros(1,))[0] return B self.prange_tester(test_impl) def test_copy_global_for_parfor(self): """ issue4903: a global is copied next to a parfor so that it can be inlined into the parfor and thus not have to be passed to the parfor (i.e., an unsupported function type). This global needs to be renamed in the block into which it is copied. """ def test_impl(zz, tc): lh = np.zeros(len(tc)) lc = np.zeros(len(tc)) for i in range(1): nt = tc[i] for t in range(nt): lh += np.exp(zz[i, t]) for t in range(nt): lc += np.exp(zz[i, t]) return lh, lc m = 2 zz = np.ones((m, m, m)) tc = np.ones(m, dtype=np.int_) self.prange_tester(test_impl, zz, tc, patch_instance=[0]) def test_multiple_call_getattr_object(self): def test_impl(n): B = 0 f = np.negative for i in range(1): this_matters = f(1.0) B += f(n) return B self.prange_tester(test_impl, 1.0) def test_argument_alias_recarray_field(self): # Test for issue4007. def test_impl(n): for i in range(len(n)): n.x[i] = 7.0 return n X1 = np.zeros(10, dtype=[('x', float), ('y', int), ]) X2 = np.zeros(10, dtype=[('x', float), ('y', int), ]) X3 = np.zeros(10, dtype=[('x', float), ('y', int), ]) v1 = X1.view(np.recarray) v2 = X2.view(np.recarray) v3 = X3.view(np.recarray) # Numpy doesn't seem to support almost equal on recarray. # So, we convert to list and use assertEqual instead. python_res = list(test_impl(v1)) njit_res = list(njit(test_impl)(v2)) pa_func = njit(test_impl, parallel=True) pa_res = list(pa_func(v3)) self.assertEqual(python_res, njit_res) self.assertEqual(python_res, pa_res) def test_mutable_list_param(self): """ issue3699: test that mutable variable to call in loop is not hoisted. The call in test_impl forces a manual check here rather than using prange_tester. 
""" @njit def list_check(X): """ If the variable X is hoisted in the test_impl prange then subsequent list_check calls would return increasing values. """ ret = X[-1] a = X[-1] + 1 X.append(a) return ret def test_impl(n): for i in prange(n): X = [100] a = list_check(X) return a python_res = test_impl(10) njit_res = njit(test_impl)(10) pa_func = njit(test_impl, parallel=True) pa_res = pa_func(10) self.assertEqual(python_res, njit_res) self.assertEqual(python_res, pa_res) def test_list_comprehension_prange(self): # issue4569 def test_impl(x): return np.array([len(x[i]) for i in range(len(x))]) x = [np.array([1,2,3], dtype=int),np.array([1,2], dtype=int)] self.prange_tester(test_impl, x) def test_ssa_false_reduction(self): # issue5698 # SSA for h creates assignments to h that make it look like a # reduction variable except that it lacks an associated # reduction operator. Test here that h is excluded as a # reduction variable. def test_impl(image, a, b): empty = np.zeros(image.shape) for i in range(image.shape[0]): r = image[i][0] / 255.0 if a == 0: h = 0 if b == 0: h = 0 empty[i] = [h, h, h] return empty image = np.zeros((3, 3), dtype=np.int32) self.prange_tester(test_impl, image, 0, 0) def test_list_setitem_hoisting(self): # issue5979 # Don't hoist list initialization if list item set. 
def test_impl(): n = 5 a = np.empty(n, dtype=np.int64) for k in range(5): X = [0] X[0] = 1 a[k] = X[0] return a self.prange_tester(test_impl) def test_record_array_setitem(self): # issue6704 state_dtype = np.dtype([('var', np.int32)]) def test_impl(states): for i in range(1): states[i]['var'] = 1 def comparer(a, b): assert(a[0]['var'] == b[0]['var']) self.prange_tester(test_impl, np.zeros(shape=1, dtype=state_dtype), check_arg_equality=[comparer]) def test_record_array_setitem_yield_array(self): state_dtype = np.dtype([('x', np.intp)]) def test_impl(states): n = states.size for i in range(states.size): states["x"][i] = 7 + i return states states = np.zeros(10, dtype=state_dtype) def comparer(a, b): np.testing.assert_equal(a, b) self.prange_tester(test_impl, states, check_arg_equality=[comparer]) def test_issue7501(self): def test_impl(size, case): result = np.zeros((size,)) if case == 1: for i in range(size): result[i] += 1 else: for i in range(size): result[i] += 2 return result[0] self.prange_tester(test_impl, 3, 1) def test_kde_example(self): def test_impl(X): # KDE example b = 0.5 points = np.array([-1.0, 2.0, 5.0]) N = points.shape[0] n = X.shape[0] exps = 0 for i in range(n): p = X[i] d = (-(p - points)**2) / (2 * b**2) m = np.min(d) exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m))) return exps n = 128 X = np.random.ranf(n) self.prange_tester(test_impl, X) @skip_parfors_unsupported def test_issue_due_to_max_label(self): # Run the actual test in a new process since it can only reproduce in # a fresh state. out = subp.check_output( [sys.executable, '-m', 'numba.tests.parfors_max_label_error'], timeout=30, stderr=subp.STDOUT, # redirect stderr to stdout ) self.assertIn("TEST PASSED", out.decode()) @skip_parfors_unsupported def test_issue7578(self): def test_impl(x): A = np.zeros_like(x) tmp = np.cos(x) # this can be any 1-arity ufunc for i in range(len(x)): A[i] = tmp.sum() return A x = np.arange(10.) 
self.prange_tester(test_impl, x) @skip_parfors_unsupported class TestParforChunksizing(TestCase): """ Tests chunksize handling in ParallelAccelerator. """ _numba_parallel_test_ = False def setUp(self): set_parallel_chunksize(0) def tearDown(self): set_parallel_chunksize(0) def test_python_parallel_chunksize_basic(self): # Test basic chunksize operations outside njit. self.assertEqual(get_parallel_chunksize(), 0) set_parallel_chunksize(8) self.assertEqual(get_parallel_chunksize(), 8) set_parallel_chunksize(0) self.assertEqual(get_parallel_chunksize(), 0) def test_python_with_chunksize(self): # Test "with parallel_chunksize" outside njit. self.assertEqual(get_parallel_chunksize(), 0) with parallel_chunksize(8): self.assertEqual(get_parallel_chunksize(), 8) self.assertEqual(get_parallel_chunksize(), 0) def test_njit_parallel_chunksize_basic(self): # Test basic chunksize operations inside njit. @njit def get_cs(): return get_parallel_chunksize() @njit def set_cs(x): return set_parallel_chunksize(x) self.assertEqual(get_cs(), 0) set_cs(8) self.assertEqual(get_cs(), 8) set_cs(0) self.assertEqual(get_cs(), 0) def test_njit_with_chunksize(self): # Test "with parallel_chunksize" inside njit. @njit def test_impl(x): cs1 = get_parallel_chunksize() with parallel_chunksize(8): cs2 = get_parallel_chunksize() cs3 = get_parallel_chunksize() return cs1, cs2, cs3 cs1, cs2, cs3 = test_impl(8) self.assertEqual(cs1, 0) self.assertEqual(cs2, 8) self.assertEqual(cs3, 0) def test_all_iterations_reset_chunksize(self): """ Test that all the iterations get run if you set the chunksize. Also check that the chunksize that each worker thread sees has been reset to 0. """ @njit(parallel=True) def test_impl(cs, n): res = np.zeros(n) inner_cs = np.full(n, -13) with numba.parallel_chunksize(cs): for i in numba.prange(n): inner_cs[i] = numba.get_parallel_chunksize() res[i] = 13 return res, inner_cs # Test a variety of array and chunk sizes. 
# 1000 is a round number, 997 is prime, 943 is product of two # primes, 961 is square of a prime. for j in [1000, 997, 943, 961]: for i in range(15): res, inner_cs = test_impl(i+1, j) self.assertTrue(np.all(res == 13)) self.assertTrue(np.all(inner_cs == 0)) def test_njit_parallel_chunksize_negative(self): # Test negative set_parallel_chunksize inside njit. with self.assertRaises(ValueError) as raised: @njit def neg_test(): set_parallel_chunksize(-1) neg_test() msg = "chunksize must be greater than or equal to zero" self.assertIn(msg, str(raised.exception)) def test_python_parallel_chunksize_negative(self): # Test negative set_parallel_chunksize outside njit. with self.assertRaises(ValueError) as raised: set_parallel_chunksize(-1) msg = "chunksize must be greater than or equal to zero" self.assertIn(msg, str(raised.exception)) def test_njit_parallel_chunksize_invalid_type(self): with self.assertRaises(errors.TypingError) as raised: @njit def impl(): set_parallel_chunksize('invalid_type') impl() msg = "The parallel chunksize must be an integer" self.assertIn(msg, str(raised.exception)) def test_python_parallel_chunksize_invalid_type(self): with self.assertRaises(TypeError) as raised: set_parallel_chunksize('invalid_type') msg = "The parallel chunksize must be an integer" self.assertIn(msg, str(raised.exception)) @skip_parfors_unsupported @x86_only class TestParforsVectorizer(TestPrangeBase): # env mutating test _numba_parallel_test_ = False def get_gufunc_asm(self, func, schedule_type, *args, **kwargs): fastmath = kwargs.pop('fastmath', False) cpu_name = kwargs.pop('cpu_name', 'skylake-avx512') assertions = kwargs.pop('assertions', True) # force LLVM to use zmm registers for vectorization # https://reviews.llvm.org/D67259 cpu_features = kwargs.pop('cpu_features', '-prefer-256-bit') env_opts = {'NUMBA_CPU_NAME': cpu_name, 'NUMBA_CPU_FEATURES': cpu_features, } overrides = [] for k, v in env_opts.items(): overrides.append(override_env_config(k, v)) with overrides[0], 
overrides[1]: sig = tuple([numba.typeof(x) for x in args]) pfunc_vectorizable = self.generate_prange_func(func, None) if fastmath == True: cres = self.compile_parallel_fastmath(pfunc_vectorizable, sig) else: cres = self.compile_parallel(pfunc_vectorizable, sig) # get the gufunc asm asm = self._get_gufunc_asm(cres) if assertions: schedty = re.compile(r'call\s+\w+\*\s+@do_scheduling_(\w+)\(') matches = schedty.findall(cres.library.get_llvm_str()) self.assertGreaterEqual(len(matches), 1) # at least 1 parfor call self.assertEqual(matches[0], schedule_type) self.assertNotEqual(asm, {}) return asm @linux_only @TestCase.run_test_in_subprocess def test_vectorizer_fastmath_asm(self): """ This checks that if fastmath is set and the underlying hardware is suitable, and the function supplied is amenable to fastmath based vectorization, that the vectorizer actually runs. """ # This function will benefit from `fastmath` if run on a suitable # target. The vectorizer should unwind the loop and generate # packed dtype=double add and sqrt instructions. 
def will_vectorize(A): n = len(A) acc = 0 for i in range(n): acc += np.sqrt(i) return acc arg = np.zeros(10) fast_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, fastmath=True) slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, fastmath=False) for v in fast_asm.values(): # should unwind and call vector sqrt then vector add # all on packed doubles using zmm's self.assertTrue('vaddpd' in v) self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v) self.assertTrue('zmm' in v) for v in slow_asm.values(): # vector variants should not be present self.assertTrue('vaddpd' not in v) self.assertTrue('vsqrtpd' not in v) # check scalar variant is present self.assertTrue('vsqrtsd' in v and '__svml_sqrt' not in v) self.assertTrue('vaddsd' in v) # check no zmm addressing is present self.assertTrue('zmm' not in v) @linux_only @TestCase.run_test_in_subprocess(envvars={'NUMBA_BOUNDSCHECK': '0'}) def test_unsigned_refusal_to_vectorize(self): """ This checks that if fastmath is set and the underlying hardware is suitable, and the function supplied is amenable to fastmath based vectorization, that the vectorizer actually runs. 
""" def will_not_vectorize(A): n = len(A) for i in range(-n, 0): A[i] = np.sqrt(A[i]) return A def will_vectorize(A): n = len(A) for i in range(n): A[i] = np.sqrt(A[i]) return A arg = np.zeros(10) # Boundschecking breaks vectorization self.assertFalse(config.BOUNDSCHECK) novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg, fastmath=True) vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg, fastmath=True) for v in novec_asm.values(): # vector variant should not be present self.assertTrue('vsqrtpd' not in v) # check scalar variant is present self.assertTrue('vsqrtsd' in v) # check no zmm addressing is present self.assertTrue('zmm' not in v) for v in vec_asm.values(): # should unwind and call vector sqrt then vector mov # all on packed doubles using zmm's self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v) self.assertTrue('vmovupd' in v) self.assertTrue('zmm' in v) @linux_only # needed as 32bit doesn't have equivalent signed/unsigned instruction # generation for this function @TestCase.run_test_in_subprocess(envvars={'NUMBA_BOUNDSCHECK': '0'}) def test_signed_vs_unsigned_vec_asm(self): """ This checks vectorization for signed vs unsigned variants of a trivial accumulator, the only meaningful difference should be the presence of signed vs. unsigned unpack instructions (for the induction var). """ def signed_variant(): n = 4096 A = 0. for i in range(-n, 0): A += i return A def unsigned_variant(): n = 4096 A = 0. 
for i in range(n): A += i return A # Boundschecking breaks the diff check below because of the pickled exception self.assertFalse(config.BOUNDSCHECK) signed_asm = self.get_gufunc_asm(signed_variant, 'signed', fastmath=True) unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned', fastmath=True) def strip_instrs(asm): acc = [] for x in asm.splitlines(): spd = x.strip() # filter out anything that isn't a trivial instruction # and anything with the gufunc id as it contains an address if spd != '' and not (spd.startswith('.') or spd.startswith('_') or spd.startswith('"') or '__numba_parfor_gufunc' in spd): acc.append(re.sub('[\t]', '', spd)) return acc for k, v in signed_asm.items(): signed_instr = strip_instrs(v) break for k, v in unsigned_asm.items(): unsigned_instr = strip_instrs(v) break from difflib import SequenceMatcher as sm # make sure that the only difference in instruction (if there is a # difference) is the char 'u'. For example: # vcvtsi2sdq vs. vcvtusi2sdq self.assertEqual(len(signed_instr), len(unsigned_instr)) for a, b in zip(signed_instr, unsigned_instr): if a == b: continue else: s = sm(lambda x: x == '\t', a, b) ops = s.get_opcodes() for op in ops: if op[0] == 'insert': self.assertEqual(b[op[-2]:op[-1]], 'u') @skip_parfors_unsupported class TestParforReductionSetNumThreads(TestCase): """Test execution correctness on reductions with set_num_threads. """ def test_add(self): N = config.NUMBA_NUM_THREADS M = 2 * N mask = N - 1 @njit(parallel=True) def udt(nthreads): acc = 0 set_num_threads(nthreads) for i in prange(M): local_mask = 1 + i % mask set_num_threads(local_mask) gnt = get_num_threads() acc += gnt return acc expect = udt.py_func(mask) got = udt(mask) self.assertPreciseEqual(expect, got) def test_mul(self): # This min will prevent larger thread counts from generating # overflow in the loop below. 
N = min(4, config.NUMBA_NUM_THREADS) M = 2 * N mask = N - 1 @njit(parallel=True) def udt(nthreads): acc = 1 set_num_threads(nthreads) for i in prange(M): local_mask = 1 + i % mask set_num_threads(local_mask) gnt = get_num_threads() acc *= gnt return acc expect = udt.py_func(mask) got = udt(mask) self.assertPreciseEqual(expect, got) def test_max(self): N = config.NUMBA_NUM_THREADS M = 2 * N mask = N - 1 @njit(parallel=True) def udt(nthreads): acc = 1 set_num_threads(nthreads) for i in prange(M): local_mask = 1 + i % mask set_num_threads(local_mask) gnt = get_num_threads() acc = max(acc, gnt) return acc expect = udt.py_func(mask) got = udt(mask) self.assertPreciseEqual(expect, got) @skip_parfors_unsupported class TestDiagnosticEnvVar(TestCase): @TestCase.run_test_in_subprocess() def test_diagnostics_env_var1(self): os.environ['NUMBA_PARALLEL_DIAGNOSTICS']='4' with captured_stdout() as stdout: @njit(parallel=True) def impl(): n = 100 b = np.zeros((n), dtype=np.float64) for i in prange(n): b[i] = 1 return b impl() the_output = stdout.getvalue() self.assertIn("Parallel Accelerator Optimizing", the_output) if __name__ == "__main__": unittest.main()