"""Tests for the Numba Runtime (NRT): MemInfo objects, refcount pruning,
the external C API table and the allocation statistics counters."""
import math
import os
import platform
import sys
import re

import numpy as np

from numba import njit
from numba.core import types
from numba.core.runtime import (
    rtsys,
    nrtopt,
    _nrt_python,
    nrt,
)
from numba.core.extending import intrinsic, include_path
from numba.core.typing import signature
from numba.core.imputils import impl_ret_untracked
from llvmlite import ir
import llvmlite.binding as llvm
from numba.core.unsafe.nrt import NRT_get_api

from numba.tests.support import (EnableNRTStatsMixin, TestCase, temp_directory,
                                 import_dynamic, skip_if_32bit,
                                 skip_unless_cffi, run_in_subprocess)
from numba.core.registry import cpu_target

import unittest


linux_only = unittest.skipIf(not sys.platform.startswith('linux'),
                             'linux only test')
x86_only = unittest.skipIf(platform.machine() not in ('i386', 'x86_64'),
                           'x86 only test')


class Dummy:
    """Helper whose class-level ``alive`` counter tracks live instances."""
    alive = 0

    def __init__(self):
        type(self).alive += 1

    def __del__(self):
        type(self).alive -= 1


class TestNrtMemInfoNotInitialized(unittest.TestCase):
    """
    Unit test for checking the use of the NRT fails if the
    initialization sequence has not been run.
    """
    _numba_parallel_test_ = False

    def test_init_fail(self):
        # Maps each rtsys attribute to the positional args it is called with.
        methods = {'library': (),
                   'meminfo_new': ((), ()),
                   'meminfo_alloc': ((),),
                   }

        # Remember the real state so that it is restored faithfully, rather
        # than unconditionally marking the runtime as initialized.
        was_initialized = rtsys._init

        for meth, args in methods.items():
            try:
                with self.assertRaises(RuntimeError) as raises:
                    # Pretend the runtime was never initialized. This is done
                    # inside the context manager as the attribute lookup
                    # itself may already raise.
                    rtsys._init = False
                    fn = getattr(rtsys, meth)
                    fn(*args)

                msg = "Runtime must be initialized before use."
                self.assertIn(msg, str(raises.exception))
            finally:
                rtsys._init = was_initialized


class TestNrtMemInfo(unittest.TestCase):
    """
    Unit test for core MemInfo functionality
    """

    def setUp(self):
        # Reset the Dummy class
        Dummy.alive = 0
        # initialize the NRT (in case the tests are run in isolation)
        rtsys.initialize(cpu_target.target_context)
        super().setUp()

    def test_meminfo_refct_1(self):
        """A single acquire/release pair keeps the owner alive until the
        MemInfo itself is deleted."""
        d = Dummy()
        self.assertEqual(Dummy.alive, 1)
        addr = 0xdeadcafe  # some made up location

        mi = rtsys.meminfo_new(addr, d)
        self.assertEqual(mi.refcount, 1)
        del d
        self.assertEqual(Dummy.alive, 1)
        mi.acquire()
        self.assertEqual(mi.refcount, 2)
        self.assertEqual(Dummy.alive, 1)
        mi.release()
        self.assertEqual(mi.refcount, 1)
        del mi
        self.assertEqual(Dummy.alive, 0)

    def test_meminfo_refct_2(self):
        """Same as ``test_meminfo_refct_1`` but with 100 acquires/releases."""
        d = Dummy()
        self.assertEqual(Dummy.alive, 1)
        addr = 0xdeadcafe  # some made up location

        mi = rtsys.meminfo_new(addr, d)
        self.assertEqual(mi.refcount, 1)
        del d
        self.assertEqual(Dummy.alive, 1)
        for _ in range(100):
            mi.acquire()
        self.assertEqual(mi.refcount, 1 + 100)
        self.assertEqual(Dummy.alive, 1)
        for _ in range(100):
            mi.release()
        self.assertEqual(mi.refcount, 1)
        del mi
        self.assertEqual(Dummy.alive, 0)

    def test_fake_memoryview(self):
        """A memoryview over a MemInfo keeps the owner alive by itself."""
        d = Dummy()
        self.assertEqual(Dummy.alive, 1)
        addr = 0xdeadcafe  # some made up location

        mi = rtsys.meminfo_new(addr, d)
        self.assertEqual(mi.refcount, 1)
        mview = memoryview(mi)
        self.assertEqual(mi.refcount, 1)
        self.assertEqual(addr, mi.data)
        self.assertFalse(mview.readonly)
        self.assertIs(mi, mview.obj)
        self.assertTrue(mview.c_contiguous)
        self.assertEqual(mview.itemsize, 1)
        self.assertEqual(mview.ndim, 1)
        del d
        del mi

        # The memoryview is now the only thing referencing the MemInfo
        self.assertEqual(Dummy.alive, 1)
        del mview
        self.assertEqual(Dummy.alive, 0)

    def test_memoryview(self):
        """Memory from ``meminfo_alloc`` is shared between ctypes, a
        memoryview and a NumPy array built on top of that view."""
        from ctypes import c_uint32, c_void_p, POINTER, cast

        dtype = np.dtype(np.uint32)
        bytesize = dtype.itemsize * 10
        mi = rtsys.meminfo_alloc(bytesize, safe=True)
        addr = mi.data
        c_arr = cast(c_void_p(addr), POINTER(c_uint32 * 10))
        # Check 0xCB-filling
        for i in range(10):
            self.assertEqual(c_arr.contents[i], 0xcbcbcbcb)

        # Init array with ctypes
        for i in range(10):
            c_arr.contents[i] = i + 1
        mview = memoryview(mi)
        self.assertEqual(mview.nbytes, bytesize)
        self.assertFalse(mview.readonly)
        self.assertIs(mi, mview.obj)
        self.assertTrue(mview.c_contiguous)
        self.assertEqual(mview.itemsize, 1)
        self.assertEqual(mview.ndim, 1)
        del mi
        arr = np.ndarray(dtype=dtype, shape=mview.nbytes // dtype.itemsize,
                         buffer=mview)
        del mview
        # Modify array with NumPy
        np.testing.assert_equal(np.arange(arr.size) + 1, arr)

        arr += 1

        # Check value reflected in ctypes
        for i in range(10):
            self.assertEqual(c_arr.contents[i], i + 2)

        self.assertEqual(arr.ctypes.data, addr)
        del arr
        # At this point the memory is zero filled
        # We can't check this deterministically because the memory could be
        # consumed by another thread.

    def test_buffer(self):
        """Like ``test_memoryview`` but the MemInfo is handed to NumPy
        directly as the buffer."""
        from ctypes import c_uint32, c_void_p, POINTER, cast

        dtype = np.dtype(np.uint32)
        bytesize = dtype.itemsize * 10
        mi = rtsys.meminfo_alloc(bytesize, safe=True)
        self.assertEqual(mi.refcount, 1)
        addr = mi.data
        c_arr = cast(c_void_p(addr), POINTER(c_uint32 * 10))
        # Check 0xCB-filling
        for i in range(10):
            self.assertEqual(c_arr.contents[i], 0xcbcbcbcb)

        # Init array with ctypes
        for i in range(10):
            c_arr.contents[i] = i + 1

        arr = np.ndarray(dtype=dtype, shape=bytesize // dtype.itemsize,
                         buffer=mi)
        self.assertEqual(mi.refcount, 1)
        del mi
        # Modify array with NumPy
        np.testing.assert_equal(np.arange(arr.size) + 1, arr)

        arr += 1

        # Check value reflected in ctypes
        for i in range(10):
            self.assertEqual(c_arr.contents[i], i + 2)

        self.assertEqual(arr.ctypes.data, addr)
        del arr
        # At this point the memory is zero filled
        # We can't check this deterministically because the memory could be
        # consumed by another thread.

    @skip_if_32bit
    def test_allocate_invalid_size(self):
        # Checks that attempting to allocate too big a region fails gracefully.
        size = types.size_t.maxval // 8 // 2
        for pred in (True, False):
            with self.assertRaises(MemoryError) as raises:
                rtsys.meminfo_alloc(size, safe=pred)
            self.assertIn(f"Requested allocation of {size} bytes failed.",
                          str(raises.exception))

    def test_allocate_negative_size(self):
        # Checks that attempting to allocate negative number of bytes fails
        # gracefully.
        size = -10
        for pred in (True, False):
            with self.assertRaises(ValueError) as raises:
                rtsys.meminfo_alloc(size, safe=pred)
            msg = f"Cannot allocate a negative number of bytes: {size}."
            self.assertIn(msg, str(raises.exception))


class TestTracemalloc(unittest.TestCase):
    """
    Test NRT-allocated memory can be tracked by tracemalloc.
    """

    def measure_memory_diff(self, func):
        """Run *func* under tracemalloc and return the per-line statistics
        difference between snapshots taken before and after the call.

        The test is skipped if ``tracemalloc`` cannot be imported.
        """
        try:
            import tracemalloc
        except ImportError:
            self.skipTest("tracemalloc not available")
        tracemalloc.start()
        try:
            before = tracemalloc.take_snapshot()
            # Keep the result and only delete it after taking a snapshot
            res = func()
            after = tracemalloc.take_snapshot()
            del res
            return after.compare_to(before, 'lineno')
        finally:
            tracemalloc.stop()

    def test_snapshot(self):
        N = 1000000
        dtype = np.int8

        @njit
        def alloc_nrt_memory():
            """
            Allocate and return a large array.
            """
            return np.empty(N, dtype)

        # NOTE: `alloc_lineno` below is computed as "first line of
        # keep_memory + 1", so the `return` must stay on the line directly
        # after the `def` (no docstring or comment in between).
        def keep_memory():
            return alloc_nrt_memory()

        def release_memory():
            alloc_nrt_memory()

        alloc_lineno = keep_memory.__code__.co_firstlineno + 1

        # Warmup JIT
        alloc_nrt_memory()

        # The large NRT-allocated array should appear topmost in the diff
        diff = self.measure_memory_diff(keep_memory)
        stat = diff[0]
        # There is a slight overhead, so the allocated size won't exactly be N
        self.assertGreaterEqual(stat.size, N)
        self.assertLess(stat.size, N * 1.015,
                        msg=("Unexpected allocation overhead encountered. "
                             "May be due to difference in CPython "
                             "builds or running under coverage"))
        frame = stat.traceback[0]
        self.assertEqual(os.path.basename(frame.filename), "test_nrt.py")
        self.assertEqual(frame.lineno, alloc_lineno)

        # If NRT memory is released before taking a snapshot, it shouldn't
        # appear.
        diff = self.measure_memory_diff(release_memory)
        stat = diff[0]
        # Something else appears, but nothing the magnitude of N
        self.assertLess(stat.size, N * 0.01)


class TestNRTIssue(TestCase):
    """Regression tests for reported NRT / refcounting issues."""

    def test_issue_with_refct_op_pruning(self):
        """
        GitHub Issue #1244 https://github.com/numba/numba/issues/1244
        """
        @njit
        def calculate_2D_vector_mag(vector):
            x, y = vector

            return math.sqrt(x ** 2 + y ** 2)

        @njit
        def normalize_2D_vector(vector):
            normalized_vector = np.empty(2, dtype=np.float64)

            mag = calculate_2D_vector_mag(vector)
            x, y = vector

            normalized_vector[0] = x / mag
            normalized_vector[1] = y / mag

            return normalized_vector

        @njit
        def normalize_vectors(num_vectors, vectors):
            normalized_vectors = np.empty((num_vectors, 2), dtype=np.float64)

            for i in range(num_vectors):
                vector = vectors[i]

                normalized_vector = normalize_2D_vector(vector)

                normalized_vectors[i, 0] = normalized_vector[0]
                normalized_vectors[i, 1] = normalized_vector[1]

            return normalized_vectors

        num_vectors = 10
        test_vectors = np.random.random((num_vectors, 2))
        got = normalize_vectors(num_vectors, test_vectors)
        expected = normalize_vectors.py_func(num_vectors, test_vectors)

        np.testing.assert_almost_equal(expected, got)

    def test_incref_after_cast(self):
        # Issue #1427: when casting a value before returning it, the
        # cast result should be incref'ed, not the original value.
        def f():
            return 0.0, np.zeros(1, dtype=np.int32)

        # Note the return type isn't the same as the tuple type above:
        # the first element is a complex rather than a float.
        cfunc = njit((types.Tuple((types.complex128,
                                   types.Array(types.int32, 1, 'C'))))())(f)
        z, arr = cfunc()
        self.assertPreciseEqual(z, 0j)
        self.assertPreciseEqual(arr, np.zeros(1, dtype=np.int32))

    def test_refct_pruning_issue_1511(self):
        @njit
        def f():
            a = np.ones(10, dtype=np.float64)
            b = np.ones(10, dtype=np.float64)
            return a, b[:]

        a, b = f()
        np.testing.assert_equal(a, b)
        np.testing.assert_equal(a, np.ones(10, dtype=np.float64))

    def test_refct_pruning_issue_1526(self):
        @njit
        def udt(image, x, y):
            next_loc = np.where(image == 1)

            if len(next_loc[0]) == 0:
                y_offset = 1
                x_offset = 1
            else:
                y_offset = next_loc[0][0]
                x_offset = next_loc[1][0]

            next_loc_x = (x - 1) + x_offset
            next_loc_y = (y - 1) + y_offset

            return next_loc_x, next_loc_y

        a = np.array([[1, 0, 1, 0, 1, 0, 0, 1, 0, 0]])
        expect = udt.py_func(a, 1, 6)
        got = udt(a, 1, 6)

        self.assertEqual(expect, got)

    @TestCase.run_test_in_subprocess
    def test_no_nrt_on_njit_decoration(self):
        # Checks that the NRT is not initialized/compiled as a result of
        # decorating a function with `@njit`.
        from numba import njit

        # check the NRT is not initialized.
        self.assertFalse(rtsys._init)

        # decorate
        @njit
        def foo():
            return 123

        # check the NRT is still not initialized
        self.assertFalse(rtsys._init)

        # execute
        self.assertEqual(foo(), foo.py_func())

        # check the NRT is now initialized as execution has definitely
        # occurred.
        self.assertTrue(rtsys._init)


class TestRefCtPruning(unittest.TestCase):
    """Tests for the textual LLVM IR refcount pruning in ``nrtopt``."""

    # Lines carrying a "; GONE n" comment are the refcount operations the
    # pruning test below expects to be removed. The line structure of this
    # literal matters: the test compares the IR line by line.
    sample_llvm_ir = '''
define i32 @"MyFunction"(i8** noalias nocapture %retptr, { i8*, i32 }** noalias nocapture %excinfo, i8* noalias nocapture readnone %env, double %arg.vt.0, double %arg.vt.1, double %arg.vt.2, double %arg.vt.3, double %arg.bounds.0, double %arg.bounds.1, double %arg.bounds.2, double %arg.bounds.3, i8* %arg.xs.0, i8* nocapture readnone %arg.xs.1, i64 %arg.xs.2, i64 %arg.xs.3, double* nocapture readonly %arg.xs.4, i64 %arg.xs.5.0, i64 %arg.xs.6.0, i8* %arg.ys.0, i8* nocapture readnone %arg.ys.1, i64 %arg.ys.2, i64 %arg.ys.3, double* nocapture readonly %arg.ys.4, i64 %arg.ys.5.0, i64 %arg.ys.6.0, i8* %arg.aggs_and_cols.0.0, i8* nocapture readnone %arg.aggs_and_cols.0.1, i64 %arg.aggs_and_cols.0.2, i64 %arg.aggs_and_cols.0.3, i32* nocapture %arg.aggs_and_cols.0.4, i64 %arg.aggs_and_cols.0.5.0, i64 %arg.aggs_and_cols.0.5.1, i64 %arg.aggs_and_cols.0.6.0, i64 %arg.aggs_and_cols.0.6.1) local_unnamed_addr {
entry:
  tail call void @NRT_incref(i8* %arg.xs.0)
  tail call void @NRT_incref(i8* %arg.ys.0)
  tail call void @NRT_incref(i8* %arg.aggs_and_cols.0.0)
  %.251 = icmp sgt i64 %arg.xs.5.0, 0
  br i1 %.251, label %B42.preheader, label %B160

B42.preheader:                                    ; preds = %entry
  %0 = add i64 %arg.xs.5.0, 1
  br label %B42

B42:                                              ; preds = %B40.backedge, %B42.preheader
  %lsr.iv3 = phi i64 [ %lsr.iv.next, %B40.backedge ], [ %0, %B42.preheader ]
  %lsr.iv1 = phi double* [ %scevgep2, %B40.backedge ], [ %arg.xs.4, %B42.preheader ]
  %lsr.iv = phi double* [ %scevgep, %B40.backedge ], [ %arg.ys.4, %B42.preheader ]
  %.381 = load double, double* %lsr.iv1, align 8
  %.420 = load double, double* %lsr.iv, align 8
  %.458 = fcmp ole double %.381, %arg.bounds.1
  %not..432 = fcmp oge double %.381, %arg.bounds.0
  %"$phi82.1.1" = and i1 %.458, %not..432
  br i1 %"$phi82.1.1", label %B84, label %B40.backedge

B84:                                              ; preds = %B42
  %.513 = fcmp ole double %.420, %arg.bounds.3
  %not..487 = fcmp oge double %.420, %arg.bounds.2
  %"$phi106.1.1" = and i1 %.513, %not..487
  br i1 %"$phi106.1.1", label %B108.endif.endif.endif, label %B40.backedge

B160:                                             ; preds = %B40.backedge, %entry
  tail call void @NRT_decref(i8* %arg.ys.0)
  tail call void @NRT_decref(i8* %arg.xs.0)
  tail call void @NRT_decref(i8* %arg.aggs_and_cols.0.0)
  store i8* null, i8** %retptr, align 8
  ret i32 0

B108.endif.endif.endif:                           ; preds = %B84
  %.575 = fmul double %.381, %arg.vt.0
  %.583 = fadd double %.575, %arg.vt.1
  %.590 = fptosi double %.583 to i64
  %.630 = fmul double %.420, %arg.vt.2
  %.638 = fadd double %.630, %arg.vt.3
  %.645 = fptosi double %.638 to i64
  tail call void @NRT_incref(i8* %arg.aggs_and_cols.0.0)              ; GONE 1
  tail call void @NRT_decref(i8* null)                                ; GONE 2
  tail call void @NRT_incref(i8* %arg.aggs_and_cols.0.0), !noalias !0 ; GONE 3
  %.62.i.i = icmp slt i64 %.645, 0
  %.63.i.i = select i1 %.62.i.i, i64 %arg.aggs_and_cols.0.5.0, i64 0
  %.64.i.i = add i64 %.63.i.i, %.645
  %.65.i.i = icmp slt i64 %.590, 0
  %.66.i.i = select i1 %.65.i.i, i64 %arg.aggs_and_cols.0.5.1, i64 0
  %.67.i.i = add i64 %.66.i.i, %.590
  %.84.i.i = mul i64 %.64.i.i, %arg.aggs_and_cols.0.5.1
  %.87.i.i = add i64 %.67.i.i, %.84.i.i
  %.88.i.i = getelementptr i32, i32* %arg.aggs_and_cols.0.4, i64 %.87.i.i
  %.89.i.i = load i32, i32* %.88.i.i, align 4, !noalias !3
  %.99.i.i = add i32 %.89.i.i, 1
  store i32 %.99.i.i, i32* %.88.i.i, align 4, !noalias !3
  tail call void @NRT_decref(i8* %arg.aggs_and_cols.0.0), !noalias !0 ; GONE 4
  tail call void @NRT_decref(i8* %arg.aggs_and_cols.0.0)              ; GONE 5
  br label %B40.backedge

B40.backedge:                                     ; preds = %B108.endif.endif.endif, %B84, %B42
  %scevgep = getelementptr double, double* %lsr.iv, i64 1
  %scevgep2 = getelementptr double, double* %lsr.iv1, i64 1
  %lsr.iv.next = add i64 %lsr.iv3, -1
  %.294 = icmp sgt i64 %lsr.iv.next, 1
  br i1 %.294, label %B42, label %B160
}
    ''' # noqa

    def test_refct_pruning_op_recognize(self):
        """Pruning the sample IR removes exactly the lines marked GONE."""
        input_ir = self.sample_llvm_ir
        input_lines = input_ir.splitlines()
        before_increfs = [ln for ln in input_lines if 'NRT_incref' in ln]
        before_decrefs = [ln for ln in input_lines if 'NRT_decref' in ln]

        # prune
        output_ir = nrtopt._remove_redundant_nrt_refct(input_ir)
        output_lines = output_ir.splitlines()
        after_increfs = [ln for ln in output_lines if 'NRT_incref' in ln]
        after_decrefs = [ln for ln in output_lines if 'NRT_decref' in ln]

        # check
        self.assertNotEqual(before_increfs, after_increfs)
        self.assertNotEqual(before_decrefs, after_decrefs)

        pruned_increfs = set(before_increfs) - set(after_increfs)
        pruned_decrefs = set(before_decrefs) - set(after_decrefs)

        # the symm difference == or-combined
        combined = pruned_increfs | pruned_decrefs
        self.assertEqual(combined, pruned_increfs ^ pruned_decrefs)
        pruned_lines = '\n'.join(combined)

        # all GONE lines are pruned
        for i in [1, 2, 3, 4, 5]:
            gone = f'; GONE {i}'
            self.assertIn(gone, pruned_lines)
        # no other lines
        self.assertEqual(len(pruned_lines.splitlines()), len(combined))

    @unittest.skip("Pass removed as it was buggy. Re-enable when fixed.")
    def test_refct_pruning_with_branches(self):
        '''testcase from #2350'''
        @njit
        def _append_non_na(x, y, agg, field):
            if not np.isnan(field):
                agg[y, x] += 1

        @njit
        def _append(x, y, agg, field):
            if not np.isnan(field):
                if np.isnan(agg[y, x]):
                    agg[y, x] = field
                else:
                    agg[y, x] += field

        @njit
        def append(x, y, agg, field):
            _append_non_na(x, y, agg, field)
            _append(x, y, agg, field)

        # Disable python wrapper to avoid detecting necessary
        # refcount inside it
        @njit(no_cpython_wrapper=True)
        def extend(arr, field):
            for i in range(arr.shape[0]):
                for j in range(arr.shape[1]):
                    append(j, i, arr, field)

        # Compile
        extend.compile("(f4[:,::1], f4)")

        # Test there are no reference count operations
        llvmir = str(extend.inspect_llvm(extend.signatures[0]))
        refops = list(re.finditer(r'(NRT_incref|NRT_decref)\([^\)]+\)',
                                  llvmir))
        self.assertEqual(len(refops), 0)

    @linux_only
    @x86_only
    def test_inline_asm(self):
        """The InlineAsm class from llvmlite.ir has no 'name' attr the refcount
        pruning pass should be tolerant to this"""
        llvm.initialize()
        llvm.initialize_native_target()
        llvm.initialize_native_asmprinter()
        llvm.initialize_native_asmparser()

        @intrinsic
        def bar(tyctx, x, y):
            def codegen(cgctx, builder, sig, args):
                (arg_0, arg_1) = args
                fty = ir.FunctionType(ir.IntType(32), [ir.IntType(32),
                                                       ir.IntType(32)])
                mul = builder.asm(fty, "mov $2, $0; imul $1, $0", "=&r,r,r",
                                  (arg_0, arg_1), name="asm_mul",
                                  side_effect=False)
                return impl_ret_untracked(cgctx, builder, sig.return_type, mul)
            return signature(types.int32, types.int32, types.int32), codegen

        @njit(['int32(int32)'])
        def foo(x):
            x += 1
            z = bar(x, 2)
            return z

        self.assertEqual(foo(10), 22)  # expect (10 + 1) * 2 = 22


@skip_unless_cffi
class TestNrtExternalCFFI(EnableNRTStatsMixin, TestCase):
    """Testing the use of externally compiled C code that use NRT
    """

    def setUp(self):
        # initialize the NRT (in case the tests are run in isolation)
        cpu_target.target_context
        super().setUp()

    def compile_cffi_module(self, name, source, cdef):
        """Build the C *source* as a cffi module called *name* in a fresh
        temporary directory and import it.

        Returns a ``(ffi, module)`` tuple. The temporary directory is only
        on ``sys.path`` for the duration of the import.
        """
        from cffi import FFI

        ffi = FFI()
        ffi.set_source(name, source, include_dirs=[include_path()])
        ffi.cdef(cdef)
        tmpdir = temp_directory("cffi_test_{}".format(name))
        ffi.compile(tmpdir=tmpdir)
        sys.path.append(tmpdir)
        try:
            mod = import_dynamic(name)
        finally:
            sys.path.remove(tmpdir)

        return ffi, mod

    def get_nrt_api_table(self):
        """Return the result of calling the NRT ``get_api`` C helper."""
        from cffi import FFI

        ffi = FFI()
        nrt_get_api = ffi.cast("void* (*)()", _nrt_python.c_helpers['get_api'])
        table = nrt_get_api()
        return table

    def test_manage_memory(self):
        name = "{}_test_manage_memory".format(self.__class__.__name__)
        # <stdlib.h> supplies the malloc()/free() used below.
        source = r"""
#include <stdlib.h>
#include "numba/core/runtime/nrt_external.h"

int status = 0;

void my_dtor(void *ptr) {
    free(ptr);
    status = 0xdead;
}

NRT_MemInfo* test_nrt_api(NRT_api_functions *nrt) {
    void * data = malloc(10);
    NRT_MemInfo *mi = nrt->manage_memory(data, my_dtor);
    nrt->acquire(mi);
    nrt->release(mi);
    status = 0xa110c;
    return mi;
}
        """
        cdef = """
void* test_nrt_api(void *nrt);
extern int status;
        """

        ffi, mod = self.compile_cffi_module(name, source, cdef)
        # Init status is 0
        self.assertEqual(mod.lib.status, 0)
        table = self.get_nrt_api_table()
        out = mod.lib.test_nrt_api(table)
        # status is now 0xa110c
        self.assertEqual(mod.lib.status, 0xa110c)
        mi_addr = int(ffi.cast("size_t", out))
        mi = nrt.MemInfo(mi_addr)
        self.assertEqual(mi.refcount, 1)
        del mi   # force deallocation on mi
        # status is now 0xdead
        self.assertEqual(mod.lib.status, 0xdead)

    def test_allocate(self):
        name = "{}_test_allocate".format(self.__class__.__name__)
        # <stddef.h> supplies the size_t and NULL used below.
        source = r"""
#include <stddef.h>
#include "numba/core/runtime/nrt_external.h"

NRT_MemInfo* test_nrt_api(NRT_api_functions *nrt, size_t n) {
    size_t *data = NULL;
    NRT_MemInfo *mi = nrt->allocate(n);
    data = nrt->get_data(mi);
    data[0] = 0xded;
    data[1] = 0xabc;
    data[2] = 0xdef;
    return mi;
}
        """
        cdef = "void* test_nrt_api(void *nrt, size_t n);"
        ffi, mod = self.compile_cffi_module(name, source, cdef)

        table = self.get_nrt_api_table()

        numbytes = 3 * np.dtype(np.intp).itemsize
        out = mod.lib.test_nrt_api(table, numbytes)

        mi_addr = int(ffi.cast("size_t", out))
        mi = nrt.MemInfo(mi_addr)
        self.assertEqual(mi.refcount, 1)

        buffer = ffi.buffer(ffi.cast("char [{}]".format(numbytes), mi.data))
        arr = np.ndarray(shape=(3,), dtype=np.intp, buffer=buffer)
        np.testing.assert_equal(arr, [0xded, 0xabc, 0xdef])

    def test_get_api(self):
        from cffi import FFI

        @njit
        def test_nrt_api():
            return NRT_get_api()

        ffi = FFI()
        expect = int(ffi.cast('size_t', self.get_nrt_api_table()))
        got = test_nrt_api()
        self.assertEqual(expect, got)


class TestNrtStatistics(TestCase):
    """Tests for enabling, disabling and querying the NRT stats counters."""

    def setUp(self):
        # Store the current stats state
        self.__stats_state = _nrt_python.memsys_stats_enabled()

    def tearDown(self):
        # Set stats state back to whatever it was before the test ran
        if self.__stats_state:
            _nrt_python.memsys_enable_stats()
        else:
            _nrt_python.memsys_disable_stats()

    def test_stats_env_var_explicit_on(self):
        # Checks that explicitly turning the stats on via the env var works.
        src = """if 1:
        from numba import njit
        import numpy as np
        from numba.core.runtime import rtsys, _nrt_python
        from numba.core.registry import cpu_target

        @njit
        def foo():
            return np.arange(10)[0]

        # initialize the NRT before use
        rtsys.initialize(cpu_target.target_context)
        assert _nrt_python.memsys_stats_enabled()
        orig_stats = rtsys.get_allocation_stats()
        foo()
        new_stats = rtsys.get_allocation_stats()
        total_alloc = new_stats.alloc - orig_stats.alloc
        total_free = new_stats.free - orig_stats.free
        total_mi_alloc = new_stats.mi_alloc - orig_stats.mi_alloc
        total_mi_free = new_stats.mi_free - orig_stats.mi_free

        expected = 1
        assert total_alloc == expected
        assert total_free == expected
        assert total_mi_alloc == expected
        assert total_mi_free == expected
        """
        # Check env var explicitly being set works
        env = os.environ.copy()
        env['NUMBA_NRT_STATS'] = "1"
        run_in_subprocess(src, env=env)

    def check_env_var_off(self, env):
        """Run a subprocess with environment *env* and check in it that the
        stats are disabled and that querying them raises."""
        # The `else` branch makes the script fail if no exception is raised;
        # without it a missing RuntimeError would go unnoticed.
        src = """if 1:
        from numba import njit
        import numpy as np
        from numba.core.runtime import rtsys, _nrt_python

        @njit
        def foo():
            return np.arange(10)[0]

        assert _nrt_python.memsys_stats_enabled() == False
        try:
            rtsys.get_allocation_stats()
        except RuntimeError as e:
            assert "NRT stats are disabled." in str(e)
        else:
            raise AssertionError("RuntimeError was not raised")
        """
        run_in_subprocess(src, env=env)

    def test_stats_env_var_explicit_off(self):
        # Checks that explicitly turning the stats off via the env var works.
        env = os.environ.copy()
        env['NUMBA_NRT_STATS'] = "0"
        self.check_env_var_off(env)

    def test_stats_env_var_default_off(self):
        # Checks that the env var not being set is the same as "off", i.e.
        # default for Numba is off.
        env = os.environ.copy()
        env.pop('NUMBA_NRT_STATS', None)
        self.check_env_var_off(env)

    def test_stats_status_toggle(self):

        @njit
        def foo():
            tmp = np.ones(3)
            return np.arange(5 * tmp[0])

        # Switch on stats
        _nrt_python.memsys_enable_stats()
        # check the stats are on
        self.assertTrue(_nrt_python.memsys_stats_enabled())

        for _ in range(2):
            # capture the stats state
            stats_1 = rtsys.get_allocation_stats()
            # Switch off stats
            _nrt_python.memsys_disable_stats()
            # check the stats are off
            self.assertFalse(_nrt_python.memsys_stats_enabled())
            # run something that would move the counters were they enabled
            foo()
            # Switch on stats
            _nrt_python.memsys_enable_stats()
            # check the stats are on
            self.assertTrue(_nrt_python.memsys_stats_enabled())
            # capture the stats state (should not have changed)
            stats_2 = rtsys.get_allocation_stats()
            # run something that will move the counters
            foo()
            # capture the stats state (should have changed)
            stats_3 = rtsys.get_allocation_stats()
            # check stats_1 == stats_2
            self.assertEqual(stats_1, stats_2)
            # check stats_2 < stats_3
            self.assertLess(stats_2, stats_3)

    def test_rtsys_stats_query_raises_exception_when_disabled(self):
        # Checks that the standard rtsys.get_allocation_stats() query raises
        # when stats counters are turned off.

        _nrt_python.memsys_disable_stats()
        self.assertFalse(_nrt_python.memsys_stats_enabled())

        with self.assertRaises(RuntimeError) as raises:
            rtsys.get_allocation_stats()

        self.assertIn("NRT stats are disabled.", str(raises.exception))

    def test_nrt_explicit_stats_query_raises_exception_when_disabled(self):
        # Checks the various memsys_get_stats functions raise if queried when
        # the stats counters are disabled.
        method_variations = ('alloc', 'free', 'mi_alloc', 'mi_free')
        for meth in method_variations:
            stats_func = getattr(_nrt_python, f'memsys_get_stats_{meth}')
            with self.subTest(stats_func=stats_func):
                # Turn stats off
                _nrt_python.memsys_disable_stats()
                self.assertFalse(_nrt_python.memsys_stats_enabled())
                with self.assertRaises(RuntimeError) as raises:
                    stats_func()
                self.assertIn("NRT stats are disabled.",
                              str(raises.exception))


if __name__ == '__main__':
    unittest.main()