import numpy as np from numba.core.utils import PYVERSION from numba.cuda.testing import skip_on_cudasim, CUDATestCase from numba.tests.support import (override_config, captured_stderr, captured_stdout) from numba import cuda, float64 import unittest def simple_cuda(A, B): i = cuda.grid(1) B[i] = A[i] + 1.5 @skip_on_cudasim('Simulator does not produce debug dumps') class TestDebugOutput(CUDATestCase): def compile_simple_cuda(self): with captured_stderr() as err: with captured_stdout() as out: cfunc = cuda.jit((float64[:], float64[:]))(simple_cuda) # Call compiled function (to ensure PTX is generated) # and sanity-check results. A = np.linspace(0, 1, 10).astype(np.float64) B = np.zeros_like(A) cfunc[1, 10](A, B) self.assertTrue(np.allclose(A + 1.5, B)) # stderr shouldn't be affected by debug output self.assertFalse(err.getvalue()) return out.getvalue() def assert_fails(self, *args, **kwargs): self.assertRaises(AssertionError, *args, **kwargs) def check_debug_output(self, out, enabled_dumps): all_dumps = dict.fromkeys(['bytecode', 'cfg', 'ir', 'llvm', 'assembly'], False) for name in enabled_dumps: assert name in all_dumps all_dumps[name] = True for name, enabled in sorted(all_dumps.items()): check_meth = getattr(self, '_check_dump_%s' % name) if enabled: check_meth(out) else: self.assertRaises(AssertionError, check_meth, out) def _check_dump_bytecode(self, out): if PYVERSION in ((3, 11), (3, 12)): # binop with arg=0 is binary add, see CPython dis.py and opcode.py self.assertIn('BINARY_OP(arg=0', out) elif PYVERSION in ((3, 9), (3, 10)): self.assertIn('BINARY_ADD', out) else: raise NotImplementedError(PYVERSION) def _check_dump_cfg(self, out): self.assertIn('CFG dominators', out) def _check_dump_ir(self, out): self.assertIn('--IR DUMP: simple_cuda--', out) self.assertIn('const(float, 1.5)', out) def _check_dump_llvm(self, out): self.assertIn('--LLVM DUMP', out) self.assertIn('!"kernel", i32 1', out) def _check_dump_assembly(self, out): self.assertIn('--ASSEMBLY simple_cuda', out) self.assertIn('Generated by NVIDIA NVVM Compiler', out) def test_dump_bytecode(self): with override_config('DUMP_BYTECODE', True): out = self.compile_simple_cuda() self.check_debug_output(out, ['bytecode']) def test_dump_ir(self): with override_config('DUMP_IR', True): out = self.compile_simple_cuda() self.check_debug_output(out, ['ir']) def test_dump_cfg(self): with override_config('DUMP_CFG', True): out = self.compile_simple_cuda() self.check_debug_output(out, ['cfg']) def test_dump_llvm(self): with override_config('DUMP_LLVM', True): out = self.compile_simple_cuda() self.check_debug_output(out, ['llvm']) def test_dump_assembly(self): with override_config('DUMP_ASSEMBLY', True): out = self.compile_simple_cuda() self.check_debug_output(out, ['assembly']) if __name__ == '__main__': unittest.main()