102 lines
3.5 KiB
Python
102 lines
3.5 KiB
Python
import numpy as np
|
|
|
|
from numba.core.utils import PYVERSION
|
|
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
|
|
from numba.tests.support import (override_config, captured_stderr,
|
|
captured_stdout)
|
|
from numba import cuda, float64
|
|
import unittest
|
|
|
|
|
|
def simple_cuda(A, B):
    """Store ``A[k] + 1.5`` into ``B[k]`` for the index ``k`` of this thread.

    The index comes from ``cuda.grid(1)``. There is no bounds guard on it,
    so the caller is responsible for choosing a launch configuration that
    matches the array length.
    """
    idx = cuda.grid(1)
    B[idx] = A[idx] + 1.5
|
|
|
|
|
|
@skip_on_cudasim('Simulator does not produce debug dumps')
class TestDebugOutput(CUDATestCase):
    """Check the text printed when a single ``DUMP_*`` config flag is on.

    Every ``test_dump_*`` method overrides one config flag, compiles and
    launches ``simple_cuda`` with stdout captured, and then verifies that
    the check for the enabled dump passes while the checks for all other
    dumps fail.
    """

    def compile_simple_cuda(self):
        """Compile and launch ``simple_cuda``; return the captured stdout.

        The kernel result is sanity-checked against the host computation
        and stderr is asserted to be empty before the text is returned.
        """
        with captured_stderr() as stderr_buf:
            with captured_stdout() as stdout_buf:
                signature = (float64[:], float64[:])
                kernel = cuda.jit(signature)(simple_cuda)
                # Launch the compiled kernel (to ensure PTX is generated)
                # and sanity-check what it wrote.
                src = np.linspace(0, 1, 10).astype(np.float64)
                dst = np.zeros_like(src)
                kernel[1, 10](src, dst)
                self.assertTrue(np.allclose(src + 1.5, dst))
            # Debug output must not show up on stderr.
            self.assertFalse(stderr_buf.getvalue())
        return stdout_buf.getvalue()

    def assert_fails(self, *args, **kwargs):
        """Assert that the given callable raises ``AssertionError``.

        Positional and keyword arguments are forwarded unchanged to
        ``assertRaises`` after the exception type.
        """
        self.assertRaises(AssertionError, *args, **kwargs)

    def check_debug_output(self, out, enabled_dumps):
        """Verify which dumps are present in the captured text *out*.

        For every known dump kind the matching ``_check_dump_<name>``
        method is looked up. It must pass when the name is listed in
        *enabled_dumps* and must raise ``AssertionError`` otherwise.
        Names in *enabled_dumps* have to be known dump kinds.
        """
        known_names = ('bytecode', 'cfg', 'ir', 'llvm', 'assembly')
        is_enabled = {dump: False for dump in known_names}
        for dump in enabled_dumps:
            assert dump in is_enabled
            is_enabled[dump] = True

        # Walk the dump kinds in alphabetical order.
        for dump in sorted(is_enabled):
            checker = getattr(self, '_check_dump_%s' % dump)
            if is_enabled[dump]:
                checker(out)
            else:
                self.assert_fails(checker, out)

    def _check_dump_bytecode(self, out):
        """Assert that *out* shows the add opcode for the running Python.

        Raises ``NotImplementedError`` for Python versions that are not
        listed here.
        """
        if PYVERSION in ((3, 11), (3, 12)):
            # binop with arg=0 is binary add, see CPython dis.py and
            # opcode.py
            marker = 'BINARY_OP(arg=0'
        elif PYVERSION in ((3, 9), (3, 10)):
            marker = 'BINARY_ADD'
        else:
            raise NotImplementedError(PYVERSION)
        self.assertIn(marker, out)

    def _check_dump_cfg(self, out):
        """Assert that *out* contains the CFG dump marker."""
        self.assertIn('CFG dominators', out)

    def _check_dump_ir(self, out):
        """Assert that *out* contains the IR dump header and the constant."""
        for marker in ('--IR DUMP: simple_cuda--', 'const(float, 1.5)'):
            self.assertIn(marker, out)

    def _check_dump_llvm(self, out):
        """Assert that *out* contains the LLVM dump header and kernel tag."""
        for marker in ('--LLVM DUMP', '!"kernel", i32 1'):
            self.assertIn(marker, out)

    def _check_dump_assembly(self, out):
        """Assert that *out* contains the assembly header and NVVM banner."""
        for marker in ('--ASSEMBLY simple_cuda',
                       'Generated by NVIDIA NVVM Compiler'):
            self.assertIn(marker, out)

    def _compile_with_flag(self, flag_name):
        """Run ``compile_simple_cuda`` with config *flag_name* set to True.

        The override is only active during compilation; the captured
        stdout text is returned.
        """
        with override_config(flag_name, True):
            return self.compile_simple_cuda()

    def test_dump_bytecode(self):
        """Only the bytecode dump appears with ``DUMP_BYTECODE`` on."""
        captured = self._compile_with_flag('DUMP_BYTECODE')
        self.check_debug_output(captured, ['bytecode'])

    def test_dump_ir(self):
        """Only the IR dump appears with ``DUMP_IR`` on."""
        captured = self._compile_with_flag('DUMP_IR')
        self.check_debug_output(captured, ['ir'])

    def test_dump_cfg(self):
        """Only the CFG dump appears with ``DUMP_CFG`` on."""
        captured = self._compile_with_flag('DUMP_CFG')
        self.check_debug_output(captured, ['cfg'])

    def test_dump_llvm(self):
        """Only the LLVM dump appears with ``DUMP_LLVM`` on."""
        captured = self._compile_with_flag('DUMP_LLVM')
        self.check_debug_output(captured, ['llvm'])

    def test_dump_assembly(self):
        """Only the assembly dump appears with ``DUMP_ASSEMBLY`` on."""
        captured = self._compile_with_flag('DUMP_ASSEMBLY')
        self.check_debug_output(captured, ['assembly'])
|
|
|
|
|
|
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|