from numba.tests.support import override_config
from numba.cuda.testing import skip_on_cudasim
from numba import cuda
from numba.core import types
from numba.cuda.testing import CUDATestCase
import itertools
import re
import unittest


@skip_on_cudasim('Simulator does not produce debug dumps')
class TestCudaDebugInfo(CUDATestCase):
    """
    These tests only check the compiled PTX for the debug info section.
    """
    def _getasm(self, fn, sig):
        fn.compile(sig)
        return fn.inspect_asm(sig)

    def _check(self, fn, sig, expect):
        asm = self._getasm(fn, sig=sig)
        re_section_dbginfo = re.compile(r"\.section\s+\.debug_info\s+{")
        match = re_section_dbginfo.search(asm)
        assertfn = self.assertIsNotNone if expect else self.assertIsNone
        assertfn(match, msg=asm)
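
    # PTX dumps debug info as named sections of raw bytes, opening roughly
    # like:
    #
    #     .section .debug_info {
    #         .b8 ...
    #     }
    #
    # so _check above only tests that such a section is present.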

    def test_no_debuginfo_in_asm(self):
        @cuda.jit(debug=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(types.int32[:],), expect=False)

    def test_debuginfo_in_asm(self):
        @cuda.jit(debug=True, opt=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(types.int32[:],), expect=True)

    def test_environment_override(self):
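        # override_config temporarily flips the CUDA_DEBUGINFO_DEFAULT
        # config option (normally taken from the environment) for the
        # duration of the block.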
        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
            # Using the default value
            @cuda.jit(opt=False)
            def foo(x):
                x[0] = 1

            self._check(foo, sig=(types.int32[:],), expect=True)

            # User overrides the default value
            @cuda.jit(debug=False)
            def bar(x):
                x[0] = 1

            self._check(bar, sig=(types.int32[:],), expect=False)

    def test_issue_5835(self):
        # Invalid debug metadata would segfault NVVM when any function was
        # compiled with debug turned on and optimization off. This eager
        # compilation should not crash anything.
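        # Passing a signature to @cuda.jit makes this an eager compilation
        # that happens at decoration time.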
        @cuda.jit((types.int32[::1],), debug=True, opt=False)
        def f(x):
            x[0] = 0

    def test_wrapper_has_debuginfo(self):
        sig = (types.int32[::1],)

        @cuda.jit(sig, debug=True, opt=0)
        def f(x):
            x[0] = 1

        llvm_ir = f.inspect_llvm(sig)

        defines = [line for line in llvm_ir.splitlines()
                   if 'define void @"_ZN6cudapy' in line]
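
        # The '_ZN6cudapy' prefix is Numba's Itanium-ABI-style mangling of
        # the 'cudapy' namespace it places generated functions in, so these
        # lines are the wrapper kernel definitions.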

        # Make sure we only found one definition
        self.assertEqual(len(defines), 1)

        wrapper_define = defines[0]
        self.assertIn('!dbg', wrapper_define)

    def test_debug_function_calls_internal_impl(self):
        # Calling a function in a module generated from an implementation
        # internal to Numba requires multiple modules to be compiled with
        # NVVM - the internal implementation, and the caller. This example
        # uses two modules because `in (2, 3)` is implemented with:
        #
        # numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(
        #     UniTuple<long long, 2>,
        #     int
        # )
        #
        # This is condensed from this reproducer in Issue 5311:
        # https://github.com/numba/numba/issues/5311#issuecomment-674206587

        @cuda.jit((types.int32[:], types.int32[:]), debug=True, opt=False)
        def f(inp, outp):
            outp[0] = 1 if inp[0] in (2, 3) else 3

    def test_debug_function_calls_device_function(self):
        # Calling a device function requires compilation of multiple modules
        # with NVVM - one for the caller and one for the callee. This checks
        # that we don't cause an NVVM error in this case.
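
        # The device function computes the global thread index, equivalent
        # to cuda.grid(1).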
        @cuda.jit(device=True, debug=True, opt=0)
        def threadid():
            return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x

        @cuda.jit((types.int32[:],), debug=True, opt=0)
        def kernel(arr):
            i = cuda.grid(1)
            if i < len(arr):
                arr[i] = threadid()

    def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):
        @cuda.jit(device=True, debug=f2_debug, opt=False)
        def f2(x):
            return x + 1

        @cuda.jit(device=True, debug=f1_debug, opt=False)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit((types.int32, types.int32), debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)

        kernel[1, 1](1, 2)

    def test_chained_device_function(self):
        # Calling a device function that calls another device function from a
        # kernel should succeed regardless of which jit decorators have
        # debug=True. See Issue #7159.

        debug_opts = itertools.product(*[(True, False)] * 3)
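
        # debug_opts yields all eight on/off combinations of debug for the
        # kernel, f1, and f2.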
        for kernel_debug, f1_debug, f2_debug in debug_opts:
            with self.subTest(kernel_debug=kernel_debug,
                              f1_debug=f1_debug,
                              f2_debug=f2_debug):
                self._test_chained_device_function(kernel_debug,
                                                   f1_debug,
                                                   f2_debug)

    def _test_chained_device_function_two_calls(self, kernel_debug, f1_debug,
                                                f2_debug):

        @cuda.jit(device=True, debug=f2_debug, opt=False)
        def f2(x):
            return x + 1

        @cuda.jit(device=True, debug=f1_debug, opt=False)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit(debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)
            f2(x)

        kernel[1, 1](1, 2)

    def test_chained_device_function_two_calls(self):
        # Calling a device function that calls a leaf device function from a
        # kernel, and calling the leaf device function from the kernel should
        # succeed, regardless of which jit decorators have debug=True. See
        # Issue #7159.

        debug_opts = itertools.product(*[(True, False)] * 3)

        for kernel_debug, f1_debug, f2_debug in debug_opts:
            with self.subTest(kernel_debug=kernel_debug,
                              f1_debug=f1_debug,
                              f2_debug=f2_debug):
                self._test_chained_device_function_two_calls(kernel_debug,
                                                             f1_debug,
                                                             f2_debug)

    def test_chained_device_three_functions(self):
        # Like test_chained_device_function, but with enough functions (three)
        # to ensure that the recursion visits all the way down the call tree
        # when fixing the linkage of functions for debug.
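        # f1 and f2 below have no explicit debug setting, so the linkage
        # fix-up must recurse through them to reach the debug leaf.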
        def three_device_fns(kernel_debug, leaf_debug):
            @cuda.jit(device=True, debug=leaf_debug, opt=False)
            def f3(x):
                return x * x

            @cuda.jit(device=True)
            def f2(x):
                return f3(x) + 1

            @cuda.jit(device=True)
            def f1(x, y):
                return x - f2(y)

            @cuda.jit(debug=kernel_debug, opt=False)
            def kernel(x, y):
                f1(x, y)

            kernel[1, 1](1, 2)

        # Check with debug on both the kernel and the leaf, on only one of
        # them, and on neither.
        three_device_fns(kernel_debug=True, leaf_debug=True)
        three_device_fns(kernel_debug=True, leaf_debug=False)
        three_device_fns(kernel_debug=False, leaf_debug=True)
        three_device_fns(kernel_debug=False, leaf_debug=False)


if __name__ == '__main__':
    unittest.main()