ai-content-maker/.venv/Lib/site-packages/numba/cuda/tests/cudapy/test_debuginfo.py

from numba.tests.support import override_config
from numba.cuda.testing import skip_on_cudasim
from numba import cuda
from numba.core import types
from numba.cuda.testing import CUDATestCase
import itertools
import re
import unittest


@skip_on_cudasim('Simulator does not produce debug dumps')
class TestCudaDebugInfo(CUDATestCase):
    """
    These tests only checks the compiled PTX for debuginfo section
    """
    def _getasm(self, fn, sig):
        fn.compile(sig)
        return fn.inspect_asm(sig)

    def _check(self, fn, sig, expect):
        asm = self._getasm(fn, sig=sig)
        re_section_dbginfo = re.compile(r"\.section\s+\.debug_info\s+{")
        match = re_section_dbginfo.search(asm)
        assertfn = self.assertIsNotNone if expect else self.assertIsNone
        assertfn(match, msg=asm)

    def test_no_debuginfo_in_asm(self):
        @cuda.jit(debug=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(types.int32[:],), expect=False)

    def test_debuginfo_in_asm(self):
        @cuda.jit(debug=True, opt=False)
        def foo(x):
            x[0] = 1

        self._check(foo, sig=(types.int32[:],), expect=True)

    def test_environment_override(self):
        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
            # Using default value
            @cuda.jit(opt=False)
            def foo(x):
                x[0] = 1

            self._check(foo, sig=(types.int32[:],), expect=True)

            # User override default value
            @cuda.jit(debug=False)
            def bar(x):
                x[0] = 1

            self._check(bar, sig=(types.int32[:],), expect=False)

    def test_issue_5835(self):
        # Invalid debug metadata would segfault NVVM when any function was
        # compiled with debug turned on and optimization off. This eager
        # compilation should not crash anything.
        @cuda.jit((types.int32[::1],), debug=True, opt=False)
        def f(x):
            x[0] = 0

    def test_wrapper_has_debuginfo(self):
        sig = (types.int32[::1],)

        @cuda.jit(sig, debug=True, opt=0)
        def f(x):
            x[0] = 1

        llvm_ir = f.inspect_llvm(sig)

        defines = [line for line in llvm_ir.splitlines()
                   if 'define void @"_ZN6cudapy' in line]

        # Make sure we only found one definition
        self.assertEqual(len(defines), 1)

        wrapper_define = defines[0]
        self.assertIn('!dbg', wrapper_define)

    def test_debug_function_calls_internal_impl(self):
        # Calling a function in a module generated from an implementation
        # internal to Numba requires multiple modules to be compiled with NVVM -
        # the internal implementation, and the caller. This example uses two
        # modules because the `in (2, 3)` is implemented with:
        #
        # numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(
        #     UniTuple<long long, 2>,
        #     int
        # )
        #
        # This is condensed from this reproducer in Issue 5311:
        # https://github.com/numba/numba/issues/5311#issuecomment-674206587

        @cuda.jit((types.int32[:], types.int32[:]), debug=True, opt=False)
        def f(inp, outp):
            outp[0] = 1 if inp[0] in (2, 3) else 3

    def test_debug_function_calls_device_function(self):
        # Calling a device function requires compilation of multiple modules
        # with NVVM - one for the caller and one for the callee. This checks
        # that we don't cause an NVVM error in this case.

        @cuda.jit(device=True, debug=True, opt=0)
        def threadid():
            return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x

        @cuda.jit((types.int32[:],), debug=True, opt=0)
        def kernel(arr):
            i = cuda.grid(1)
            if i < len(arr):
                arr[i] = threadid()

    def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):
        @cuda.jit(device=True, debug=f2_debug, opt=False)
        def f2(x):
            return x + 1

        @cuda.jit(device=True, debug=f1_debug, opt=False)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit((types.int32, types.int32), debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)

        kernel[1, 1](1, 2)

    def test_chained_device_function(self):
        # Calling a device function that calls another device function from a
        # kernel with should succeed regardless of which jit decorators have
        # debug=True. See Issue #7159.

        debug_opts = itertools.product(*[(True, False)] * 3)

        for kernel_debug, f1_debug, f2_debug in debug_opts:
            with self.subTest(kernel_debug=kernel_debug,
                              f1_debug=f1_debug,
                              f2_debug=f2_debug):
                self._test_chained_device_function(kernel_debug,
                                                   f1_debug,
                                                   f2_debug)

    def _test_chained_device_function_two_calls(self, kernel_debug, f1_debug,
                                                f2_debug):

        @cuda.jit(device=True, debug=f2_debug, opt=False)
        def f2(x):
            return x + 1

        @cuda.jit(device=True, debug=f1_debug, opt=False)
        def f1(x, y):
            return x - f2(y)

        @cuda.jit(debug=kernel_debug, opt=False)
        def kernel(x, y):
            f1(x, y)
            f2(x)

        kernel[1, 1](1, 2)

    def test_chained_device_function_two_calls(self):
        # Calling a device function that calls a leaf device function from a
        # kernel, and calling the leaf device function from the kernel should
        # succeed, regardless of which jit decorators have debug=True. See
        # Issue #7159.

        debug_opts = itertools.product(*[(True, False)] * 3)

        for kernel_debug, f1_debug, f2_debug in debug_opts:
            with self.subTest(kernel_debug=kernel_debug,
                              f1_debug=f1_debug,
                              f2_debug=f2_debug):
                self._test_chained_device_function_two_calls(kernel_debug,
                                                             f1_debug,
                                                             f2_debug)

    def test_chained_device_three_functions(self):
        # Like test_chained_device_function, but with enough functions (three)
        # to ensure that the recursion visits all the way down the call tree
        # when fixing linkage of functions for debug.
        def three_device_fns(kernel_debug, leaf_debug):
            @cuda.jit(device=True, debug=leaf_debug, opt=False)
            def f3(x):
                return x * x

            @cuda.jit(device=True)
            def f2(x):
                return f3(x) + 1

            @cuda.jit(device=True)
            def f1(x, y):
                return x - f2(y)

            @cuda.jit(debug=kernel_debug, opt=False)
            def kernel(x, y):
                f1(x, y)

            kernel[1, 1](1, 2)

        # Check when debug on the kernel, on the leaf, and not on any function.
        three_device_fns(kernel_debug=True, leaf_debug=True)
        three_device_fns(kernel_debug=True, leaf_debug=False)
        three_device_fns(kernel_debug=False, leaf_debug=True)
        three_device_fns(kernel_debug=False, leaf_debug=False)


if __name__ == '__main__':
    unittest.main()
first commit 2024-05-03 04:18:51 +03:00			`from numba.tests.support import override_config`
			`from numba.cuda.testing import skip_on_cudasim`
			`from numba import cuda`
			`from numba.core import types`
			`from numba.cuda.testing import CUDATestCase`
			`import itertools`
			`import re`
			`import unittest`


			`@skip_on_cudasim('Simulator does not produce debug dumps')`
			`class TestCudaDebugInfo(CUDATestCase):`
			`"""`
			`These tests only checks the compiled PTX for debuginfo section`
			`"""`
			`def _getasm(self, fn, sig):`
			`fn.compile(sig)`
			`return fn.inspect_asm(sig)`

			`def _check(self, fn, sig, expect):`
			`asm = self._getasm(fn, sig=sig)`
			`re_section_dbginfo = re.compile(r"\.section\s+\.debug_info\s+{")`
			`match = re_section_dbginfo.search(asm)`
			`assertfn = self.assertIsNotNone if expect else self.assertIsNone`
			`assertfn(match, msg=asm)`

			`def test_no_debuginfo_in_asm(self):`
			`@cuda.jit(debug=False)`
			`def foo(x):`
			`x[0] = 1`

			`self._check(foo, sig=(types.int32[:],), expect=False)`

			`def test_debuginfo_in_asm(self):`
			`@cuda.jit(debug=True, opt=False)`
			`def foo(x):`
			`x[0] = 1`

			`self._check(foo, sig=(types.int32[:],), expect=True)`

			`def test_environment_override(self):`
			`with override_config('CUDA_DEBUGINFO_DEFAULT', 1):`
			`# Using default value`
			`@cuda.jit(opt=False)`
			`def foo(x):`
			`x[0] = 1`

			`self._check(foo, sig=(types.int32[:],), expect=True)`

			`# User override default value`
			`@cuda.jit(debug=False)`
			`def bar(x):`
			`x[0] = 1`

			`self._check(bar, sig=(types.int32[:],), expect=False)`

			`def test_issue_5835(self):`
			`# Invalid debug metadata would segfault NVVM when any function was`
			`# compiled with debug turned on and optimization off. This eager`
			`# compilation should not crash anything.`
			`@cuda.jit((types.int32[::1],), debug=True, opt=False)`
			`def f(x):`
			`x[0] = 0`

			`def test_wrapper_has_debuginfo(self):`
			`sig = (types.int32[::1],)`

			`@cuda.jit(sig, debug=True, opt=0)`
			`def f(x):`
			`x[0] = 1`

			`llvm_ir = f.inspect_llvm(sig)`

			`defines = [line for line in llvm_ir.splitlines()`
			`if 'define void @"_ZN6cudapy' in line]`

			`# Make sure we only found one definition`
			`self.assertEqual(len(defines), 1)`

			`wrapper_define = defines[0]`
			`self.assertIn('!dbg', wrapper_define)`

			`def test_debug_function_calls_internal_impl(self):`
			`# Calling a function in a module generated from an implementation`
			`# internal to Numba requires multiple modules to be compiled with NVVM -`
			`# the internal implementation, and the caller. This example uses two`
			# modules because the `in (2, 3)` is implemented with:
			`#`
			`# numba::cpython::listobj::in_seq::$3clocals$3e::seq_contains_impl$242(`
			`# UniTuple<long long, 2>,`
			`# int`
			`# )`
			`#`
			`# This is condensed from this reproducer in Issue 5311:`
			`# https://github.com/numba/numba/issues/5311#issuecomment-674206587`

			`@cuda.jit((types.int32[:], types.int32[:]), debug=True, opt=False)`
			`def f(inp, outp):`
			`outp[0] = 1 if inp[0] in (2, 3) else 3`

			`def test_debug_function_calls_device_function(self):`
			`# Calling a device function requires compilation of multiple modules`
			`# with NVVM - one for the caller and one for the callee. This checks`
			`# that we don't cause an NVVM error in this case.`

			`@cuda.jit(device=True, debug=True, opt=0)`
			`def threadid():`
			`return cuda.blockDim.x * cuda.blockIdx.x + cuda.threadIdx.x`

			`@cuda.jit((types.int32[:],), debug=True, opt=0)`
			`def kernel(arr):`
			`i = cuda.grid(1)`
			`if i < len(arr):`
			`arr[i] = threadid()`

			`def _test_chained_device_function(self, kernel_debug, f1_debug, f2_debug):`
			`@cuda.jit(device=True, debug=f2_debug, opt=False)`
			`def f2(x):`
			`return x + 1`

			`@cuda.jit(device=True, debug=f1_debug, opt=False)`
			`def f1(x, y):`
			`return x - f2(y)`

			`@cuda.jit((types.int32, types.int32), debug=kernel_debug, opt=False)`
			`def kernel(x, y):`
			`f1(x, y)`

			`kernel[1, 1](1, 2)`

			`def test_chained_device_function(self):`
			`# Calling a device function that calls another device function from a`
			`# kernel with should succeed regardless of which jit decorators have`
			`# debug=True. See Issue #7159.`

			`debug_opts = itertools.product([(True, False)] 3)`

			`for kernel_debug, f1_debug, f2_debug in debug_opts:`
			`with self.subTest(kernel_debug=kernel_debug,`
			`f1_debug=f1_debug,`
			`f2_debug=f2_debug):`
			`self._test_chained_device_function(kernel_debug,`
			`f1_debug,`
			`f2_debug)`

			`def _test_chained_device_function_two_calls(self, kernel_debug, f1_debug,`
			`f2_debug):`

			`@cuda.jit(device=True, debug=f2_debug, opt=False)`
			`def f2(x):`
			`return x + 1`

			`@cuda.jit(device=True, debug=f1_debug, opt=False)`
			`def f1(x, y):`
			`return x - f2(y)`

			`@cuda.jit(debug=kernel_debug, opt=False)`
			`def kernel(x, y):`
			`f1(x, y)`
			`f2(x)`

			`kernel[1, 1](1, 2)`

			`def test_chained_device_function_two_calls(self):`
			`# Calling a device function that calls a leaf device function from a`
			`# kernel, and calling the leaf device function from the kernel should`
			`# succeed, regardless of which jit decorators have debug=True. See`
			`# Issue #7159.`

			`debug_opts = itertools.product([(True, False)] 3)`

			`for kernel_debug, f1_debug, f2_debug in debug_opts:`
			`with self.subTest(kernel_debug=kernel_debug,`
			`f1_debug=f1_debug,`
			`f2_debug=f2_debug):`
			`self._test_chained_device_function_two_calls(kernel_debug,`
			`f1_debug,`
			`f2_debug)`

			`def test_chained_device_three_functions(self):`
			`# Like test_chained_device_function, but with enough functions (three)`
			`# to ensure that the recursion visits all the way down the call tree`
			`# when fixing linkage of functions for debug.`
			`def three_device_fns(kernel_debug, leaf_debug):`
			`@cuda.jit(device=True, debug=leaf_debug, opt=False)`
			`def f3(x):`
			`return x * x`

			`@cuda.jit(device=True)`
			`def f2(x):`
			`return f3(x) + 1`

			`@cuda.jit(device=True)`
			`def f1(x, y):`
			`return x - f2(y)`

			`@cuda.jit(debug=kernel_debug, opt=False)`
			`def kernel(x, y):`
			`f1(x, y)`

			`kernel[1, 1](1, 2)`

			`# Check when debug on the kernel, on the leaf, and not on any function.`
			`three_device_fns(kernel_debug=True, leaf_debug=True)`
			`three_device_fns(kernel_debug=True, leaf_debug=False)`
			`three_device_fns(kernel_debug=False, leaf_debug=True)`
			`three_device_fns(kernel_debug=False, leaf_debug=False)`


			`if __name__ == '__main__':`
			`unittest.main()`