240 lines
8.9 KiB
Python
240 lines
8.9 KiB
Python
|
from math import sqrt
|
||
|
from numba import cuda, float32, int16, int32, int64, uint32, void
|
||
|
from numba.cuda import compile_ptx, compile_ptx_for_current_device
|
||
|
|
||
|
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
|
||
|
|
||
|
|
||
|
# Module-scope test function: ensures the C ABI name is resolved correctly
# whether a function lives at module or local scope.
def f_module(x, y):
    """Return the sum of *x* and *y*."""
    total = x + y
    return total
|
||
|
|
||
|
|
||
|
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCompileToPTX(unittest.TestCase):
    """Tests for ``compile_ptx``: kernel vs. device-function output, fastmath
    code generation, debug / line info emission, and C ABI handling."""

    def test_global_kernel(self):
        def f(r, x, y):
            i = cuda.grid(1)
            if i < len(r):
                r[i] = x[i] + y[i]

        args = (float32[:], float32[:], float32[:])
        ptx, resty = compile_ptx(f, args)

        # Kernels should not have a func_retval parameter
        self.assertNotIn('func_retval', ptx)
        # .visible .func is used to denote a device function
        self.assertNotIn('.visible .func', ptx)
        # .visible .entry would denote the presence of a global function
        self.assertIn('.visible .entry', ptx)
        # Return type for kernels should always be void
        self.assertEqual(resty, void)

    def test_device_function(self):
        def add(x, y):
            return x + y

        args = (float32, float32)
        ptx, resty = compile_ptx(add, args, device=True)

        # Device functions take a func_retval parameter for storing the
        # returned value in by reference
        self.assertIn('func_retval', ptx)
        # .visible .func is used to denote a device function
        self.assertIn('.visible .func', ptx)
        # .visible .entry would denote the presence of a global function
        self.assertNotIn('.visible .entry', ptx)
        # Inferred return type as expected?
        self.assertEqual(resty, float32)

        # Check that function's output matches signature
        sig_int32 = int32(int32, int32)
        ptx, resty = compile_ptx(add, sig_int32, device=True)
        self.assertEqual(resty, int32)

        sig_int16 = int16(int16, int16)
        ptx, resty = compile_ptx(add, sig_int16, device=True)
        self.assertEqual(resty, int16)
        # Using string as signature
        sig_string = "uint32(uint32, uint32)"
        ptx, resty = compile_ptx(add, sig_string, device=True)
        self.assertEqual(resty, uint32)

    def test_fastmath(self):
        def f(x, y, z, d):
            return sqrt((x * y + z) / d)

        args = (float32, float32, float32, float32)
        ptx, resty = compile_ptx(f, args, device=True)

        # Without fastmath, fma contraction is enabled by default, but ftz and
        # approximate div / sqrt is not.
        self.assertIn('fma.rn.f32', ptx)
        self.assertIn('div.rn.f32', ptx)
        self.assertIn('sqrt.rn.f32', ptx)

        ptx, resty = compile_ptx(f, args, device=True, fastmath=True)

        # With fastmath, ftz and approximate div / sqrt are enabled
        self.assertIn('fma.rn.ftz.f32', ptx)
        self.assertIn('div.approx.ftz.f32', ptx)
        self.assertIn('sqrt.approx.ftz.f32', ptx)

    def check_debug_info(self, ptx):
        """Assert that *ptx* carries a debug_info section and a .file
        directive naming this module's source file."""
        # A debug_info section should exist in the PTX. Whitespace varies
        # between CUDA toolkit versions.
        self.assertRegex(ptx, r'\.section\s+\.debug_info')
        # A .file directive should be produced and include the name of the
        # source. The path and whitespace may vary, so we accept anything
        # ending in the filename of this module.
        self.assertRegex(ptx, r'\.file.*test_compiler.py"')

    def test_device_function_with_debug(self):
        # See Issue #6719 - this ensures that compilation with debug succeeds
        # with CUDA 11.2 / NVVM 7.0 onwards. Previously it failed because NVVM
        # IR version metadata was not added when compiling device functions,
        # and NVVM assumed DBG version 1.0 if not specified, which is
        # incompatible with the 3.0 IR we use. This was specified only for
        # kernels.
        def f():
            pass

        ptx, resty = compile_ptx(f, (), device=True, debug=True)
        self.check_debug_info(ptx)

    def test_kernel_with_debug(self):
        # Inspired by (but not originally affected by) Issue #6719
        def f():
            pass

        ptx, resty = compile_ptx(f, (), debug=True)
        self.check_debug_info(ptx)

    def check_line_info(self, ptx):
        """Assert that *ptx* carries a .file directive naming this module's
        source file (emitted when lineinfo is requested)."""
        # A .file directive should be produced and include the name of the
        # source. The path and whitespace may vary, so we accept anything
        # ending in the filename of this module.
        self.assertRegex(ptx, r'\.file.*test_compiler.py"')

    def test_device_function_with_line_info(self):
        def f():
            pass

        ptx, resty = compile_ptx(f, (), device=True, lineinfo=True)
        self.check_line_info(ptx)

    def test_kernel_with_line_info(self):
        def f():
            pass

        ptx, resty = compile_ptx(f, (), lineinfo=True)
        self.check_line_info(ptx)

    def test_non_void_return_type(self):
        def f(x, y):
            return x[0] + y[0]

        with self.assertRaisesRegex(TypeError, 'must have void return type'):
            compile_ptx(f, (uint32[::1], uint32[::1]))

    def test_c_abi_disallowed_for_kernel(self):
        def f(x, y):
            return x + y

        with self.assertRaisesRegex(NotImplementedError,
                                    "The C ABI is not supported for kernels"):
            compile_ptx(f, (int32, int32), abi="c")

    def test_unsupported_abi(self):
        def f(x, y):
            return x + y

        with self.assertRaisesRegex(NotImplementedError,
                                    "Unsupported ABI: fastcall"):
            compile_ptx(f, (int32, int32), abi="fastcall")

    def test_c_abi_device_function(self):
        def f(x, y):
            return x + y

        ptx, resty = compile_ptx(f, int32(int32, int32), device=True, abi="c")
        # There should be no more than two parameters.
        # BUGFIX: the arguments were previously reversed -
        # assertNotIn(ptx, "param_2") - which tested whether the whole PTX
        # text was a substring of "param_2" and therefore always passed.
        # The correct order is assertNotIn(member, container).
        self.assertNotIn("param_2", ptx)

        # The function name should match the Python function name (not the
        # qualname, which includes additional info), and its return value
        # should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+f\(")

        # If we compile for 64-bit integers, the return type should be 64 bits
        # wide
        ptx, resty = compile_ptx(f, int64(int64, int64), device=True, abi="c")
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b64")

    def test_c_abi_device_function_module_scope(self):
        ptx, resty = compile_ptx(f_module, int32(int32, int32), device=True,
                                 abi="c")

        # The function name should match the Python function name, and its
        # return value should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+f_module\(")

    def test_c_abi_with_abi_name(self):
        abi_info = {'abi_name': '_Z4funcii'}
        ptx, resty = compile_ptx(f_module, int32(int32, int32), device=True,
                                 abi="c", abi_info=abi_info)

        # The function name should match the one given in the ABI info, and its
        # return value should be 32 bits
        self.assertRegex(ptx, r"\.visible\s+\.func\s+\(\.param\s+\.b32\s+"
                              r"func_retval0\)\s+_Z4funcii\(")
|
||
|
|
||
|
|
||
|
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCompileToPTXForCurrentDevice(CUDATestCase):
    """Checks that compile_ptx_for_current_device targets the running GPU."""

    def test_compile_ptx_for_current_device(self):
        def add(x, y):
            return x + y

        ptx, resty = compile_ptx_for_current_device(
            add, (float32, float32), device=True)

        # The generated PTX must target the current device's compute
        # capability, or the closest capability supported by the installed
        # toolkit.
        device_cc = cuda.get_current_device().compute_capability
        major, minor = cuda.cudadrv.nvvm.find_closest_arch(device_cc)
        self.assertIn(f'.target sm_{major}{minor}', ptx)
|
||
|
|
||
|
|
||
|
@skip_on_cudasim('Compilation unsupported in the simulator')
class TestCompileOnlyTests(unittest.TestCase):
    '''For tests where we can only check correctness by examining the compiler
    output rather than observing the effects of execution.'''

    def test_nanosleep(self):
        def use_nanosleep(x):
            # Sleep for a constant time
            cuda.nanosleep(32)
            # Sleep for a variable time
            cuda.nanosleep(x)

        ptx, resty = compile_ptx(use_nanosleep, (uint32,), cc=(7, 0))

        # One nanosleep instruction is expected per call site above.
        expected = 2
        nanosleep_count = sum(
            1 for line in ptx.split('\n') if 'nanosleep.u32' in line)

        self.assertEqual(expected, nanosleep_count,
                         (f'Got {nanosleep_count} nanosleep instructions, '
                          f'expected {expected}'))
|
||
|
|
||
|
|
||
|
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
|