"""Tests for print() inside CUDA kernels.

Each test runs a small kernel script in a fresh Python subprocess and
inspects what that subprocess wrote to stdout / stderr.
"""

from numba.cuda.testing import CUDATestCase, skip_on_cudasim
import subprocess
import sys
import unittest


# The strings below are complete scripts executed with ``python -c``; their
# internal newlines and indentation are significant.

cuhello_usecase = """\
from numba import cuda

@cuda.jit
def cuhello():
    i = cuda.grid(1)
    print(i, 999)
    print(-42)

cuhello[2, 3]()
cuda.synchronize()
"""


printfloat_usecase = """\
from numba import cuda

@cuda.jit
def printfloat():
    i = cuda.grid(1)
    print(i, 23, 34.75, 321)

printfloat[1, 1]()
cuda.synchronize()
"""


printstring_usecase = """\
from numba import cuda

@cuda.jit
def printstring():
    i = cuda.grid(1)
    print(i, "hop!", 999)

printstring[1, 3]()
cuda.synchronize()
"""


printempty_usecase = """\
from numba import cuda

@cuda.jit
def printempty():
    print()

printempty[1, 1]()
cuda.synchronize()
"""


print_too_many_usecase = """\
from numba import cuda
import numpy as np

@cuda.jit
def print_too_many(r):
    print(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8], r[9], r[10],
          r[11], r[12], r[13], r[14], r[15], r[16], r[17], r[18], r[19], r[20],
          r[21], r[22], r[23], r[24], r[25], r[26], r[27], r[28], r[29], r[30],
          r[31], r[32])

print_too_many[1, 1](np.arange(33))
cuda.synchronize()
"""


class TestPrint(CUDATestCase):
    # Note that in these tests we generally strip the output to avoid dealing
    # with platform-specific line ending issues, e.g. '\r\n' vs '\n' etc.

    def run_code(self, code):
        """Run *code* in a subprocess and return the captured output.

        The code is passed to a new interpreter (``sys.executable -c``) with
        a 60 second timeout.

        Returns a ``(stdout, stderr)`` tuple of decoded strings.

        Raises ``AssertionError`` (chained from the underlying
        ``subprocess.CalledProcessError``) if the subprocess exits with a
        non-zero status. The message carries the child's stderr, which the
        ``CalledProcessError`` message alone does not show.
        """
        cmd = [sys.executable, "-c", code]
        try:
            cp = subprocess.run(cmd, timeout=60, capture_output=True,
                                check=True)
        except subprocess.CalledProcessError as e:
            # Decode leniently: this text is only used for diagnostics.
            child_stderr = (e.stderr.decode(errors="replace")
                            if e.stderr else "")
            raise AssertionError(
                f"subprocess exited with status {e.returncode}; "
                f"stderr:\n{child_stderr}"
            ) from e
        return cp.stdout.decode(), cp.stderr.decode()

    def test_cuhello(self):
        # The kernel is launched as [2, 3], i.e. 6 threads, and each thread
        # prints "<i> 999" followed by "-42".
        output, _ = self.run_code(cuhello_usecase)
        actual = [line.strip() for line in output.splitlines()]
        expected = ['-42'] * 6 + [f'{i} 999' for i in range(6)]
        # The output of GPU threads is intermingled, but each print()
        # call is still atomic
        self.assertEqual(sorted(actual), expected)

    def test_printfloat(self):
        output, _ = self.run_code(printfloat_usecase)
        # CUDA and the simulator use different formats for float formatting
        expected_cases = ["0 23 34.750000 321", "0 23 34.75 321"]
        self.assertIn(output.strip(), expected_cases)

    def test_printempty(self):
        # A bare print() emits only a line ending, so nothing is left once
        # the output is stripped.
        output, _ = self.run_code(printempty_usecase)
        self.assertEqual(output.strip(), "")

    def test_string(self):
        # The kernel is launched as [1, 3], i.e. 3 threads; the order in
        # which their lines arrive is not relied upon, hence the sort.
        output, _ = self.run_code(printstring_usecase)
        lines = [line.strip() for line in output.splitlines()]
        expected = [f'{i} hop! 999' for i in range(3)]
        self.assertEqual(sorted(lines), expected)

    @skip_on_cudasim('cudasim can print unlimited output')
    def test_too_many_args(self):
        # Tests that we emit the format string and warn when there are more
        # than 32 arguments, in common with CUDA C/C++ printf - this is due to
        # a limitation in CUDA vprintf, see:
        # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#limitations

        output, errors = self.run_code(print_too_many_usecase)

        # Check that the format string was printed instead of formatted garbage
        expected_fmt_string = ' '.join(['%lld'] * 33)
        self.assertIn(expected_fmt_string, output)

        # Check for the expected warning about formatting more than 32 items
        warn_msg = ('CUDA print() cannot print more than 32 items. The raw '
                    'format string will be emitted by the kernel instead.')
        self.assertIn(warn_msg, errors)


if __name__ == '__main__':
    unittest.main()