# ai-content-maker/.venv/Lib/site-packages/numba/cuda/tests/cudadrv/test_streams.py
import asyncio
import functools
import threading
import numpy as np
from numba import cuda
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
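

# Decorator: run an ``async def`` test method to completion on a fresh
# asyncio event loop (debug mode enabled), closing the loop afterwards.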
def with_asyncio_loop(f):
    @functools.wraps(f)
    def runner(*args, **kwds):
        loop = asyncio.new_event_loop()
        loop.set_debug(True)
        try:
            return loop.run_until_complete(f(*args, **kwds))
        finally:
            loop.close()
    return runner


@skip_on_cudasim('CUDA Driver API unsupported in the simulator')
class TestCudaStream(CUDATestCase):
    def test_add_callback(self):
        def callback(stream, status, event):
            event.set()

        stream = cuda.stream()
        callback_event = threading.Event()
        stream.add_callback(callback, callback_event)
        self.assertTrue(callback_event.wait(1.0))
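
    # With no user data supplied to add_callback, the callback's argument
    # should default to None.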
    def test_add_callback_with_default_arg(self):
        callback_event = threading.Event()

        def callback(stream, status, arg):
            self.assertIsNone(arg)
            callback_event.set()

        stream = cuda.stream()
        stream.add_callback(callback)
        self.assertTrue(callback_event.wait(1.0))
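
    # Awaiting stream.async_done() should complete once all work queued on
    # the stream has finished.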
    @with_asyncio_loop
    async def test_async_done(self):
        stream = cuda.stream()
        await stream.async_done()
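
    # Run several host-to-device-to-host round trips concurrently, each on
    # its own stream, and check that every task gets its own value back.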
    @with_asyncio_loop
    async def test_parallel_tasks(self):
        async def async_cuda_fn(value_in: float) -> float:
            stream = cuda.stream()
            h_src, h_dst = cuda.pinned_array(8), cuda.pinned_array(8)
            h_src[:] = value_in
            d_ary = cuda.to_device(h_src, stream=stream)
            d_ary.copy_to_host(h_dst, stream=stream)
            done_result = await stream.async_done()
            self.assertEqual(done_result, stream)
            return h_dst.mean()

        values_in = [1, 2, 3, 4]
        tasks = [asyncio.create_task(async_cuda_fn(v)) for v in values_in]
        values_out = await asyncio.gather(*tasks)
        self.assertTrue(np.allclose(values_in, values_out))
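
    # Several awaitables obtained from the same stream should all resolve
    # to that stream.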
    @with_asyncio_loop
    async def test_multiple_async_done(self):
        stream = cuda.stream()
        done_aws = [stream.async_done() for _ in range(4)]
        done = await asyncio.gather(*done_aws)
        for d in done:
            self.assertEqual(d, stream)

    @with_asyncio_loop
    async def test_multiple_async_done_multiple_streams(self):
        streams = [cuda.stream() for _ in range(4)]
        done_aws = [stream.async_done() for stream in streams]
        done = await asyncio.gather(*done_aws)
        # Ensure we got the four original streams in done
        self.assertSetEqual(set(done), set(streams))
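
    # Cancelling one future from a stream must not prevent other futures on
    # the same stream from completing.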
    @with_asyncio_loop
    async def test_cancelled_future(self):
        stream = cuda.stream()
        done1, done2 = stream.async_done(), stream.async_done()
        done1.cancel()
        await done2
        self.assertTrue(done1.cancelled())
        self.assertTrue(done2.done())


@skip_on_cudasim('CUDA Driver API unsupported in the simulator')
class TestFailingStream(CUDATestCase):
    # This test can only be run in isolation because it corrupts the CUDA
    # context, which cannot be recovered from within the same process. It is
    # left here so that it can be run manually for debugging / testing
    # purposes - or may be re-enabled if in future there is infrastructure
    # added for running tests in a separate process (a subprocess cannot be
    # used because CUDA will have been initialized before the fork, so it
    # cannot be used in the child process).
    @unittest.skip
    @with_asyncio_loop
    async def test_failed_stream(self):
        ctx = cuda.current_context()
        module = ctx.create_module_ptx("""
        .version 6.5
        .target sm_30
        .address_size 64
        .visible .entry failing_kernel() { trap; }
        """)
        failing_kernel = module.get_function("failing_kernel")
        stream = cuda.stream()
        failing_kernel.configure((1,), (1,), stream=stream).__call__()
        done = stream.async_done()
        with self.assertRaises(Exception):
            await done
        self.assertIsNotNone(done.exception())


if __name__ == '__main__':
    unittest.main()