ai-content-maker/.venv/Lib/site-packages/numba/cuda/tests/cudadrv/test_deallocations.py

250 lines
8.2 KiB
Python
Raw Permalink Normal View History

2024-05-03 04:18:51 +03:00
from contextlib import contextmanager
import numpy as np
from numba import cuda
from numba.cuda.testing import (unittest, skip_on_cudasim,
skip_if_external_memmgr, CUDATestCase)
from numba.tests.support import captured_stderr
from numba.core import config
@skip_on_cudasim('not supported on CUDASIM')
@skip_if_external_memmgr('Deallocation specific to Numba memory management')
class TestDeallocation(CUDATestCase):
def test_max_pending_count(self):
# get deallocation manager and flush it
deallocs = cuda.current_context().memory_manager.deallocations
deallocs.clear()
self.assertEqual(len(deallocs), 0)
# deallocate to maximum count
for i in range(config.CUDA_DEALLOCS_COUNT):
cuda.to_device(np.arange(1))
self.assertEqual(len(deallocs), i + 1)
# one more to trigger .clear()
cuda.to_device(np.arange(1))
self.assertEqual(len(deallocs), 0)
def test_max_pending_bytes(self):
# get deallocation manager and flush it
ctx = cuda.current_context()
deallocs = ctx.memory_manager.deallocations
deallocs.clear()
self.assertEqual(len(deallocs), 0)
mi = ctx.get_memory_info()
max_pending = 10**6 # 1MB
old_ratio = config.CUDA_DEALLOCS_RATIO
try:
# change to a smaller ratio
config.CUDA_DEALLOCS_RATIO = max_pending / mi.total
# due to round off error (floor is used in calculating
# _max_pending_bytes) it can be off by 1.
self.assertAlmostEqual(deallocs._max_pending_bytes, max_pending,
delta=1)
# allocate half the max size
# this will not trigger deallocation
cuda.to_device(np.ones(max_pending // 2, dtype=np.int8))
self.assertEqual(len(deallocs), 1)
# allocate another remaining
# this will not trigger deallocation
cuda.to_device(np.ones(deallocs._max_pending_bytes -
deallocs._size, dtype=np.int8))
self.assertEqual(len(deallocs), 2)
# another byte to trigger .clear()
cuda.to_device(np.ones(1, dtype=np.int8))
self.assertEqual(len(deallocs), 0)
finally:
# restore old ratio
config.CUDA_DEALLOCS_RATIO = old_ratio
@skip_on_cudasim("defer_cleanup has no effect in CUDASIM")
@skip_if_external_memmgr('Deallocation specific to Numba memory management')
class TestDeferCleanup(CUDATestCase):
def test_basic(self):
harr = np.arange(5)
darr1 = cuda.to_device(harr)
deallocs = cuda.current_context().memory_manager.deallocations
deallocs.clear()
self.assertEqual(len(deallocs), 0)
with cuda.defer_cleanup():
darr2 = cuda.to_device(harr)
del darr1
self.assertEqual(len(deallocs), 1)
del darr2
self.assertEqual(len(deallocs), 2)
deallocs.clear()
self.assertEqual(len(deallocs), 2)
deallocs.clear()
self.assertEqual(len(deallocs), 0)
def test_nested(self):
harr = np.arange(5)
darr1 = cuda.to_device(harr)
deallocs = cuda.current_context().memory_manager.deallocations
deallocs.clear()
self.assertEqual(len(deallocs), 0)
with cuda.defer_cleanup():
with cuda.defer_cleanup():
darr2 = cuda.to_device(harr)
del darr1
self.assertEqual(len(deallocs), 1)
del darr2
self.assertEqual(len(deallocs), 2)
deallocs.clear()
self.assertEqual(len(deallocs), 2)
deallocs.clear()
self.assertEqual(len(deallocs), 2)
deallocs.clear()
self.assertEqual(len(deallocs), 0)
def test_exception(self):
harr = np.arange(5)
darr1 = cuda.to_device(harr)
deallocs = cuda.current_context().memory_manager.deallocations
deallocs.clear()
self.assertEqual(len(deallocs), 0)
class CustomError(Exception):
pass
with self.assertRaises(CustomError):
with cuda.defer_cleanup():
darr2 = cuda.to_device(harr)
del darr2
self.assertEqual(len(deallocs), 1)
deallocs.clear()
self.assertEqual(len(deallocs), 1)
raise CustomError
deallocs.clear()
self.assertEqual(len(deallocs), 0)
del darr1
self.assertEqual(len(deallocs), 1)
deallocs.clear()
self.assertEqual(len(deallocs), 0)
class TestDeferCleanupAvail(CUDATestCase):
def test_context_manager(self):
# just make sure the API is available
with cuda.defer_cleanup():
pass
@skip_on_cudasim('not supported on CUDASIM')
class TestDel(CUDATestCase):
"""
Ensure resources are deleted properly without ignored exception.
"""
@contextmanager
def check_ignored_exception(self, ctx):
with captured_stderr() as cap:
yield
ctx.deallocations.clear()
self.assertFalse(cap.getvalue())
def test_stream(self):
ctx = cuda.current_context()
stream = ctx.create_stream()
with self.check_ignored_exception(ctx):
del stream
def test_event(self):
ctx = cuda.current_context()
event = ctx.create_event()
with self.check_ignored_exception(ctx):
del event
def test_pinned_memory(self):
ctx = cuda.current_context()
mem = ctx.memhostalloc(32)
with self.check_ignored_exception(ctx):
del mem
def test_mapped_memory(self):
ctx = cuda.current_context()
mem = ctx.memhostalloc(32, mapped=True)
with self.check_ignored_exception(ctx):
del mem
def test_device_memory(self):
ctx = cuda.current_context()
mem = ctx.memalloc(32)
with self.check_ignored_exception(ctx):
del mem
def test_managed_memory(self):
ctx = cuda.current_context()
mem = ctx.memallocmanaged(32)
with self.check_ignored_exception(ctx):
del mem
def test_pinned_contextmanager(self):
# Check that temporarily pinned memory is unregistered immediately,
# such that it can be re-pinned at any time
class PinnedException(Exception):
pass
arr = np.zeros(1)
ctx = cuda.current_context()
ctx.deallocations.clear()
with self.check_ignored_exception(ctx):
with cuda.pinned(arr):
pass
with cuda.pinned(arr):
pass
# Should also work inside a `defer_cleanup` block
with cuda.defer_cleanup():
with cuda.pinned(arr):
pass
with cuda.pinned(arr):
pass
# Should also work when breaking out of the block due to an
# exception
try:
with cuda.pinned(arr):
raise PinnedException
except PinnedException:
with cuda.pinned(arr):
pass
def test_mapped_contextmanager(self):
# Check that temporarily mapped memory is unregistered immediately,
# such that it can be re-mapped at any time
class MappedException(Exception):
pass
arr = np.zeros(1)
ctx = cuda.current_context()
ctx.deallocations.clear()
with self.check_ignored_exception(ctx):
with cuda.mapped(arr):
pass
with cuda.mapped(arr):
pass
# Should also work inside a `defer_cleanup` block
with cuda.defer_cleanup():
with cuda.mapped(arr):
pass
with cuda.mapped(arr):
pass
# Should also work when breaking out of the block due to an
# exception
try:
with cuda.mapped(arr):
raise MappedException
except MappedException:
with cuda.mapped(arr):
pass
if __name__ == '__main__':
unittest.main()