177 lines
5.0 KiB
Python
177 lines
5.0 KiB
Python
|
import numpy as np
|
||
|
|
||
|
from numba import cuda, complex64, int32, float64
|
||
|
from numba.cuda.testing import unittest, CUDATestCase
|
||
|
from numba.core.config import ENABLE_CUDASIM
|
||
|
|
||
|
# Host-side arrays that the kernels below hand to cuda.const.array_like.

# Zero-length array with NumPy's default dtype.
CONST_EMPTY = np.array([])

# Ten float64 values: 0.0, 0.5, ..., 4.5.
CONST1D = np.arange(10, dtype=np.float64) / 2.0

# 10x10 int32 grid holding 0..99, stored in Fortran (column-major) order.
CONST2D = np.asfortranarray(
    np.arange(100, dtype=np.int32).reshape((10, 10)))

# 5x5x5 complex64 cube: (k + 1j) / 2j for k in 0..124.
CONST3D = (
    (np.arange(5 * 5 * 5, dtype=np.complex64).reshape((5, 5, 5)) + 1j)
    / 2j
)

# Three single-byte values: 0, 1, 2.
CONST3BYTES = np.arange(3, dtype=np.uint8)

# Structured (record) arrays with fields 'x' (float) and 'y' (int):
# one with no rows and one with two rows.
CONST_RECORD_EMPTY = np.array([], dtype=np.dtype([('x', float), ('y', int)]))
CONST_RECORD = np.array(
    [(1.0, 2), (3.0, 4)],
    dtype=np.dtype([('x', float), ('y', int)]),
)

# Two records of an aligned struct dtype: three uint8 fields, then a uint32,
# then a trailing uint8. align=True asks NumPy to pad fields to their natural
# alignment.
CONST_RECORD_ALIGN = np.array(
    [
        (1, 2, 3, 0xDEADBEEF, 8),
        (4, 5, 6, 0xBEEFDEAD, 10),
    ],
    dtype=np.dtype(
        [
            ('a', np.uint8),
            ('b', np.uint8),
            ('x', np.uint8),
            ('y', np.uint32),
            ('z', np.uint8),
        ],
        align=True,
    ),
)
|
||
|
|
||
|
|
||
|
def cuconstEmpty(A):
    """Kernel: write the length of the constant copy of CONST_EMPTY into A.

    Each thread stores ``len(...)`` of the constant array at its own 1D grid
    index of ``A``.
    """
    tid = cuda.grid(1)
    empty_const = cuda.const.array_like(CONST_EMPTY)
    count = len(empty_const)
    A[tid] = count
|
||
|
|
||
|
|
||
|
def cuconst(A):
    """Kernel: store ``CONST1D[i] + 1.0`` in ``A[i]`` for 1D grid index ``i``."""
    const_vals = cuda.const.array_like(CONST1D)
    tid = cuda.grid(1)

    # The +1.0 is deliberate: without arithmetic on the value, the original
    # author notes it would be loaded & stored as a u32 rather than as a
    # floating-point number.
    loaded = const_vals[tid]
    A[tid] = loaded + 1.0
|
||
|
|
||
|
|
||
|
def cuconst2d(A):
    """Kernel: copy element ``[i, j]`` of the constant copy of CONST2D to A.

    ``(i, j)`` is the thread's position in the 2D launch grid.
    """
    row, col = cuda.grid(2)
    table = cuda.const.array_like(CONST2D)
    A[row, col] = table[row, col]
|
||
|
|
||
|
|
||
|
def cuconst3d(A):
    """Kernel: copy one element of the constant copy of CONST3D into A.

    The element is selected by the thread's x/y/z index within its block.
    """
    cube = cuda.const.array_like(CONST3D)
    tx = cuda.threadIdx.x
    ty = cuda.threadIdx.y
    tz = cuda.threadIdx.z
    A[tx, ty, tz] = cube[tx, ty, tz]
|
||
|
|
||
|
|
||
|
def cuconstRecEmpty(A):
    """Kernel: write the length of the constant copy of CONST_RECORD_EMPTY.

    Each thread stores the record count at its own 1D grid index of ``A``.
    """
    tid = cuda.grid(1)
    empty_records = cuda.const.array_like(CONST_RECORD_EMPTY)
    count = len(empty_records)
    A[tid] = count
|
||
|
|
||
|
|
||
|
def cuconstRec(A, B):
    """Kernel: split record ``i`` of the constant copy of CONST_RECORD.

    Field ``'x'`` goes to ``A[i]`` and field ``'y'`` to ``B[i]``, where ``i``
    is the thread's 1D grid index.
    """
    tid = cuda.grid(1)
    records = cuda.const.array_like(CONST_RECORD)
    rec = records[tid]
    A[tid] = rec['x']
    B[tid] = rec['y']
|
||
|
|
||
|
|
||
|
def cuconstRecAlign(A, B, C, D, E):
    """Kernel: unpack record ``i`` of the constant copy of CONST_RECORD_ALIGN.

    Fields ``'a'``, ``'b'``, ``'x'``, ``'y'``, ``'z'`` are written to
    ``A[i]``, ``B[i]``, ``C[i]``, ``D[i]``, ``E[i]`` respectively, where ``i``
    is the thread's 1D grid index.
    """
    tid = cuda.grid(1)
    records = cuda.const.array_like(CONST_RECORD_ALIGN)
    rec = records[tid]
    A[tid] = rec['a']
    B[tid] = rec['b']
    C[tid] = rec['x']
    D[tid] = rec['y']
    E[tid] = rec['z']
|
||
|
|
||
|
|
||
|
def cuconstAlign(z):
    """Kernel: store ``CONST3BYTES[i] + CONST1D[i]`` in ``z[i]``.

    Both operands are read through constant copies of the module-level
    arrays; ``i`` is the thread's 1D grid index.
    """
    # The 3-byte array is requested before the float64 one, as in the
    # original. NOTE(review): presumably this ordering is what exercises
    # alignment of the second constant array - confirm before reordering.
    byte_vals = cuda.const.array_like(CONST3BYTES)
    double_vals = cuda.const.array_like(CONST1D)
    tid = cuda.grid(1)
    z[tid] = byte_vals[tid] + double_vals[tid]
|
||
|
|
||
|
|
||
|
class TestCudaConstantMemory(CUDATestCase):
    """Tests for kernels that read module-level NumPy arrays through
    ``cuda.const.array_like``.

    Value checks use ``np.testing`` helpers so that a failure reports the
    mismatching elements instead of a bare "False is not true".
    """

    def test_const_array(self):
        """1D float64 constant: check the values and the load instruction."""
        sig = (float64[:],)
        jcuconst = cuda.jit(sig)(cuconst)
        A = np.zeros_like(CONST1D)
        # 2 blocks x 5 threads: one thread per element of CONST1D.
        jcuconst[2, 5](A)
        np.testing.assert_array_equal(A, CONST1D + 1)

        # Assembly inspection is only attempted when the simulator flag is off.
        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.f64',
                jcuconst.inspect_asm(sig),
                "as we're adding to it, load as a double")

    def test_const_empty(self):
        """A zero-length constant array reports length 0 inside a kernel."""
        jcuconstEmpty = cuda.jit('void(int64[:])')(cuconstEmpty)
        # -1 sentinel: the check below fails if the kernel never writes.
        A = np.full(1, fill_value=-1, dtype=np.int64)
        jcuconstEmpty[1, 1](A)
        np.testing.assert_array_equal(A, 0)

    def test_const_align(self):
        """A 3-byte constant followed by a float64 constant reads correctly."""
        jcuconstAlign = cuda.jit('void(float64[:])')(cuconstAlign)
        # NaN sentinel: an element the kernel did not write cannot match the
        # (NaN-free) expected values.
        A = np.full(3, fill_value=np.nan, dtype=float)
        jcuconstAlign[1, 3](A)
        np.testing.assert_array_equal(A, CONST3BYTES + CONST1D[:3])

    def test_const_array_2d(self):
        """2D int32 constant: check the values and the load instruction."""
        sig = (int32[:,:],)
        jcuconst2d = cuda.jit(sig)(cuconst2d)
        A = np.zeros_like(CONST2D, order='C')
        # 2x2 blocks of 5x5 threads: one thread per element of the 10x10 array.
        jcuconst2d[(2, 2), (5, 5)](A)
        np.testing.assert_array_equal(A, CONST2D)

        if not ENABLE_CUDASIM:
            self.assertIn(
                'ld.const.u32',
                jcuconst2d.inspect_asm(sig),
                "load the ints as ints")

    def test_const_array_3d(self):
        """3D complex64 constant: check the values and the load instruction."""
        sig = (complex64[:,:,:],)
        jcuconst3d = cuda.jit(sig)(cuconst3d)
        A = np.zeros_like(CONST3D, order='F')
        # A single 5x5x5 block: the kernel indexes by threadIdx only.
        jcuconst3d[1, (5, 5, 5)](A)
        np.testing.assert_array_equal(A, CONST3D)

        if not ENABLE_CUDASIM:
            asm = jcuconst3d.inspect_asm(sig)
            complex_load = 'ld.const.v2.f32'
            description = 'Load the complex as a vector of 2x f32'
            self.assertIn(complex_load, asm, description)

    def test_const_record_empty(self):
        """A zero-length constant record array reports length 0 in a kernel."""
        jcuconstRecEmpty = cuda.jit('void(int64[:])')(cuconstRecEmpty)
        # -1 sentinel: the check below fails if the kernel never writes.
        A = np.full(1, fill_value=-1, dtype=np.int64)
        jcuconstRecEmpty[1, 1](A)
        np.testing.assert_array_equal(A, 0)

    def test_const_record(self):
        """Fields of a constant record array are readable from a kernel."""
        A = np.zeros(2, dtype=float)
        B = np.zeros(2, dtype=int)
        jcuconst = cuda.jit(cuconstRec).specialize(A, B)

        # 2 blocks x 1 thread: one thread per record.
        jcuconst[2, 1](A, B)
        np.testing.assert_allclose(A, CONST_RECORD['x'])
        np.testing.assert_allclose(B, CONST_RECORD['y'])

    def test_const_record_align(self):
        """Fields of an aligned constant record array are read correctly."""
        A = np.zeros(2, dtype=np.float64)
        B = np.zeros(2, dtype=np.float64)
        C = np.zeros(2, dtype=np.float64)
        D = np.zeros(2, dtype=np.float64)
        E = np.zeros(2, dtype=np.float64)
        jcuconst = cuda.jit(cuconstRecAlign).specialize(A, B, C, D, E)

        # 2 blocks x 1 thread: one thread per record.
        jcuconst[2, 1](A, B, C, D, E)
        np.testing.assert_allclose(A, CONST_RECORD_ALIGN['a'])
        np.testing.assert_allclose(B, CONST_RECORD_ALIGN['b'])
        np.testing.assert_allclose(C, CONST_RECORD_ALIGN['x'])
        np.testing.assert_allclose(D, CONST_RECORD_ALIGN['y'])
        np.testing.assert_allclose(E, CONST_RECORD_ALIGN['z'])
|
||
|
|
||
|
|
||
|
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|