ai-content-maker/.venv/Lib/site-packages/numba/cuda/tests/cudapy/test_nondet.py

50 lines
1.3 KiB
Python

import numpy as np
from numba import cuda, float32, void
from numba.cuda.testing import unittest, CUDATestCase
def generate_input(n):
A = np.array(np.arange(n * n).reshape(n, n), dtype=np.float32)
B = np.array(np.arange(n) + 0, dtype=A.dtype)
return A, B
class TestCudaNonDet(CUDATestCase):
def test_for_pre(self):
"""Test issue with loop not running due to bad sign-extension at the for
loop precondition.
"""
@cuda.jit(void(float32[:, :], float32[:, :], float32[:]))
def diagproduct(c, a, b):
startX, startY = cuda.grid(2)
gridX = cuda.gridDim.x * cuda.blockDim.x
gridY = cuda.gridDim.y * cuda.blockDim.y
height = c.shape[0]
width = c.shape[1]
for x in range(startX, width, (gridX)):
for y in range(startY, height, (gridY)):
c[y, x] = a[y, x] * b[x]
N = 8
A, B = generate_input(N)
F = np.empty(A.shape, dtype=A.dtype)
blockdim = (32, 8)
griddim = (1, 1)
dA = cuda.to_device(A)
dB = cuda.to_device(B)
dF = cuda.to_device(F, copy=False)
diagproduct[griddim, blockdim](dF, dA, dB)
E = np.dot(A, np.diag(B))
np.testing.assert_array_almost_equal(dF.copy_to_host(), E)
if __name__ == '__main__':
unittest.main()