ai-content-maker/.venv/Lib/site-packages/numba/cuda/tests/cudapy/test_minmax.py

114 lines
2.4 KiB
Python

import numpy as np
from numba import cuda, float64
from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
def builtin_max(A, B, C):
i = cuda.grid(1)
if i >= len(C):
return
C[i] = float64(max(A[i], B[i]))
def builtin_min(A, B, C):
i = cuda.grid(1)
if i >= len(C):
return
C[i] = float64(min(A[i], B[i]))
@skip_on_cudasim('Tests PTX emission')
class TestCudaMinMax(CUDATestCase):
def _run(
self,
kernel,
numpy_equivalent,
ptx_instruction,
dtype_left,
dtype_right,
n=5):
kernel = cuda.jit(kernel)
c = np.zeros(n, dtype=np.float64)
a = np.arange(n, dtype=dtype_left) + .5
b = np.full(n, fill_value=2, dtype=dtype_right)
kernel[1, c.shape](a, b, c)
np.testing.assert_allclose(c, numpy_equivalent(a, b))
ptx = next(p for p in kernel.inspect_asm().values())
self.assertIn(ptx_instruction, ptx)
def test_max_f8f8(self):
self._run(
builtin_max,
np.maximum,
'max.f64',
np.float64,
np.float64)
def test_max_f4f8(self):
self._run(
builtin_max,
np.maximum,
'max.f64',
np.float32,
np.float64)
def test_max_f8f4(self):
self._run(
builtin_max,
np.maximum,
'max.f64',
np.float64,
np.float32)
def test_max_f4f4(self):
self._run(
builtin_max,
np.maximum,
'max.f32',
np.float32,
np.float32)
def test_min_f8f8(self):
self._run(
builtin_min,
np.minimum,
'min.f64',
np.float64,
np.float64)
def test_min_f4f8(self):
self._run(
builtin_min,
np.minimum,
'min.f64',
np.float32,
np.float64)
def test_min_f8f4(self):
self._run(
builtin_min,
np.minimum,
'min.f64',
np.float64,
np.float32)
def test_min_f4f4(self):
self._run(
builtin_min,
np.minimum,
'min.f32',
np.float32,
np.float32)
if __name__ == '__main__':
unittest.main()