ai-content-maker/.venv/Lib/site-packages/numba/cuda/libdevicefuncs.py

1058 lines
37 KiB
Python

from collections import namedtuple
from textwrap import indent
from numba.types import float32, float64, int16, int32, int64, void, Tuple
from numba.core.typing.templates import signature
arg = namedtuple("arg", ("name", "ty", "is_ptr"))
functions = {
"__nv_abs": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
"__nv_acos": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_acosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_acosh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_acoshf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_asin": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_asinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_asinh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_asinhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_atan": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_atan2": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_atan2f": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_atanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_atanh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_atanhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_brev": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
"__nv_brevll": (int64, [arg(name="x", ty=int64, is_ptr=False)]),
"__nv_byte_perm": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
arg(name="z", ty=int32, is_ptr=False),
],
),
"__nv_cbrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_cbrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_ceil": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_ceilf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_clz": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
"__nv_clzll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
"__nv_copysign": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_copysignf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_cos": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_cosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_cosh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_coshf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_cospi": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_cospif": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_dadd_rd": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dadd_rn": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dadd_ru": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dadd_rz": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_ddiv_rd": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_ddiv_rn": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_ddiv_ru": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_ddiv_rz": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dmul_rd": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dmul_rn": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dmul_ru": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_dmul_rz": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_double2float_rd": (
float32,
[arg(name="d", ty=float64, is_ptr=False)],
),
"__nv_double2float_rn": (
float32,
[arg(name="d", ty=float64, is_ptr=False)],
),
"__nv_double2float_ru": (
float32,
[arg(name="d", ty=float64, is_ptr=False)],
),
"__nv_double2float_rz": (
float32,
[arg(name="d", ty=float64, is_ptr=False)],
),
"__nv_double2hiint": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2int_rd": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2int_rn": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2int_ru": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2int_rz": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2ll_rd": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2ll_rn": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2ll_ru": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2ll_rz": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2loint": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2uint_rd": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2uint_rn": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2uint_ru": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2uint_rz": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
"__nv_double2ull_rd": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2ull_rn": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2ull_ru": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double2ull_rz": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_double_as_longlong": (
int64,
[arg(name="x", ty=float64, is_ptr=False)],
),
"__nv_drcp_rd": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_drcp_rn": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_drcp_ru": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_drcp_rz": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_dsqrt_rd": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_dsqrt_rn": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_dsqrt_ru": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_dsqrt_rz": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_erf": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_erfc": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_erfcf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_erfcinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_erfcinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_erfcx": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_erfcxf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_erff": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_erfinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_erfinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_exp": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_exp10": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_exp10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_exp2": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_exp2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_expf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_expm1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_expm1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fabs": (float64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_fabsf": (float32, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_fadd_rd": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fadd_rn": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fadd_ru": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fadd_rz": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fast_cosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_exp10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_expf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_fdividef": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fast_log10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_log2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_logf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_powf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fast_sincosf": (
void,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="sptr", ty=float32, is_ptr=True),
arg(name="cptr", ty=float32, is_ptr=True),
],
),
"__nv_fast_sinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fast_tanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fdim": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_fdimf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fdiv_rd": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fdiv_rn": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fdiv_ru": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fdiv_rz": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_ffs": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
"__nv_ffsll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
"__nv_finitef": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_float2half_rn": (int16, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2int_rd": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2int_rn": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2int_ru": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2int_rz": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2ll_rd": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2ll_rn": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2ll_ru": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2ll_rz": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2uint_rd": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2uint_rn": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2uint_ru": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2uint_rz": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
"__nv_float2ull_rd": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2ull_rn": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2ull_ru": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float2ull_rz": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_float_as_int": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_floor": (float64, [arg(name="f", ty=float64, is_ptr=False)]),
"__nv_floorf": (float32, [arg(name="f", ty=float32, is_ptr=False)]),
"__nv_fma": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
arg(name="z", ty=float64, is_ptr=False),
],
),
"__nv_fma_rd": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
arg(name="z", ty=float64, is_ptr=False),
],
),
"__nv_fma_rn": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
arg(name="z", ty=float64, is_ptr=False),
],
),
"__nv_fma_ru": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
arg(name="z", ty=float64, is_ptr=False),
],
),
"__nv_fma_rz": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
arg(name="z", ty=float64, is_ptr=False),
],
),
"__nv_fmaf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
arg(name="z", ty=float32, is_ptr=False),
],
),
"__nv_fmaf_rd": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
arg(name="z", ty=float32, is_ptr=False),
],
),
"__nv_fmaf_rn": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
arg(name="z", ty=float32, is_ptr=False),
],
),
"__nv_fmaf_ru": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
arg(name="z", ty=float32, is_ptr=False),
],
),
"__nv_fmaf_rz": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
arg(name="z", ty=float32, is_ptr=False),
],
),
"__nv_fmax": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_fmaxf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fmin": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_fminf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fmod": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_fmodf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fmul_rd": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fmul_rn": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fmul_ru": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fmul_rz": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_frcp_rd": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_frcp_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_frcp_ru": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_frcp_rz": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_frexp": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="b", ty=int32, is_ptr=True),
],
),
"__nv_frexpf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="b", ty=int32, is_ptr=True),
],
),
"__nv_frsqrt_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fsqrt_rd": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fsqrt_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fsqrt_ru": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fsqrt_rz": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_fsub_rd": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fsub_rn": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fsub_ru": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_fsub_rz": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_hadd": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_half2float": (float32, [arg(name="h", ty=int16, is_ptr=False)]),
"__nv_hiloint2double": (
float64,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_hypot": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_hypotf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_ilogb": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_ilogbf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_int2double_rn": (float64, [arg(name="i", ty=int32, is_ptr=False)]),
"__nv_int2float_rd": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_int2float_rn": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_int2float_ru": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_int2float_rz": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_int_as_float": (float32, [arg(name="x", ty=int32, is_ptr=False)]),
"__nv_isfinited": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_isinfd": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_isinff": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_isnand": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_isnanf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_j0": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_j0f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_j1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_j1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_jn": (
float64,
[
arg(name="n", ty=int32, is_ptr=False),
arg(name="x", ty=float64, is_ptr=False),
],
),
"__nv_jnf": (
float32,
[
arg(name="n", ty=int32, is_ptr=False),
arg(name="x", ty=float32, is_ptr=False),
],
),
"__nv_ldexp": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_ldexpf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_lgamma": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_lgammaf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_ll2double_rd": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2double_rn": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2double_ru": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2double_rz": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2float_rd": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2float_rn": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2float_ru": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ll2float_rz": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_llabs": (int64, [arg(name="x", ty=int64, is_ptr=False)]),
"__nv_llmax": (
int64,
[
arg(name="x", ty=int64, is_ptr=False),
arg(name="y", ty=int64, is_ptr=False),
],
),
"__nv_llmin": (
int64,
[
arg(name="x", ty=int64, is_ptr=False),
arg(name="y", ty=int64, is_ptr=False),
],
),
"__nv_llrint": (int64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_llrintf": (int64, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_llround": (int64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_llroundf": (int64, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_log": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_log10": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_log10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_log1p": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_log1pf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_log2": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_log2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_logb": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_logbf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_logf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_longlong_as_double": (
float64,
[arg(name="x", ty=int64, is_ptr=False)],
),
"__nv_max": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_min": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_modf": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="b", ty=float64, is_ptr=True),
],
),
"__nv_modff": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="b", ty=float32, is_ptr=True),
],
),
"__nv_mul24": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_mul64hi": (
int64,
[
arg(name="x", ty=int64, is_ptr=False),
arg(name="y", ty=int64, is_ptr=False),
],
),
"__nv_mulhi": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
# __nv_nan and __nv_nanf are excluded - they return a representation of a
# quiet NaN, but the argument they take seems to be undocumented, and
# follows a strange form - it is not an output like every other pointer
# argument. If a NaN is required, one can be obtained in CUDA Python by
# other means, e.g. `math.nan`. They are left in this list for completeness
# / reference.
# "__nv_nan": (float64, [arg(name="tagp", ty=int8, is_ptr=True)]),
# "__nv_nanf": (float32, [arg(name="tagp", ty=int8, is_ptr=True)]),
"__nv_nearbyint": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_nearbyintf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_nextafter": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_nextafterf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_normcdf": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_normcdff": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_normcdfinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_normcdfinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_popc": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
"__nv_popcll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
"__nv_pow": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_powf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_powi": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_powif": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_rcbrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_rcbrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_remainder": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
],
),
"__nv_remainderf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
],
),
"__nv_remquo": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=float64, is_ptr=False),
arg(name="c", ty=int32, is_ptr=True),
],
),
"__nv_remquof": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=float32, is_ptr=False),
arg(name="quo", ty=int32, is_ptr=True),
],
),
"__nv_rhadd": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_rint": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_rintf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_round": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_roundf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_rsqrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_rsqrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_sad": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
arg(name="z", ty=int32, is_ptr=False),
],
),
"__nv_saturatef": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_scalbn": (
float64,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_scalbnf": (
float32,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_signbitd": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_signbitf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_sin": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_sincos": (
void,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="sptr", ty=float64, is_ptr=True),
arg(name="cptr", ty=float64, is_ptr=True),
],
),
"__nv_sincosf": (
void,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="sptr", ty=float32, is_ptr=True),
arg(name="cptr", ty=float32, is_ptr=True),
],
),
"__nv_sincospi": (
void,
[
arg(name="x", ty=float64, is_ptr=False),
arg(name="sptr", ty=float64, is_ptr=True),
arg(name="cptr", ty=float64, is_ptr=True),
],
),
"__nv_sincospif": (
void,
[
arg(name="x", ty=float32, is_ptr=False),
arg(name="sptr", ty=float32, is_ptr=True),
arg(name="cptr", ty=float32, is_ptr=True),
],
),
"__nv_sinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_sinh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_sinhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_sinpi": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_sinpif": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_sqrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_sqrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_tan": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_tanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_tanh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_tanhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_tgamma": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_tgammaf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_trunc": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_truncf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_uhadd": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_uint2double_rn": (float64, [arg(name="i", ty=int32, is_ptr=False)]),
"__nv_uint2float_rd": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_uint2float_rn": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_uint2float_ru": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_uint2float_rz": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
"__nv_ull2double_rd": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2double_rn": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2double_ru": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2double_rz": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2float_rd": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2float_rn": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2float_ru": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ull2float_rz": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
"__nv_ullmax": (
int64,
[
arg(name="x", ty=int64, is_ptr=False),
arg(name="y", ty=int64, is_ptr=False),
],
),
"__nv_ullmin": (
int64,
[
arg(name="x", ty=int64, is_ptr=False),
arg(name="y", ty=int64, is_ptr=False),
],
),
"__nv_umax": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_umin": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_umul24": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_umul64hi": (
int64,
[
arg(name="x", ty=int64, is_ptr=False),
arg(name="y", ty=int64, is_ptr=False),
],
),
"__nv_umulhi": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_urhadd": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
],
),
"__nv_usad": (
int32,
[
arg(name="x", ty=int32, is_ptr=False),
arg(name="y", ty=int32, is_ptr=False),
arg(name="z", ty=int32, is_ptr=False),
],
),
"__nv_y0": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_y0f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_y1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
"__nv_y1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
"__nv_yn": (
float64,
[
arg(name="n", ty=int32, is_ptr=False),
arg(name="x", ty=float64, is_ptr=False),
],
),
"__nv_ynf": (
float32,
[
arg(name="n", ty=int32, is_ptr=False),
arg(name="x", ty=float32, is_ptr=False),
],
),
}
def create_signature(retty, args):
"""
Given the return type and arguments for a libdevice function, return the
signature of the stub function used to call it from CUDA Python.
"""
# Any pointer arguments should be part of the return type.
return_types = [arg.ty for arg in args if arg.is_ptr]
# If the return type is void, there is no point adding it to the list of
# return types.
if retty != void:
return_types.insert(0, retty)
if len(return_types) > 1:
retty = Tuple(return_types)
else:
retty = return_types[0]
argtypes = [arg.ty for arg in args if not arg.is_ptr]
return signature(retty, *argtypes)
# The following code generates the stubs for libdevice functions.
#
# Stubs can be regenerated (e.g. if the functions dict above is modified) with:
#
# python -c "from numba.cuda.libdevicefuncs import generate_stubs; \
# generate_stubs()" > numba/cuda/libdevice.py
docstring_template = """
See https://docs.nvidia.com/cuda/libdevice-users-guide/{func}.html
{param_types}
:rtype: {retty}
"""
param_template = """\
:param {a.name}: Argument.
:type {a.name}: {a.ty}"""
def generate_stubs():
for name, (retty, args) in functions.items():
# Some libdevice functions have arguments called `in`, which causes a
# syntax error in Python, so we rename these to `x`.
def argname(arg):
if arg.name == "in":
return "x"
else:
return arg.name
argnames = [argname(a) for a in args if not a.is_ptr]
argstr = ", ".join(argnames)
signature = create_signature(retty, args)
param_types = "\n".join(
[param_template.format(a=a) for a in args if not a.is_ptr]
)
docstring = docstring_template.format(
param_types=param_types, retty=signature.return_type, func=name
)
docstring = indent(docstring, " ")
print(f'def {name[5:]}({argstr}):\n """{docstring}"""\n\n')