"""
Enum values for the CUDA driver. Information about the values
can be found on the official NVIDIA documentation website.
ref: https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html
anchor: #group__CUDA__TYPES
"""


# Error codes

CUDA_SUCCESS = 0
CUDA_ERROR_INVALID_VALUE = 1
CUDA_ERROR_OUT_OF_MEMORY = 2
CUDA_ERROR_NOT_INITIALIZED = 3
CUDA_ERROR_DEINITIALIZED = 4
CUDA_ERROR_PROFILER_DISABLED = 5
CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8
CUDA_ERROR_STUB_LIBRARY = 34
CUDA_ERROR_DEVICE_UNAVAILABLE = 46
CUDA_ERROR_NO_DEVICE = 100
CUDA_ERROR_INVALID_DEVICE = 101
CUDA_ERROR_DEVICE_NOT_LICENSED = 102
CUDA_ERROR_INVALID_IMAGE = 200
CUDA_ERROR_INVALID_CONTEXT = 201
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202
CUDA_ERROR_MAP_FAILED = 205
CUDA_ERROR_UNMAP_FAILED = 206
CUDA_ERROR_ARRAY_IS_MAPPED = 207
CUDA_ERROR_ALREADY_MAPPED = 208
CUDA_ERROR_NO_BINARY_FOR_GPU = 209
CUDA_ERROR_ALREADY_ACQUIRED = 210
CUDA_ERROR_NOT_MAPPED = 211
CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212
CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213
CUDA_ERROR_ECC_UNCORRECTABLE = 214
CUDA_ERROR_UNSUPPORTED_LIMIT = 215
CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217
CUDA_ERROR_INVALID_PTX = 218
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219
CUDA_ERROR_NVLINK_UNCORRECTABLE = 220
CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221
CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222
CUDA_ERROR_JIT_COMPILATION_DISABLED = 223
CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224
CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225
CUDA_ERROR_INVALID_SOURCE = 300
CUDA_ERROR_FILE_NOT_FOUND = 301
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303
CUDA_ERROR_OPERATING_SYSTEM = 304
CUDA_ERROR_INVALID_HANDLE = 400
CUDA_ERROR_ILLEGAL_STATE = 401
CUDA_ERROR_NOT_FOUND = 500
CUDA_ERROR_NOT_READY = 600
CUDA_ERROR_ILLEGAL_ADDRESS = 700
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701
CUDA_ERROR_LAUNCH_TIMEOUT = 702
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703
CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704
CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705
CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708
CUDA_ERROR_CONTEXT_IS_DESTROYED = 709
CUDA_ERROR_ASSERT = 710
CUDA_ERROR_TOO_MANY_PEERS = 711
CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712
CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713
CUDA_ERROR_HARDWARE_STACK_ERROR = 714
CUDA_ERROR_ILLEGAL_INSTRUCTION = 715
CUDA_ERROR_MISALIGNED_ADDRESS = 716
CUDA_ERROR_INVALID_ADDRESS_SPACE = 717
CUDA_ERROR_INVALID_PC = 718
CUDA_ERROR_LAUNCH_FAILED = 719
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720
CUDA_ERROR_NOT_PERMITTED = 800
CUDA_ERROR_NOT_SUPPORTED = 801
CUDA_ERROR_SYSTEM_NOT_READY = 802
CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803
CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804
CUDA_ERROR_MPS_CONNECTION_FAILED = 805
CUDA_ERROR_MPS_RPC_FAILURE = 806
CUDA_ERROR_MPS_SERVER_NOT_READY = 807
CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808
CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809
CUDA_ERROR_MPS_CLIENT_TERMINATED = 810
CUDA_ERROR_CDP_NOT_SUPPORTED = 811
CUDA_ERROR_CDP_VERSION_MISMATCH = 812
CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900
CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901
CUDA_ERROR_STREAM_CAPTURE_MERGE = 902
CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903
CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904
CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905
CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906
CUDA_ERROR_CAPTURED_EVENT = 907
CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908
CUDA_ERROR_TIMEOUT = 909
CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910
CUDA_ERROR_EXTERNAL_DEVICE = 911
CUDA_ERROR_INVALID_CLUSTER_SIZE = 912
CUDA_ERROR_UNKNOWN = 999

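# Illustrative sketch (not part of the enum definitions above): one way these
# error codes might be used is to wrap raw driver calls made through ctypes
# and compare the returned CUresult against CUDA_SUCCESS. The helper name and
# usage below are assumptions; it relies on libcuda (libcuda.so.1 on Linux,
# nvcuda.dll on Windows) and on cuGetErrorString from the CUDA driver API.
def _example_check_cu_result(result):
    """Raise a RuntimeError with the driver's error string if result != CUDA_SUCCESS."""
    import ctypes

    if result == CUDA_SUCCESS:
        return
    libcuda = ctypes.CDLL("libcuda.so.1")
    msg = ctypes.c_char_p()
    libcuda.cuGetErrorString(result, ctypes.byref(msg))
    raise RuntimeError(f"CUDA driver error {result}: {msg.value!r}")
# Example use: _example_check_cu_result(libcuda.cuInit(0))
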
# Function cache configurations

# no preference for shared memory or L1 (default)
CU_FUNC_CACHE_PREFER_NONE = 0x00
# prefer larger shared memory and smaller L1 cache
CU_FUNC_CACHE_PREFER_SHARED = 0x01
# prefer larger L1 cache and smaller shared memory
CU_FUNC_CACHE_PREFER_L1 = 0x02
# prefer equal sized L1 cache and shared memory
CU_FUNC_CACHE_PREFER_EQUAL = 0x03

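# Illustrative sketch (an assumption about usage, not part of this module):
# the cache-configuration values above are what the driver API expects in
# cuCtxSetCacheConfig / cuFuncSetCacheConfig. A minimal ctypes call, assuming
# libcuda.so.1 is available and a context is already current, might look like:
def _example_prefer_shared_memory():
    """Ask the driver to favor shared memory over L1 for the current context."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    status = libcuda.cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED)
    return status == CUDA_SUCCESS
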
# Context creation flags

# Automatic scheduling
CU_CTX_SCHED_AUTO = 0x00
# Set spin as default scheduling
CU_CTX_SCHED_SPIN = 0x01
# Set yield as default scheduling
CU_CTX_SCHED_YIELD = 0x02
# Set blocking synchronization as default scheduling
CU_CTX_SCHED_BLOCKING_SYNC = 0x04

CU_CTX_SCHED_MASK = 0x07
# Support mapped pinned allocations
# This flag was deprecated as of CUDA 11.0 and it no longer has any effect.
# All contexts as of CUDA 3.2 behave as though the flag is enabled.
CU_CTX_MAP_HOST = 0x08
# Keep local memory allocation after launch
CU_CTX_LMEM_RESIZE_TO_MAX = 0x10
# Trigger coredumps from exceptions in this context
CU_CTX_COREDUMP_ENABLE = 0x20
# Enable user pipe to trigger coredumps in this context
CU_CTX_USER_COREDUMP_ENABLE = 0x40
# Force synchronous blocking on cudaMemcpy/cudaMemset
CU_CTX_SYNC_MEMOPS = 0x80

CU_CTX_FLAGS_MASK = 0xff

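# Illustrative sketch (assumption, not part of this module): the scheduling
# and feature flags above are meant to be OR-ed together and passed to
# cuCtxCreate. The snippet below uses the versioned cuCtxCreate_v2 symbol
# exported by libcuda; error handling is reduced to asserts for brevity.
def _example_create_context(device_ordinal=0):
    """Create a context that blocks the CPU thread on synchronization."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    assert libcuda.cuInit(0) == CUDA_SUCCESS

    device = ctypes.c_int()
    assert libcuda.cuDeviceGet(ctypes.byref(device), device_ordinal) == CUDA_SUCCESS

    context = ctypes.c_void_p()
    flags = CU_CTX_SCHED_BLOCKING_SYNC
    assert libcuda.cuCtxCreate_v2(ctypes.byref(context), flags, device) == CUDA_SUCCESS
    return context
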
# DEFINES

# If set, host memory is portable between CUDA contexts.
# Flag for cuMemHostAlloc()
CU_MEMHOSTALLOC_PORTABLE = 0x01

# If set, host memory is mapped into CUDA address space and
# cuMemHostGetDevicePointer() may be called on the host pointer.
# Flag for cuMemHostAlloc()
CU_MEMHOSTALLOC_DEVICEMAP = 0x02

# If set, host memory is allocated as write-combined - fast to write,
# faster to DMA, slow to read except via SSE4 streaming load instruction
# (MOVNTDQA).
# Flag for cuMemHostAlloc()
CU_MEMHOSTALLOC_WRITECOMBINED = 0x04

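# Illustrative sketch (assumption, not part of this module): cuMemHostAlloc
# takes a bitwise OR of the CU_MEMHOSTALLOC_* flags above. The helper assumes
# libcuda.so.1 and an existing current context.
def _example_alloc_pinned_host_memory(nbytes):
    """Allocate portable, device-mapped pinned host memory; return (host_ptr, free_fn)."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    host_ptr = ctypes.c_void_p()
    flags = CU_MEMHOSTALLOC_PORTABLE | CU_MEMHOSTALLOC_DEVICEMAP
    status = libcuda.cuMemHostAlloc(ctypes.byref(host_ptr), ctypes.c_size_t(nbytes), flags)
    if status != CUDA_SUCCESS:
        raise RuntimeError(f"cuMemHostAlloc failed with CUresult {status}")
    return host_ptr, lambda: libcuda.cuMemFreeHost(host_ptr)
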
# If set, host memory is portable between CUDA contexts.
# Flag for cuMemHostRegister()
CU_MEMHOSTREGISTER_PORTABLE = 0x01

# If set, host memory is mapped into CUDA address space and
# cuMemHostGetDevicePointer() may be called on the host pointer.
# Flag for cuMemHostRegister()
CU_MEMHOSTREGISTER_DEVICEMAP = 0x02

# If set, the passed memory pointer is treated as pointing to some
# memory-mapped I/O space, e.g. belonging to a third-party PCIe device.
# On Windows the flag is a no-op. On Linux that memory is marked
# as non cache-coherent for the GPU and is expected
# to be physically contiguous. It may return CUDA_ERROR_NOT_PERMITTED
# if run as an unprivileged user, CUDA_ERROR_NOT_SUPPORTED on older
# Linux kernel versions. On all other platforms, it is not supported
# and CUDA_ERROR_NOT_SUPPORTED is returned.
# Flag for cuMemHostRegister()
CU_MEMHOSTREGISTER_IOMEMORY = 0x04

# If set, the passed memory pointer is treated as pointing to memory
# that is considered read-only by the device. On platforms without
# CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
# this flag is required in order to register memory mapped
# to the CPU as read-only. Support for the use of this flag can be
# queried from the device attribute
# CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED.
# Using this flag with a current context associated with a device
# that does not have this attribute set will cause cuMemHostRegister
# to error with CUDA_ERROR_NOT_SUPPORTED.
CU_MEMHOSTREGISTER_READ_ONLY = 0x08

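# Illustrative sketch (assumption, not part of this module): already-allocated
# host memory can be page-locked with cuMemHostRegister using the flags above.
# The versioned cuMemHostRegister_v2 symbol is used here; the buffer must stay
# alive until cuMemHostUnregister is called.
def _example_register_host_buffer(buf):
    """Pin a writable Python buffer (e.g. bytearray) and map it into the CUDA address space."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    addr = (ctypes.c_char * len(buf)).from_buffer(buf)
    status = libcuda.cuMemHostRegister_v2(
        addr, ctypes.c_size_t(len(buf)), CU_MEMHOSTREGISTER_DEVICEMAP
    )
    if status != CUDA_SUCCESS:
        raise RuntimeError(f"cuMemHostRegister failed with CUresult {status}")
    return lambda: libcuda.cuMemHostUnregister(addr)
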
# CUDA Mem Attach Flags

# If set, managed memory is accessible from all streams on all devices.
CU_MEM_ATTACH_GLOBAL = 0x01

# If set on a platform where the device attribute
# cudaDevAttrConcurrentManagedAccess is zero, then managed memory is
# only accessible on the host (unless explicitly attached to a stream
# with cudaStreamAttachMemAsync, in which case it can be used in kernels
# launched on that stream).
CU_MEM_ATTACH_HOST = 0x02

# If set on a platform where the device attribute
# cudaDevAttrConcurrentManagedAccess is zero, then managed memory accesses
# on the associated device must only be from a single stream.
CU_MEM_ATTACH_SINGLE = 0x04

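# Illustrative sketch (assumption, not part of this module): the attach flags
# above are passed to cuMemAllocManaged and cuStreamAttachMemAsync to control
# which streams may touch a managed allocation on devices without concurrent
# managed access. The helper assumes libcuda.so.1 and a current context.
def _example_attach_managed_to_stream(stream, nbytes):
    """Allocate managed memory host-attached, then hand the whole range to one stream."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    dptr = ctypes.c_uint64()  # CUdeviceptr
    status = libcuda.cuMemAllocManaged(
        ctypes.byref(dptr), ctypes.c_size_t(nbytes), CU_MEM_ATTACH_HOST
    )
    if status != CUDA_SUCCESS:
        raise RuntimeError(f"cuMemAllocManaged failed with CUresult {status}")
    # A length of 0 attaches the entire allocation to the given stream.
    libcuda.cuStreamAttachMemAsync(stream, dptr, ctypes.c_size_t(0), CU_MEM_ATTACH_SINGLE)
    return dptr
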
# Event creation flags

# Default event flag
CU_EVENT_DEFAULT = 0x0
# Event uses blocking synchronization
CU_EVENT_BLOCKING_SYNC = 0x1
# Event will not record timing data
CU_EVENT_DISABLE_TIMING = 0x2
# Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set
CU_EVENT_INTERPROCESS = 0x4

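# Illustrative sketch (assumption, not part of this module): event flags are
# OR-ed together and passed to cuEventCreate. Timing requires the default
# behavior, i.e. CU_EVENT_DISABLE_TIMING must not be set.
def _example_time_stream(stream):
    """Measure elapsed GPU time between two events recorded on `stream`."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    start, stop = ctypes.c_void_p(), ctypes.c_void_p()
    libcuda.cuEventCreate(ctypes.byref(start), CU_EVENT_DEFAULT)
    libcuda.cuEventCreate(ctypes.byref(stop), CU_EVENT_DEFAULT)

    libcuda.cuEventRecord(start, stream)
    # ... enqueue work on `stream` here ...
    libcuda.cuEventRecord(stop, stream)
    libcuda.cuEventSynchronize(stop)

    elapsed_ms = ctypes.c_float()
    libcuda.cuEventElapsedTime(ctypes.byref(elapsed_ms), start, stop)
    return elapsed_ms.value
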
# Pointer information

# The CUcontext on which a pointer was allocated or registered
CU_POINTER_ATTRIBUTE_CONTEXT = 1
# The CUmemorytype describing the physical location of a pointer
CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2
# The address at which a pointer's memory may be accessed on the device
CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3
# The address at which a pointer's memory may be accessed on the host
CU_POINTER_ATTRIBUTE_HOST_POINTER = 4
# A pair of tokens for use with the nv-p2p.h Linux kernel interface
CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5
# Synchronize every synchronous memory operation initiated on this region
CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6
# A process-wide unique ID for an allocated memory region
CU_POINTER_ATTRIBUTE_BUFFER_ID = 7
# Indicates if the pointer points to managed memory
CU_POINTER_ATTRIBUTE_IS_MANAGED = 8
# A device ordinal of a device on which a pointer was allocated or registered
CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9
# 1 if this pointer maps to an allocation
# that is suitable for cudaIpcGetMemHandle, 0 otherwise
CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10
# Starting address for this requested pointer
CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11
# Size of the address range for this requested pointer
CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12
# 1 if this pointer is in a valid address range
# that is mapped to a backing allocation, 0 otherwise
CU_POINTER_ATTRIBUTE_MAPPED = 13
# Bitmask of allowed CUmemAllocationHandleType for this allocation
CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14
# 1 if the memory this pointer is referencing
# can be used with the GPUDirect RDMA API
CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15
# Returns the access flags the device associated
# with the current context has on the corresponding
# memory referenced by the pointer given
CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16
# Returns the mempool handle for the allocation
# if it was allocated from a mempool. Otherwise returns NULL
CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
# Size of the actual underlying mapping that the pointer belongs to
CU_POINTER_ATTRIBUTE_MAPPING_SIZE = 18
# The start address of the mapping that the pointer belongs to
CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = 19
# A process-wide unique id corresponding to the
# physical allocation the pointer belongs to
CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = 20

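# Illustrative sketch (assumption, not part of this module): the values above
# are passed as the `attribute` argument of cuPointerGetAttribute, which
# writes its result into a caller-provided buffer whose type depends on the
# attribute (an unsigned int for CU_POINTER_ATTRIBUTE_MEMORY_TYPE, for example).
def _example_query_memory_type(dptr):
    """Return the CU_MEMORYTYPE_* value describing where `dptr` (a CUdeviceptr int) lives."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    mem_type = ctypes.c_uint()
    status = libcuda.cuPointerGetAttribute(
        ctypes.byref(mem_type), CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ctypes.c_uint64(dptr)
    )
    if status != CUDA_SUCCESS:
        raise RuntimeError(f"cuPointerGetAttribute failed with CUresult {status}")
    return mem_type.value
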
# Memory types

# Host memory
CU_MEMORYTYPE_HOST = 0x01
# Device memory
CU_MEMORYTYPE_DEVICE = 0x02
# Array memory
CU_MEMORYTYPE_ARRAY = 0x03
# Unified device or host memory
CU_MEMORYTYPE_UNIFIED = 0x04

# Device code formats

# Compiled device-class-specific device code
# Applicable options: none
CU_JIT_INPUT_CUBIN = 0

# PTX source code
# Applicable options: PTX compiler options
CU_JIT_INPUT_PTX = 1

# Bundle of multiple cubins and/or PTX of some device code
# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
CU_JIT_INPUT_FATBINARY = 2

# Host object with embedded device code
# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
CU_JIT_INPUT_OBJECT = 3

# Archive of host objects with embedded device code
# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
CU_JIT_INPUT_LIBRARY = 4

CU_JIT_NUM_INPUT_TYPES = 6

# Online compiler and linker options

# Max number of registers that a thread may use.
# Option type: unsigned int
# Applies to: compiler only
CU_JIT_MAX_REGISTERS = 0

# IN: Specifies minimum number of threads per block to target compilation for
# OUT: Returns the number of threads the compiler actually targeted.
# This restricts the resource utilization of the compiler (e.g. max
# registers) such that a block with the given number of threads should be
# able to launch based on register limitations. Note, this option does not
# currently take into account any other resource limitations, such as
# shared memory utilization.
# Cannot be combined with ::CU_JIT_TARGET.
# Option type: unsigned int
# Applies to: compiler only
CU_JIT_THREADS_PER_BLOCK = 1

# Overwrites the option value with the total wall clock time, in
# milliseconds, spent in the compiler and linker
# Option type: float
# Applies to: compiler and linker
CU_JIT_WALL_TIME = 2

# Pointer to a buffer in which to print any log messages
# that are informational in nature (the buffer size is specified via
# option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)
# Option type: char *
# Applies to: compiler and linker
CU_JIT_INFO_LOG_BUFFER = 3

# IN: Log buffer size in bytes. Log messages will be capped at this size
# (including null terminator)
# OUT: Amount of log buffer filled with messages
# Option type: unsigned int
# Applies to: compiler and linker
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4

# Pointer to a buffer in which to print any log messages that
# reflect errors (the buffer size is specified via option
# ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)
# Option type: char *
# Applies to: compiler and linker
CU_JIT_ERROR_LOG_BUFFER = 5

# IN: Log buffer size in bytes. Log messages will be capped at this size
# (including null terminator)
# OUT: Amount of log buffer filled with messages
# Option type: unsigned int
# Applies to: compiler and linker
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6

# Level of optimizations to apply to generated code (0 - 4), with 4
# being the default and highest level of optimizations.
# Option type: unsigned int
# Applies to: compiler only
CU_JIT_OPTIMIZATION_LEVEL = 7

# No option value required. Determines the target based on the current
# attached context (default)
# Option type: No option value needed
# Applies to: compiler and linker
CU_JIT_TARGET_FROM_CUCONTEXT = 8

# Target is chosen based on supplied ::CUjit_target. Cannot be
# combined with ::CU_JIT_THREADS_PER_BLOCK.
# Option type: unsigned int for enumerated type ::CUjit_target
# Applies to: compiler and linker
CU_JIT_TARGET = 9

# Specifies choice of fallback strategy if matching cubin is not found.
# Choice is based on supplied ::CUjit_fallback.
# Option type: unsigned int for enumerated type ::CUjit_fallback
# Applies to: compiler only
CU_JIT_FALLBACK_STRATEGY = 10

# Specifies whether to create debug information in output (-g)
# (0: false, default)
# Option type: int
# Applies to: compiler and linker
CU_JIT_GENERATE_DEBUG_INFO = 11

# Generate verbose log messages (0: false, default)
# Option type: int
# Applies to: compiler and linker
CU_JIT_LOG_VERBOSE = 12

# Generate line number information (-lineinfo) (0: false, default)
# Option type: int
# Applies to: compiler only
CU_JIT_GENERATE_LINE_INFO = 13

# Specifies whether to enable caching explicitly (-dlcm)
# Choice is based on supplied ::CUjit_cacheMode_enum.
# Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum
# Applies to: compiler only
CU_JIT_CACHE_MODE = 14

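# Illustrative sketch (assumption, not part of this module): the CU_JIT_*
# options above are passed to cuModuleLoadDataEx as two parallel arrays, one
# of option enums and one of option values (each value squeezed into a
# pointer-sized slot). The snippet loads PTX text with an error-log buffer and
# a capped register count; it assumes libcuda.so.1 and a current context.
def _example_load_ptx(ptx_source):
    """JIT-compile a PTX string into a module, returning (module_handle, error_log)."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    error_log = ctypes.create_string_buffer(4096)

    option_keys = (ctypes.c_int * 3)(
        CU_JIT_ERROR_LOG_BUFFER,
        CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
        CU_JIT_MAX_REGISTERS,
    )
    option_vals = (ctypes.c_void_p * 3)(
        ctypes.cast(error_log, ctypes.c_void_p),
        ctypes.c_void_p(len(error_log)),
        ctypes.c_void_p(64),
    )

    module = ctypes.c_void_p()
    status = libcuda.cuModuleLoadDataEx(
        ctypes.byref(module), ptx_source.encode(), 3, option_keys, option_vals
    )
    if status != CUDA_SUCCESS:
        raise RuntimeError(f"cuModuleLoadDataEx failed: {error_log.value!r}")
    return module, error_log.value
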
# CUfunction_attribute

# The maximum number of threads per block, beyond which a launch of the
# function would fail. This number depends on both the function and the
# device on which the function is currently loaded.
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0

# The size in bytes of statically-allocated shared memory required by
# this function. This does not include dynamically-allocated shared
# memory requested by the user at runtime.
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1

# The size in bytes of user-allocated constant memory required by this
# function.
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2

# The size in bytes of local memory used by each thread of this function.
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3

# The number of registers used by each thread of this function.
CU_FUNC_ATTRIBUTE_NUM_REGS = 4

# The PTX virtual architecture version for which the function was
# compiled. This value is the major PTX version * 10 + the minor PTX
# version, so a PTX version 1.3 function would return the value 13.
# Note that this may return the undefined value of 0 for cubins
# compiled prior to CUDA 3.0.
CU_FUNC_ATTRIBUTE_PTX_VERSION = 5

# The binary architecture version for which the function was compiled.
# This value is the major binary version * 10 + the minor binary version,
# so a binary version 1.3 function would return the value 13. Note that
# this will return a value of 10 for legacy cubins that do not have a
# properly-encoded binary architecture version.
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6

# The attribute to indicate whether the function has been compiled
# with user specified option "-Xptxas --dlcm=ca" set
CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7

# The maximum size in bytes of dynamically-allocated shared memory
# that can be used by this function. If the user-specified
# dynamic shared memory size is larger than this value,
# the launch will fail. See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8

# On devices where the L1 cache and shared memory use the same
# hardware resources, this sets the shared memory carveout preference,
# in percent of the total shared memory. Refer to
# CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
# This is only a hint, and the driver can choose a different ratio
# if required to execute the function.
# See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9

# If this attribute is set, the kernel must launch with a valid cluster
# size specified. See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET = 10

# The required cluster width in blocks. The values must either all be 0
# or all be positive. The validity of the cluster dimensions
# is otherwise checked at launch time. If the value is set during
# compile time, it cannot be set at runtime.
# Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
# See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH = 11

# The required cluster height in blocks. The values must either all be 0
# or all be positive. The validity of the cluster dimensions
# is otherwise checked at launch time. If the value is set during
# compile time, it cannot be set at runtime.
# Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
# See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT = 12

# The required cluster depth in blocks. The values must either all be 0
# or all be positive. The validity of the cluster dimensions
# is otherwise checked at launch time. If the value is set during
# compile time, it cannot be set at runtime.
# Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
# See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH = 13

# Whether the function can be launched with non-portable cluster size.
# 1 is allowed, 0 is disallowed. A non-portable cluster size may only
# function on the specific SKUs the program is tested on.
# The launch might fail if the program is run on a different hardware platform.
# For more details refer to:
# https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES
CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED = 14

# The block scheduling policy of a function.
# The value type is CUclusterSchedulingPolicy / cudaClusterSchedulingPolicy.
# See cuFuncSetAttribute, cuKernelSetAttribute
CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 15

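# Illustrative sketch (assumption, not part of this module): the attributes
# above are read with cuFuncGetAttribute (and a subset can be written with
# cuFuncSetAttribute). The helper assumes a CUfunction handle obtained from
# cuModuleGetFunction and that libcuda.so.1 is available.
def _example_max_threads_per_block(func):
    """Return the largest block size this compiled function can be launched with."""
    import ctypes

    libcuda = ctypes.CDLL("libcuda.so.1")
    value = ctypes.c_int()
    status = libcuda.cuFuncGetAttribute(
        ctypes.byref(value), CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func
    )
    if status != CUDA_SUCCESS:
        raise RuntimeError(f"cuFuncGetAttribute failed with CUresult {status}")
    return value.value
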
# Device attributes

CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9
CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10
CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_WIDTH = 21
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_WIDTH = 22
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_HEIGHT = 23
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH = 24
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT = 25
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH = 26
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_WIDTH = 27
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_HEIGHT = 28
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_LAYERS = 29
CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31
CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32
CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34
CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37
CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTI_PROCESSOR = 39
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_WIDTH = 42
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_LAYERS = 43
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_WIDTH = 45
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_HEIGHT = 46
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH_ALT = 47
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT_ALT = 48
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH_ALT = 49
CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50
CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_WIDTH = 52
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_WIDTH = 53
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_LAYERS = 54
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_WIDTH = 55
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_WIDTH = 56
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_HEIGHT = 57
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_WIDTH = 58
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_HEIGHT = 59
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_DEPTH = 60
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_WIDTH = 61
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_LAYERS = 62
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_WIDTH = 63
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_HEIGHT = 64
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_LAYERS = 65
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_WIDTH = 66
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_WIDTH = 67
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_LAYERS = 68
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LINEAR_WIDTH = 69
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_WIDTH = 70
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_HEIGHT = 71
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_PITCH = 72
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_WIDTH = 73
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT = 74
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_MIPMAPPED_WIDTH = 77
CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78
CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79
CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83
CU_DEVICE_ATTRIBUTE_IS_MULTI_GPU_BOARD = 84
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85
CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86
CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90
CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91
CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95
CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97