ai-content-maker/.venv/Lib/site-packages/torch/include/ATen/dlpack.h

/*!
 *  Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

/**
 * \brief Compatibility with C++
 *
 * DLPACK_EXTERN_C expands to `extern "C"` when this header is compiled as
 * C++ and to nothing when it is compiled as C. The type declarations further
 * down in this header are wrapped in a literal `extern "C" { ... }` block
 * and do not use this macro themselves.
 */
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*!
 * \brief The current version of dlpack.
 *
 * NOTE(review): 80 presumably encodes release 0.8 as a single integer;
 * confirm the encoding against the upstream project before comparing
 * against it numerically.
 */
#define DLPACK_VERSION 80

/*!
 * \brief The current ABI version of dlpack.
 *
 * Kept separately from DLPACK_VERSION so the binary layout of the structs
 * below can be versioned independently of the release number.
 */
#define DLPACK_ABI_VERSION 1

/*!
 * \brief DLPACK_DLL symbol-visibility prefix for Windows.
 *
 * - On Windows (_WIN32) with DLPACK_EXPORTS defined: __declspec(dllexport).
 * - On Windows without DLPACK_EXPORTS: __declspec(dllimport).
 * - Everywhere else: expands to nothing.
 */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
/*!
 * \brief The device type in DLDevice.
 *
 * When compiled as C++ the enum is declared with a fixed underlying type of
 * int32_t; when compiled as C it is a plain enum. All enumerator values are
 * explicit; the values 5 and 6 are not assigned in this header.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type, used to quickly test an
   * extension device. The semantics can differ depending on the
   * implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the
   * device type, the USM allocation type and the SYCL context it is bound to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for next generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
} DLDeviceType;

/*!
 * \brief A Device for Tensor and operator.
 *
 * Pairs a device kind (DLDeviceType) with an index selecting one device of
 * that kind.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;

/*!
 * \brief The type code options for DLDataType.
 *
 * DLDataType::code stores one of these values narrowed to uint8_t.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be
   * well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*! \brief boolean */
  kDLBool = 6U,
} DLDataTypeCode;

/*!
 * \brief The data type the tensor can hold. The data type is assumed to
 * follow the native endianness. An explicit error message should be raised
 * when attempting to export an array with non-native endianness.
 *
 *  Examples
 *   - float: type_code = 2, bits = 32, lanes = 1
 *   - float4 (vectorized 4 float): type_code = 2, bits = 32, lanes = 4
 *   - int8: type_code = 0, bits = 8, lanes = 1
 *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *     convention, the underlying storage size of bool is 8 bits)
 *
 * In the examples above, "type_code" refers to the `code` field below.
 */
typedef struct {
  /*!
   * \brief Type code of base types.
   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
   * footprint, but the value should be one of DLDataTypeCode enum values.
   */
  uint8_t code;
  /*!
   * \brief Number of bits, common choices are 8, 16, 32.
   */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;

/*!
 * \brief Plain C Tensor object, does not manage memory.
 */
typedef struct {
  /*!
   * \brief The data pointer points to the allocated data. This will be a CUDA
   * device pointer or a cl_mem handle in OpenCL. It may be opaque on some
   * device types. This pointer is always aligned to 256 bytes as in CUDA. The
   * `byte_offset` field should be used to point to the beginning of the data.
   *
   * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
   * on CPU/CUDA/ROCm, and always use `byte_offset=0`.  This must be fixed
   * (after which this note will be updated); at the moment it is recommended
   * to not rely on the data pointer being correctly aligned.
   *
   * For a given DLTensor, the size of memory required to store the contents
   * of data is calculated as follows:
   *
   * \code{.c}
   * static inline size_t GetDataSize(const DLTensor* t) {
   *   size_t size = 1;
   *   for (int32_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
   * \endcode
   */
  void* data;
  /*! \brief The device of the tensor */
  DLDevice device;
  /*! \brief Number of dimensions */
  int32_t ndim;
  /*! \brief The data type of the pointer */
  DLDataType dtype;
  /*! \brief The shape of the tensor (one extent per dimension) */
  const int64_t* shape;
  /*!
   * \brief Strides of the tensor (in number of elements, not bytes).
   *  Can be NULL, indicating that the tensor is compact and row-major.
   */
  const int64_t* strides;
  /*! \brief The offset in bytes from the data pointer to the first element */
  uint64_t byte_offset;
} DLTensor;

/*!
 * \brief C Tensor object that manages the memory of a DLTensor. This data
 *  structure is intended to facilitate the borrowing of a DLTensor by another
 *  framework. It is not meant to transfer the tensor. When the borrowing
 *  framework no longer needs the tensor, it should call the deleter to notify
 *  the host that the resource is no longer needed.
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief The context of the original host framework in which this
   *   DLManagedTensor is used. It can also be NULL.
   */
  void * manager_ctx;
  /*! \brief Destructor, with signature
   *   void (*)(struct DLManagedTensor*). It should be called to destruct the
   *   manager_ctx which holds the DLManagedTensor. It can be NULL if there is
   *   no way for the caller to provide a reasonable destructor.
   *   The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_
first commit 2024-05-03 04:18:51 +03:00			`/*!`
			`* Copyright (c) 2017 by Contributors`
			`* \file dlpack.h`
			`* \brief The common header of DLPack.`
			`*/`
			`#ifndef DLPACK_DLPACK_H_`
			`#define DLPACK_DLPACK_H_`

			`/**`
			`* \brief Compatibility with C++`
			`*/`
			`#ifdef __cplusplus`
			`#define DLPACK_EXTERN_C extern "C"`
			`#else`
			`#define DLPACK_EXTERN_C`
			`#endif`

			`/*! \brief The current version of dlpack */`
			`#define DLPACK_VERSION 80`

			`/*! \brief The current ABI version of dlpack */`
			`#define DLPACK_ABI_VERSION 1`

			`/*! \brief DLPACK_DLL prefix for windows */`
			`#ifdef _WIN32`
			`#ifdef DLPACK_EXPORTS`
			`#define DLPACK_DLL __declspec(dllexport)`
			`#else`
			`#define DLPACK_DLL __declspec(dllimport)`
			`#endif`
			`#else`
			`#define DLPACK_DLL`
			`#endif`

			`#include <stdint.h>`
			`#include <stddef.h>`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`
			`/*!`
			`* \brief The device type in DLDevice.`
			`*/`
			`#ifdef __cplusplus`
			`typedef enum : int32_t {`
			`#else`
			`typedef enum {`
			`#endif`
			`/*! \brief CPU device */`
			`kDLCPU = 1,`
			`/*! \brief CUDA GPU device */`
			`kDLCUDA = 2,`
			`/*!`
			`* \brief Pinned CUDA CPU memory by cudaMallocHost`
			`*/`
			`kDLCUDAHost = 3,`
			`/*! \brief OpenCL devices. */`
			`kDLOpenCL = 4,`
			`/*! \brief Vulkan buffer for next generation graphics. */`
			`kDLVulkan = 7,`
			`/*! \brief Metal for Apple GPU. */`
			`kDLMetal = 8,`
			`/*! \brief Verilog simulator buffer */`
			`kDLVPI = 9,`
			`/*! \brief ROCm GPUs for AMD GPUs */`
			`kDLROCM = 10,`
			`/*!`
			`* \brief Pinned ROCm CPU memory allocated by hipMallocHost`
			`*/`
			`kDLROCMHost = 11,`
			`/*!`
			`* \brief Reserved extension device type,`
			`* used for quickly test extension device`
			`* The semantics can differ depending on the implementation.`
			`*/`
			`kDLExtDev = 12,`
			`/*!`
			`* \brief CUDA managed/unified memory allocated by cudaMallocManaged`
			`*/`
			`kDLCUDAManaged = 13,`
			`/*!`
			`* \brief Unified shared memory allocated on a oneAPI non-partititioned`
			`* device. Call to oneAPI runtime is required to determine the device`
			`* type, the USM allocation type and the sycl context it is bound to.`
			`*`
			`*/`
			`kDLOneAPI = 14,`
			`/*! \brief GPU support for next generation WebGPU standard. */`
			`kDLWebGPU = 15,`
			`/*! \brief Qualcomm Hexagon DSP */`
			`kDLHexagon = 16,`
			`} DLDeviceType;`

			`/*!`
			`* \brief A Device for Tensor and operator.`
			`*/`
			`typedef struct {`
			`/*! \brief The device type used in the device. */`
			`DLDeviceType device_type;`
			`/*!`
			`* \brief The device index.`
			`* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.`
			`*/`
			`int32_t device_id;`
			`} DLDevice;`

			`/*!`
			`* \brief The type code options DLDataType.`
			`*/`
			`typedef enum {`
			`/*! \brief signed integer */`
			`kDLInt = 0U,`
			`/*! \brief unsigned integer */`
			`kDLUInt = 1U,`
			`/*! \brief IEEE floating point */`
			`kDLFloat = 2U,`
			`/*!`
			`* \brief Opaque handle type, reserved for testing purposes.`
			`* Frameworks need to agree on the handle data type for the exchange to be well-defined.`
			`*/`
			`kDLOpaqueHandle = 3U,`
			`/*! \brief bfloat16 */`
			`kDLBfloat = 4U,`
			`/*!`
			`* \brief complex number`
			`* (C/C++/Python layout: compact struct per complex number)`
			`*/`
			`kDLComplex = 5U,`
			`/*! \brief boolean */`
			`kDLBool = 6U,`
			`} DLDataTypeCode;`

			`/*!`
			`* \brief The data type the tensor can hold. The data type is assumed to follow the`
			`* native endian-ness. An explicit error message should be raised when attempting to`
			`* export an array with non-native endianness`
			`*`
			`* Examples`
			`* - float: type_code = 2, bits = 32, lanes = 1`
			`* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4`
			`* - int8: type_code = 0, bits = 8, lanes = 1`
			`* - std::complex<float>: type_code = 5, bits = 64, lanes = 1`
			`* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)`
			`*/`
			`typedef struct {`
			`/*!`
			`* \brief Type code of base types.`
			`* We keep it uint8_t instead of DLDataTypeCode for minimal memory`
			`* footprint, but the value should be one of DLDataTypeCode enum values.`
			`* */`
			`uint8_t code;`
			`/*!`
			`* \brief Number of bits, common choices are 8, 16, 32.`
			`*/`
			`uint8_t bits;`
			`/*! \brief Number of lanes in the type, used for vector types. */`
			`uint16_t lanes;`
			`} DLDataType;`

			`/*!`
			`* \brief Plain C Tensor object, does not manage memory.`
			`*/`
			`typedef struct {`
			`/*!`
			`* \brief The data pointer points to the allocated data. This will be CUDA`
			`* device pointer or cl_mem handle in OpenCL. It may be opaque on some device`
			`* types. This pointer is always aligned to 256 bytes as in CUDA. The`
			``* `byte_offset` field should be used to point to the beginning of the data.``
			`*`
			`* Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,`
			`* TVM, perhaps others) do not adhere to this 256 byte aligment requirement`
			``* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed``
			`* (after which this note will be updated); at the moment it is recommended`
			`* to not rely on the data pointer being correctly aligned.`
			`*`
			`* For given DLTensor, the size of memory required to store the contents of`
			`* data is calculated as follows:`
			`*`
			`* \code{.c}`
			`* static inline size_t GetDataSize(const DLTensor* t) {`
			`* size_t size = 1;`
			`* for (tvm_index_t i = 0; i < t->ndim; ++i) {`
			`* size *= t->shape[i];`
			`* }`
			`* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;`
			`* return size;`
			`* }`
			`* \endcode`
			`*/`
			`void* data;`
			`/*! \brief The device of the tensor */`
			`DLDevice device;`
			`/*! \brief Number of dimensions */`
			`int32_t ndim;`
			`/*! \brief The data type of the pointer*/`
			`DLDataType dtype;`
			`/*! \brief The shape of the tensor */`
			`const int64_t* shape;`
			`/*!`
			`* \brief strides of the tensor (in number of elements, not bytes)`
			`* can be NULL, indicating tensor is compact and row-majored.`
			`*/`
			`const int64_t* strides;`
			`/*! \brief The offset in bytes to the beginning pointer to data */`
			`uint64_t byte_offset;`
			`} DLTensor;`

			`/*!`
			`* \brief C Tensor object, manage memory of DLTensor. This data structure is`
			`* intended to facilitate the borrowing of DLTensor by another framework. It is`
			`* not meant to transfer the tensor. When the borrowing framework doesn't need`
			`* the tensor, it should call the deleter to notify the host that the resource`
			`* is no longer needed.`
			`*/`
			`typedef struct DLManagedTensor {`
			`/*! \brief DLTensor which is being memory managed */`
			`DLTensor dl_tensor;`
			`/*! \brief the context of the original host framework of DLManagedTensor in`
			`* which DLManagedTensor is used in the framework. It can also be NULL.`
			`*/`
			`void * manager_ctx;`
			`/*! \brief Destructor signature void (*)(void*) - this should be called`
			`* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL`
			`* if there is no way for the caller to provide a reasonable destructor.`
			`* The destructors deletes the argument self as well.`
			`*/`
			`void (*deleter)(struct DLManagedTensor * self);`
			`} DLManagedTensor;`
			`#ifdef __cplusplus`
			`} // DLPACK_EXTERN_C`
			`#endif`
			`#endif // DLPACK_DLPACK_H_`