153 lines
3.5 KiB
C
153 lines
3.5 KiB
C
|
#pragma once
|
||
|
|
||
|
/* This file defines math functions that are compatible across different GPU
 * platforms (currently CUDA and HIP).
 */
|
||
|
#if defined(__CUDACC__) || defined(__HIPCC__)
|
||
|
|
||
|
#include <c10/macros/Macros.h>
|
||
|
#include <c10/util/Exception.h>
|
||
|
|
||
|
// Select the declaration specifiers that prefix every wrapper in this header.
//   * HIP compiler (__HIPCC__):        inline, device-only.
//   * CUDA runtime compilation (RTC):  host+device, no inline/static.
//   * Any other CUDA compilation:      static inline, host+device.
#if defined(__HIPCC__)
#define __MATH_FUNCTIONS_DECL__ inline C10_DEVICE
#elif defined(__CUDACC_RTC__)
#define __MATH_FUNCTIONS_DECL__ C10_HOST_DEVICE
#else /* neither __HIPCC__ nor __CUDACC_RTC__ */
#define __MATH_FUNCTIONS_DECL__ static inline C10_HOST_DEVICE
#endif /* __HIPCC__ / __CUDACC_RTC__ */
|
||
|
|
||
|
namespace c10::cuda::compat {
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::fabsf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float abs(float x) {
  const float magnitude = ::fabsf(x);
  return magnitude;
}

// Double-precision overload: forwards x to the global ::fabs and returns
// its result.
__MATH_FUNCTIONS_DECL__ double abs(double x) {
  const double magnitude = ::fabs(x);
  return magnitude;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::expf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float exp(float x) {
  const float result = ::expf(x);
  return result;
}

// Double-precision overload: forwards x to the global ::exp and returns
// its result.
__MATH_FUNCTIONS_DECL__ double exp(double x) {
  const double result = ::exp(x);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::ceilf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float ceil(float x) {
  const float rounded = ::ceilf(x);
  return rounded;
}

// Double-precision overload: forwards x to the global ::ceil and returns
// its result.
__MATH_FUNCTIONS_DECL__ double ceil(double x) {
  const double rounded = ::ceil(x);
  return rounded;
}
|
||
|
|
||
|
// Single-precision overload.
// When compiling device code (__CUDA_ARCH__) or with the HIP compiler, this
// forwards (x, y) to the global ::copysignf and returns its result.
// Otherwise (host pass) the body only raises an internal assertion: the host
// definition has to exist for the compiler but is never used.
// std::copysign gets ICE/Segfaults with gcc 7.5/8 on arm64 (e.g. Jetson),
// see PyTorch PR #51834.
__MATH_FUNCTIONS_DECL__ float copysign(float x, float y) {
#if !defined(__CUDA_ARCH__) && !defined(__HIPCC__)
  TORCH_INTERNAL_ASSERT(
      false, "CUDAMathCompat copysign should not run on the CPU");
#else
  return ::copysignf(x, y);
#endif
}

// Double-precision overload.
// Same structure as the float overload: forwards to the global ::copysign in
// device/HIP compilation, and raises an internal assertion in the host pass
// (same gcc/arm64 rationale, PyTorch PR #51834).
__MATH_FUNCTIONS_DECL__ double copysign(double x, double y) {
#if !defined(__CUDA_ARCH__) && !defined(__HIPCC__)
  TORCH_INTERNAL_ASSERT(
      false, "CUDAMathCompat copysign should not run on the CPU");
#else
  return ::copysign(x, y);
#endif
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::floorf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float floor(float x) {
  const float rounded = ::floorf(x);
  return rounded;
}

// Double-precision overload: forwards x to the global ::floor and returns
// its result.
__MATH_FUNCTIONS_DECL__ double floor(double x) {
  const double rounded = ::floor(x);
  return rounded;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::logf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float log(float x) {
  const float result = ::logf(x);
  return result;
}

// Double-precision overload: forwards x to the global ::log and returns
// its result.
__MATH_FUNCTIONS_DECL__ double log(double x) {
  const double result = ::log(x);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::log1pf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float log1p(float x) {
  const float result = ::log1pf(x);
  return result;
}

// Double-precision overload: forwards x to the global ::log1p and returns
// its result.
__MATH_FUNCTIONS_DECL__ double log1p(double x) {
  const double result = ::log1p(x);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: forwards (x, y) to the global ::fmaxf and
// returns its result.
__MATH_FUNCTIONS_DECL__ float max(float x, float y) {
  const float larger = ::fmaxf(x, y);
  return larger;
}

// Double-precision overload: forwards (x, y) to the global ::fmax and
// returns its result.
__MATH_FUNCTIONS_DECL__ double max(double x, double y) {
  const double larger = ::fmax(x, y);
  return larger;
}
|
||
|
|
||
|
// Single-precision overload: forwards (x, y) to the global ::fminf and
// returns its result.
__MATH_FUNCTIONS_DECL__ float min(float x, float y) {
  const float smaller = ::fminf(x, y);
  return smaller;
}

// Double-precision overload: forwards (x, y) to the global ::fmin and
// returns its result.
__MATH_FUNCTIONS_DECL__ double min(double x, double y) {
  const double smaller = ::fmin(x, y);
  return smaller;
}
|
||
|
|
||
|
// Single-precision overload: forwards (x, y) to the global ::powf and
// returns its result.
__MATH_FUNCTIONS_DECL__ float pow(float x, float y) {
  const float result = ::powf(x, y);
  return result;
}

// Double-precision overload: forwards (x, y) to the global ::pow and
// returns its result.
__MATH_FUNCTIONS_DECL__ double pow(double x, double y) {
  const double result = ::pow(x, y);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: hands x and both output pointers to the global
// ::sincosf, which writes its results through sptr and cptr. Returns nothing.
__MATH_FUNCTIONS_DECL__ void sincos(float x, float* sptr, float* cptr) {
  ::sincosf(x, sptr, cptr);
}

// Double-precision overload: hands x and both output pointers to the global
// ::sincos, which writes its results through sptr and cptr. Returns nothing.
__MATH_FUNCTIONS_DECL__ void sincos(double x, double* sptr, double* cptr) {
  ::sincos(x, sptr, cptr);
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::sqrtf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float sqrt(float x) {
  const float root = ::sqrtf(x);
  return root;
}

// Double-precision overload: forwards x to the global ::sqrt and returns
// its result.
__MATH_FUNCTIONS_DECL__ double sqrt(double x) {
  const double root = ::sqrt(x);
  return root;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::rsqrtf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float rsqrt(float x) {
  const float result = ::rsqrtf(x);
  return result;
}

// Double-precision overload: forwards x to the global ::rsqrt and returns
// its result.
__MATH_FUNCTIONS_DECL__ double rsqrt(double x) {
  const double result = ::rsqrt(x);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::tanf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float tan(float x) {
  const float result = ::tanf(x);
  return result;
}

// Double-precision overload: forwards x to the global ::tan and returns
// its result.
__MATH_FUNCTIONS_DECL__ double tan(double x) {
  const double result = ::tan(x);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::tanhf and returns
// its result.
__MATH_FUNCTIONS_DECL__ float tanh(float x) {
  const float result = ::tanhf(x);
  return result;
}

// Double-precision overload: forwards x to the global ::tanh and returns
// its result.
__MATH_FUNCTIONS_DECL__ double tanh(double x) {
  const double result = ::tanh(x);
  return result;
}
|
||
|
|
||
|
// Single-precision overload: forwards x to the global ::normcdff and returns
// its result.
__MATH_FUNCTIONS_DECL__ float normcdf(float x) {
  const float result = ::normcdff(x);
  return result;
}

// Double-precision overload: forwards x to the global ::normcdf and returns
// its result.
__MATH_FUNCTIONS_DECL__ double normcdf(double x) {
  const double result = ::normcdf(x);
  return result;
}
|
||
|
|
||
|
} // namespace c10::cuda::compat
|
||
|
|
||
|
#endif
|