116 lines
3.4 KiB
C
116 lines
3.4 KiB
C
|
// Copyright 2023-present Facebook. All Rights Reserved.
|
||
|
|
||
|
#pragma once
|
||
|
|
||
|
#include <c10/macros/Export.h>
|
||
|
#include <array>
|
||
|
#include <chrono>
|
||
|
#include <cstddef>
|
||
|
#include <cstdint>
|
||
|
#include <ctime>
|
||
|
#include <functional>
|
||
|
#include <type_traits>
|
||
|
|
||
|
#if defined(C10_IOS) && defined(C10_MOBILE)
|
||
|
#include <sys/time.h> // for gettimeofday()
|
||
|
#endif
|
||
|
|
||
|
#if defined(__i386__) || defined(__x86_64__) || defined(__amd64__)
|
||
|
#define C10_RDTSC
|
||
|
#if defined(_MSC_VER)
|
||
|
#include <intrin.h>
|
||
|
#elif defined(__CUDACC__) || defined(__HIPCC__)
|
||
|
#undef C10_RDTSC
|
||
|
#elif defined(__clang__)
|
||
|
// `__rdtsc` is available by default.
|
||
|
// NB: This has to be first, because Clang will also define `__GNUC__`
|
||
|
#elif defined(__GNUC__)
|
||
|
#include <x86intrin.h>
|
||
|
#else
|
||
|
#undef C10_RDTSC
|
||
|
#endif
|
||
|
#endif
|
||
|
|
||
|
namespace c10 {
|
||
|
|
||
|
using time_t = int64_t;
|
||
|
using steady_clock_t = std::conditional_t<
|
||
|
std::chrono::high_resolution_clock::is_steady,
|
||
|
std::chrono::high_resolution_clock,
|
||
|
std::chrono::steady_clock>;
|
||
|
|
||
|
inline time_t getTimeSinceEpoch() {
|
||
|
auto now = std::chrono::system_clock::now().time_since_epoch();
|
||
|
return std::chrono::duration_cast<std::chrono::nanoseconds>(now).count();
|
||
|
}
|
||
|
|
||
|
inline time_t getTime(bool allow_monotonic = false) {
|
||
|
#if defined(C10_IOS) && defined(C10_MOBILE)
|
||
|
// clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS
|
||
|
// can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime
|
||
|
// is implemented or not
|
||
|
struct timeval now;
|
||
|
gettimeofday(&now, NULL);
|
||
|
return static_cast<time_t>(now.tv_sec) * 1000000000 +
|
||
|
static_cast<time_t>(now.tv_usec) * 1000;
|
||
|
#elif defined(_WIN32) || defined(__MACH__)
|
||
|
return std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||
|
steady_clock_t::now().time_since_epoch())
|
||
|
.count();
|
||
|
#else
|
||
|
// clock_gettime is *much* faster than std::chrono implementation on Linux
|
||
|
struct timespec t {};
|
||
|
auto mode = CLOCK_REALTIME;
|
||
|
if (allow_monotonic) {
|
||
|
mode = CLOCK_MONOTONIC;
|
||
|
}
|
||
|
clock_gettime(mode, &t);
|
||
|
return static_cast<time_t>(t.tv_sec) * 1000000000 +
|
||
|
static_cast<time_t>(t.tv_nsec);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
// We often do not need to capture true wall times. If a fast mechanism such
|
||
|
// as TSC is available we can use that instead and convert back to epoch time
|
||
|
// during post processing. This greatly reduce the clock's contribution to
|
||
|
// profiling.
|
||
|
// http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/
|
||
|
// https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io
|
||
|
// TODO: We should use
|
||
|
// `https://github.com/google/benchmark/blob/main/src/cycleclock.h`
|
||
|
inline auto getApproximateTime() {
|
||
|
#if defined(C10_RDTSC)
|
||
|
return static_cast<uint64_t>(__rdtsc());
|
||
|
#else
|
||
|
return getTime();
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
using approx_time_t = decltype(getApproximateTime());
|
||
|
static_assert(
|
||
|
std::is_same_v<approx_time_t, int64_t> ||
|
||
|
std::is_same_v<approx_time_t, uint64_t>,
|
||
|
"Expected either int64_t (`getTime`) or uint64_t (some TSC reads).");
|
||
|
|
||
|
// Convert `getCount` results to Nanoseconds since unix epoch.
|
||
|
class C10_API ApproximateClockToUnixTimeConverter final {
|
||
|
public:
|
||
|
ApproximateClockToUnixTimeConverter();
|
||
|
std::function<time_t(approx_time_t)> makeConverter();
|
||
|
|
||
|
struct UnixAndApproximateTimePair {
|
||
|
time_t t_;
|
||
|
approx_time_t approx_t_;
|
||
|
};
|
||
|
static UnixAndApproximateTimePair measurePair();
|
||
|
|
||
|
private:
|
||
|
static constexpr size_t replicates = 1001;
|
||
|
using time_pairs = std::array<UnixAndApproximateTimePair, replicates>;
|
||
|
time_pairs measurePairs();
|
||
|
|
||
|
time_pairs start_times_;
|
||
|
};
|
||
|
|
||
|
} // namespace c10
|