// Copyright 2023-present Facebook. All Rights Reserved. #pragma once #include #include #include #include #include #include #include #include #if defined(C10_IOS) && defined(C10_MOBILE) #include // for gettimeofday() #endif #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) #define C10_RDTSC #if defined(_MSC_VER) #include #elif defined(__CUDACC__) || defined(__HIPCC__) #undef C10_RDTSC #elif defined(__clang__) // `__rdtsc` is available by default. // NB: This has to be first, because Clang will also define `__GNUC__` #elif defined(__GNUC__) #include #else #undef C10_RDTSC #endif #endif namespace c10 { using time_t = int64_t; using steady_clock_t = std::conditional_t< std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock>; inline time_t getTimeSinceEpoch() { auto now = std::chrono::system_clock::now().time_since_epoch(); return std::chrono::duration_cast(now).count(); } inline time_t getTime(bool allow_monotonic = false) { #if defined(C10_IOS) && defined(C10_MOBILE) // clock_gettime is only available on iOS 10.0 or newer. Unlike OS X, iOS // can't rely on CLOCK_REALTIME, as it is defined no matter if clock_gettime // is implemented or not struct timeval now; gettimeofday(&now, NULL); return static_cast(now.tv_sec) * 1000000000 + static_cast(now.tv_usec) * 1000; #elif defined(_WIN32) || defined(__MACH__) return std::chrono::duration_cast( steady_clock_t::now().time_since_epoch()) .count(); #else // clock_gettime is *much* faster than std::chrono implementation on Linux struct timespec t {}; auto mode = CLOCK_REALTIME; if (allow_monotonic) { mode = CLOCK_MONOTONIC; } clock_gettime(mode, &t); return static_cast(t.tv_sec) * 1000000000 + static_cast(t.tv_nsec); #endif } // We often do not need to capture true wall times. If a fast mechanism such // as TSC is available we can use that instead and convert back to epoch time // during post processing. This greatly reduce the clock's contribution to // profiling. // http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/ // https://quick-bench.com/q/r8opkkGZSJMu9wM_XTbDouq-0Io // TODO: We should use // `https://github.com/google/benchmark/blob/main/src/cycleclock.h` inline auto getApproximateTime() { #if defined(C10_RDTSC) return static_cast(__rdtsc()); #else return getTime(); #endif } using approx_time_t = decltype(getApproximateTime()); static_assert( std::is_same_v || std::is_same_v, "Expected either int64_t (`getTime`) or uint64_t (some TSC reads)."); // Convert `getCount` results to Nanoseconds since unix epoch. class C10_API ApproximateClockToUnixTimeConverter final { public: ApproximateClockToUnixTimeConverter(); std::function makeConverter(); struct UnixAndApproximateTimePair { time_t t_; approx_time_t approx_t_; }; static UnixAndApproximateTimePair measurePair(); private: static constexpr size_t replicates = 1001; using time_pairs = std::array; time_pairs measurePairs(); time_pairs start_times_; }; } // namespace c10