69 #include <unordered_set>
77 #include <sys/resource.h>
80 #include <sys/types.h>
87 #if defined( __APPLE__ )
88 # include <sys/sysctl.h>
91 #if defined(__linux__)
96 #if defined( _WIN32 ) || defined( _WIN64 )|| defined( _MSC_VER )
100 #elif defined( __GNUC__ ) || defined( __clang__ )
105 # ifndef GENESIS_INCLUDED_CPUID_H_
106 # define GENESIS_INCLUDED_CPUID_H_
111 # include <sys/ioctl.h>
112 # include <sys/types.h>
118 #if !(defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))
123 #if defined( _WIN32 ) || defined( _WIN64 )|| defined( _MSC_VER )
127 # include <windows.h>
128 #elif defined( __APPLE__ )
129 # include <mach/mach_error.h>
130 # include <mach/mach_host.h>
131 # include <mach/mach_init.h>
132 # include <mach/mach_types.h>
133 # include <mach/mach.h>
134 # include <mach/task_info.h>
135 # include <mach/vm_map.h>
136 # include <mach/vm_statistics.h>
137 # include <sys/resource.h>
138 # include <sys/time.h>
139 # include <sys/types.h>
141 #elif defined( __GNUC__ ) || defined( __clang__ ) || defined(__linux__)
142 # include <sys/types.h>
143 # include <sys/sysinfo.h>
147 # include <sys/times.h>
151 #ifdef GENESIS_OPENMP
163 #if defined( DEBUG ) && defined( NDEBUG )
164 static_assert(
false,
"Cannot compile with both DEBUG and NDEBUG flags set." );
167 #if ! defined( DEBUG ) && ! defined( NDEBUG )
168 static_assert(
false,
"Cannot compile with neiher DEBUG nor NDEBUG flag set." );
175 static bool initialized =
false;
196 #elif defined( NDEBUG )
207 #elif defined __linux__
209 #elif defined __APPLE__
211 #elif defined __unix__
218 #if defined(__clang__)
220 #elif defined(__ICC) || defined(__INTEL_COMPILER)
222 #elif defined(__GNUC__) || defined(__GNUG__)
224 #elif defined(__HP_cc) || defined(__HP_aCC)
226 #elif defined(__IBMCPP__)
228 #elif defined(_MSC_VER)
232 #elif defined(__SUNPRO_CC)
239 #if defined(__clang__)
241 #elif defined(__ICC) || defined(__INTEL_COMPILER)
243 #elif defined(__GNUC__) || defined(__GNUG__)
249 #elif defined(__HP_cc) || defined(__HP_aCC)
251 #elif defined(__IBMCPP__)
253 #elif defined(_MSC_VER)
257 #elif defined(__SUNPRO_CC)
266 #elif defined(_MSVC_LANG) && _MSVC_LANG >= 201703L
273 #ifdef GENESIS_OPENMP
299 static std::unordered_map<std::string,std::string> result;
300 static bool initialized =
false;
368 res +=
"Compiler Information\n";
369 res +=
"=============================================\n\n";
370 res +=
"Platform = " + info_comp.platform +
"\n";
371 res +=
"Compiler = " + info_comp.compiler_family +
" " + info_comp.compiler_version +
"\n";
372 res +=
"C++ version = " + info_comp.cpp_version +
"\n";
373 res +=
"Build type = " + info_comp.build_type +
"\n";
374 res +=
"With OpenMP = " + std::string( info_comp.with_openmp ?
"true" :
"false" ) +
"\n";
375 res +=
"With AVX = " + std::string( info_comp.with_avx ?
"true" :
"false" ) +
"\n";
376 res +=
"With AVX2 = " + std::string( info_comp.with_avx2 ?
"true" :
"false" ) +
"\n";
395 #if defined( _WIN32 ) || defined( _WIN64 )
397 void get_cpuid_(int32_t out[4], int32_t eax, int32_t ecx)
399 __cpuidex(out, eax, ecx);
402 __int64
xgetbv(
unsigned int x)
408 typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
411 BOOL bIsWow64 = FALSE;
413 LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS) GetProcAddress(
414 GetModuleHandle(TEXT(
"kernel32")),
"IsWow64Process");
416 if (NULL != fnIsWow64Process)
418 if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
433 return IsWow64() != 0;
437 #elif defined(__arm__) || defined(__aarch64__) || defined(__ARM_ARCH) // for M1/M2 chips
439 void get_cpuid_( int32_t out[4], int32_t eax, int32_t ecx )
447 out[0] = out[1] = out[2] = out[3] = 0;
450 uint64_t
xgetbv(
unsigned int index)
470 __cpuid_count(eax, ecx, out[0], out[1], out[2], out[3]);
488 __asm__ __volatile__(
"xgetbv" :
"=a"(eax),
"=d"(edx) :
"c"(index));
489 return ((uint64_t)edx << 32) | eax;
500 #if defined(__linux__)
502 static std::string get_cpu_info_linux_( std::string
const& key )
504 std::string value =
"(not found)";
505 std::ifstream fs(
"/proc/cpuinfo");
509 std::getline(fs, line,
'\n');
510 if( strncmp(line.c_str(), key.c_str(), key.length()) == 0 ) {
511 size_t offset = key.length();
514 (
offset < line.length() )
518 value = line.c_str() +
offset;
536 bool avxSupported =
false;
541 bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0;
542 bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0;
544 if( osUsesXSAVE_XRSTORE && cpuAVXSuport ) {
545 auto const _XCR_XFEATURE_ENABLED_MASK = 0;
546 uint64_t xcrFeatureMask =
xgetbv(_XCR_XFEATURE_ENABLED_MASK);
547 avxSupported = (xcrFeatureMask & 0x6) == 0x6;
559 auto const _XCR_XFEATURE_ENABLED_MASK = 0;
560 uint64_t xcrFeatureMask =
xgetbv(_XCR_XFEATURE_ENABLED_MASK);
561 return (xcrFeatureMask & 0xe6) == 0xe6;
570 memcpy(name + 0, &CPUInfo[1], 4);
571 memcpy(name + 4, &CPUInfo[3], 4);
572 memcpy(name + 8, &CPUInfo[2], 4);
580 std::string model =
"unknown CPU model";
581 #if defined(__linux__)
582 model = get_cpu_info_linux_(
"model name");
583 #elif defined(__APPLE__)
586 if( sysctlbyname(
"machdep.cpu.brand_string", &str, &len, NULL, 0 ) == 0 ) {
593 #if defined(__linux__)
595 size_t get_memtotal_linux_()
600 struct sysinfo memInfo;
601 if( sysinfo( &memInfo )) {
604 return static_cast<size_t>( memInfo.totalram ) *
static_cast<size_t>( memInfo.mem_unit );
614 #if defined( _WIN32 ) || defined( _WIN64 )|| defined( _MSC_VER )
616 MEMORYSTATUSEX memInfo;
617 memInfo.dwLength =
sizeof(MEMORYSTATUSEX);
618 GlobalMemoryStatusEx(&memInfo);
619 return memInfo.ullTotalPhys;
621 #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
623 long phys_pages = sysconf(_SC_PHYS_PAGES);
624 long pagesize = sysconf(_SC_PAGESIZE);
626 if ((phys_pages == -1) || (pagesize == -1)) {
627 #if defined(__linux__)
628 return get_memtotal_linux_();
637 if (pagesize > LONG_MAX / phys_pages) {
640 return static_cast<size_t>( pagesize ) *
static_cast<size_t>( phys_pages );
643 #elif defined(__APPLE__)
645 int mib[] = { CTL_HW, HW_MEMSIZE };
647 size_t length =
sizeof(ram);
648 if (-1 == sysctl(mib, 2, &ram, &
length, NULL, 0)) {
653 #elif defined(__linux__)
655 return get_memtotal_linux_();
669 static bool initialized =
false;
675 static const uint16_t little_endian_test = 0x1000;
676 result.
is_little_endian = ( 0 == *
reinterpret_cast< uint8_t const*
>( &little_endian_test ));
718 uint32_t nExIds = info[0];
720 if( nIds >= 0x00000001 ) {
722 result.
HW_MMX = (info[3] & ((int)1 << 23)) != 0;
723 result.
HW_SSE = (info[3] & ((int)1 << 25)) != 0;
724 result.
HW_SSE2 = (info[3] & ((int)1 << 26)) != 0;
725 result.
HW_SSE3 = (info[2] & ((int)1 << 0)) != 0;
727 result.
HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0;
728 result.
HW_SSE41 = (info[2] & ((int)1 << 19)) != 0;
729 result.
HW_SSE42 = (info[2] & ((int)1 << 20)) != 0;
730 result.
HW_AES = (info[2] & ((int)1 << 25)) != 0;
732 result.
HW_AVX = (info[2] & ((int)1 << 28)) != 0;
733 result.
HW_FMA3 = (info[2] & ((int)1 << 12)) != 0;
735 result.
HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
737 if( nIds >= 0x00000007 ) {
739 result.
HW_AVX2 = (info[1] & ((int)1 << 5)) != 0;
741 result.
HW_BMI1 = (info[1] & ((int)1 << 3)) != 0;
742 result.
HW_BMI2 = (info[1] & ((int)1 << 8)) != 0;
743 result.
HW_ADX = (info[1] & ((int)1 << 19)) != 0;
744 result.
HW_MPX = (info[1] & ((int)1 << 14)) != 0;
745 result.
HW_SHA = (info[1] & ((int)1 << 29)) != 0;
746 result.
HW_RDSEED = (info[1] & ((int)1 << 18)) != 0;
748 result.
HW_RDPID = (info[2] & ((int)1 << 22)) != 0;
750 result.
HW_AVX512_F = (info[1] & ((int)1 << 16)) != 0;
769 result.
HW_GFNI = (info[2] & ((int)1 << 8)) != 0;
770 result.
HW_VAES = (info[2] & ((int)1 << 9)) != 0;
777 if( nExIds >= 0x80000001 ) {
779 result.
HW_x64 = (info[3] & ((int)1 << 29)) != 0;
780 result.
HW_ABM = (info[2] & ((int)1 << 5)) != 0;
781 result.
HW_SSE4a = (info[2] & ((int)1 << 6)) != 0;
782 result.
HW_FMA4 = (info[2] & ((int)1 << 16)) != 0;
783 result.
HW_XOP = (info[2] & ((int)1 << 11)) != 0;
799 std::stringstream ss;
800 auto print_ = [&](
char const* label,
bool yes )
803 ss << (yes ?
"true" :
"false") <<
"\n";
806 ss <<
"Hardware Features\n";
807 ss <<
"=============================================\n\n";
809 ss <<
"Memory:" <<
"\n";
811 print_(
" 64-bit = ", info_hardware.OS_x64);
812 print_(
" Little endian = ", info_hardware.is_little_endian);
815 ss <<
"CPU Vendor:" <<
"\n";
816 ss <<
" Vendor = " << info_hardware.vendor_string <<
"\n";
817 ss <<
" CPU model = " << info_hardware.cpu_model <<
"\n";
818 print_(
" AMD = ", info_hardware.vendor_AMD);
819 print_(
" Intel = ", info_hardware.vendor_Intel);
820 ss <<
" Cores = " << info_hardware.physical_core_count <<
"\n";
821 print_(
" Hyperthreads = ", info_hardware.with_hyperthreading);
825 ss <<
"OS Features:" <<
"\n";
826 print_(
" OS AVX = ", info_hardware.OS_AVX);
827 print_(
" OS AVX512 = ", info_hardware.OS_AVX512);
830 ss <<
"Hardware Features:" <<
"\n";
831 print_(
" MMX = ", info_hardware.HW_MMX);
832 print_(
" x64 = ", info_hardware.HW_x64);
833 print_(
" ABM = ", info_hardware.HW_ABM);
834 print_(
" RDRAND = ", info_hardware.HW_RDRAND);
835 print_(
" RDSEED = ", info_hardware.HW_RDSEED);
836 print_(
" BMI1 = ", info_hardware.HW_BMI1);
837 print_(
" BMI2 = ", info_hardware.HW_BMI2);
838 print_(
" ADX = ", info_hardware.HW_ADX);
839 print_(
" MPX = ", info_hardware.HW_MPX);
840 print_(
" PREFETCHW = ", info_hardware.HW_PREFETCHW);
841 print_(
" PREFETCHWT1 = ", info_hardware.HW_PREFETCHWT1);
842 print_(
" RDPID = ", info_hardware.HW_RDPID);
843 print_(
" GFNI = ", info_hardware.HW_GFNI);
844 print_(
" VAES = ", info_hardware.HW_VAES);
847 ss <<
"SIMD: 128-bit" <<
"\n";
848 print_(
" SSE = ", info_hardware.HW_SSE);
849 print_(
" SSE2 = ", info_hardware.HW_SSE2);
850 print_(
" SSE3 = ", info_hardware.HW_SSE3);
851 print_(
" SSSE3 = ", info_hardware.HW_SSSE3);
852 print_(
" SSE4a = ", info_hardware.HW_SSE4a);
853 print_(
" SSE4.1 = ", info_hardware.HW_SSE41);
854 print_(
" SSE4.2 = ", info_hardware.HW_SSE42);
855 print_(
" AES-NI = ", info_hardware.HW_AES);
856 print_(
" SHA = ", info_hardware.HW_SHA);
859 ss <<
"SIMD: 256-bit" <<
"\n";
860 print_(
" AVX = ", info_hardware.HW_AVX);
861 print_(
" XOP = ", info_hardware.HW_XOP);
862 print_(
" FMA3 = ", info_hardware.HW_FMA3);
863 print_(
" FMA4 = ", info_hardware.HW_FMA4);
864 print_(
" AVX2 = ", info_hardware.HW_AVX2);
867 ss <<
"SIMD: 512-bit" <<
"\n";
868 print_(
" AVX512-F = ", info_hardware.HW_AVX512_F);
869 print_(
" AVX512-CD = ", info_hardware.HW_AVX512_CD);
870 print_(
" AVX512-PF = ", info_hardware.HW_AVX512_PF);
871 print_(
" AVX512-ER = ", info_hardware.HW_AVX512_ER);
872 print_(
" AVX512-VL = ", info_hardware.HW_AVX512_VL);
873 print_(
" AVX512-BW = ", info_hardware.HW_AVX512_BW);
874 print_(
" AVX512-DQ = ", info_hardware.HW_AVX512_DQ);
875 print_(
" AVX512-IFMA = ", info_hardware.HW_AVX512_IFMA);
876 print_(
" AVX512-VBMI = ", info_hardware.HW_AVX512_VBMI);
877 print_(
" AVX512-VPOPCNTDQ = ", info_hardware.HW_AVX512_VPOPCNTDQ);
878 print_(
" AVX512-4VNNIW = ", info_hardware.HW_AVX512_4VNNIW);
879 print_(
" AVX512-4FMAPS = ", info_hardware.HW_AVX512_4FMAPS);
880 print_(
" AVX512-VBMI2 = ", info_hardware.HW_AVX512_VBMI2);
881 print_(
" AVX512-VPCLMUL = ", info_hardware.HW_AVX512_VPCLMUL);
882 print_(
" AVX512-VNNI = ", info_hardware.HW_AVX512_VNNI);
883 print_(
" AVX512-BITALG = ", info_hardware.HW_AVX512_BITALG);
884 print_(
" AVX512-BF16 = ", info_hardware.HW_AVX512_BF16);
888 ss <<
"SIMD Summary:" <<
"\n";
900 static const bool avx_is_safe_ =
911 static const bool avx2_is_safe_ =
916 return avx2_is_safe_;
922 static const bool avx512_is_safe_ =
927 return avx512_is_safe_;
937 auto ncores = std::thread::hardware_concurrency();
939 #if defined(__linux__)
941 if (sched_getaffinity(0,
sizeof(cpu_set_t), &mask) != -1) {
942 ncores = CPU_COUNT(&mask);
948 ncores /= threads_per_core;
956 return sysconf(_SC_NPROCESSORS_ONLN);
961 std::ifstream f(filename);
967 throw std::runtime_error(
"couldn't open sys files");
975 std::string node_path = cpu_path +
"../node";
976 for (
size_t i = 0; i < 1000; ++i) {
994 #if defined(__linux__)
995 std::unordered_set<size_t> cores;
996 for (
size_t i = 0; i < n_cpu; ++i) {
997 std::string cpu_path =
"/sys/devices/system/cpu/cpu" +
std::to_string(i) +
"/topology/";
1000 size_t uniq_core_id = (node_id << 16) + core_id;
1001 cores.insert(uniq_core_id);
1003 return cores.size();
1012 auto const hw_cores = std::thread::hardware_concurrency();
1016 if( phys_cores > 0 ) {
1025 return hw_cores / threads_per_core;
1035 #elif defined( _WIN32 )
1038 __cpuid_count( 1, 0, info[0], info[1], info[2], info[3] );
1041 return (
bool) (info[3] & (0x1 << 28));
1046 size_t openmp_threads = 0;
1048 #if defined( GENESIS_OPENMP )
1052 openmp_threads =
static_cast<size_t>( std::max( omp_get_max_threads(), 0 ));
1057 auto const openmp_ptr = std::getenv(
"OMP_NUM_THREADS" );
1059 openmp_threads = std::atoi( openmp_ptr );
1064 return openmp_threads;
1069 size_t slurm_cpus = 0;
1070 auto const slurm_ptr = std::getenv(
"SLURM_CPUS_PER_TASK" );
1072 slurm_cpus = std::atoi( slurm_ptr );
1088 if( physical_cores ) {
1090 if( phys_cores > 0 ) {
1094 auto const hw_concur = std::thread::hardware_concurrency();
1095 if( hw_concur > 0 ) {
1096 guess =
static_cast<size_t>( hw_concur );
1103 if( slurm_cpus > 0 ) {
1104 guess =
static_cast<size_t>( slurm_cpus );
1114 auto const hw_concur = std::thread::hardware_concurrency();
1115 if( openmp_threads > 0 && openmp_threads == hw_concur && physical_cores ) {
1118 guess = hw_concur / threads_per_core;
1119 }
else if( openmp_threads > 0 ) {
1121 guess =
static_cast<size_t>( openmp_threads );
1125 assert( guess > 0 );
1139 auto const pid = ::getpid();
1141 return static_cast<size_t>( pid );
1147 #if defined( _WIN32 ) || defined( _WIN64 )
1148 return _isatty( _fileno( stdin ));
1150 return isatty( fileno( stdin ));
1156 #if defined( _WIN32 ) || defined( _WIN64 )
1157 return _isatty( _fileno( stdout ));
1159 return isatty( fileno( stdout ));
1165 #if defined( _WIN32 ) || defined( _WIN64 )
1166 return _isatty( _fileno( stderr ));
1168 return isatty( fileno( stderr ));
1174 #if defined( _WIN32 ) || defined( _WIN64 )
1176 CONSOLE_SCREEN_BUFFER_INFO csbi;
1178 GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
1179 cols = csbi.srWindow.Right - csbi.srWindow.Left + 1;
1180 rows = csbi.srWindow.Bottom - csbi.srWindow.Top + 1;
1181 return { cols, rows };
1186 ioctl( STDOUT_FILENO, TIOCGWINSZ, &w );
1187 return { w.ws_col, w.ws_row };
1203 return getdtablesize();
1223 size_t fd_counter = 0;
1225 for(
size_t i = 0; i <= max_fd_cnt; ++i ) {
1228 if( errno != EBADF ) {
1235 #if defined( _WIN32 ) || defined( _WIN64 )|| defined( _MSC_VER )
1247 PROCESS_MEMORY_COUNTERS_EX pmc;
1248 GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc,
sizeof(pmc));
1249 return pmc.WorkingSetSize;
1258 MEMORYSTATUSEX memInfo;
1259 memInfo.dwLength =
sizeof(MEMORYSTATUSEX);
1260 GlobalMemoryStatusEx(&memInfo);
1261 assert( memInfo.ullTotalPhys >= memInfo.ullAvailPhys );
1262 return memInfo.ullTotalPhys - memInfo.ullAvailPhys;
1271 MEMORYSTATUSEX memInfo;
1272 memInfo.dwLength =
sizeof(MEMORYSTATUSEX);
1273 GlobalMemoryStatusEx(&memInfo);
1274 return memInfo.ullAvailPhys;
1283 static ULARGE_INTEGER lastCPU, lastSysCPU, lastUserCPU;
1284 static int num_processors;
1286 static bool initialized =
false;
1288 if( ! initialized ) {
1289 SYSTEM_INFO sysInfo;
1290 FILETIME ftime, fsys, fuser;
1292 GetSystemInfo(&sysInfo);
1293 num_processors = sysInfo.dwNumberOfProcessors;
1295 GetSystemTimeAsFileTime(&ftime);
1296 memcpy(&lastCPU, &ftime,
sizeof(FILETIME));
1298 self = GetCurrentProcess();
1299 GetProcessTimes(
self, &ftime, &ftime, &fsys, &fuser);
1300 memcpy(&lastSysCPU, &fsys,
sizeof(FILETIME));
1301 memcpy(&lastUserCPU, &fuser,
sizeof(FILETIME));
1307 FILETIME ftime, fsys, fuser;
1308 ULARGE_INTEGER now, sys, user;
1311 GetSystemTimeAsFileTime(&ftime);
1312 memcpy(&now, &ftime,
sizeof(FILETIME));
1314 GetProcessTimes(
self, &ftime, &ftime, &fsys, &fuser);
1315 memcpy(&sys, &fsys,
sizeof(FILETIME));
1316 memcpy(&user, &fuser,
sizeof(FILETIME));
1317 result = (sys.QuadPart - lastSysCPU.QuadPart) + (user.QuadPart - lastUserCPU.QuadPart);
1318 result /= (now.QuadPart - lastCPU.QuadPart);
1320 result /= num_processors;
1339 static PDH_HQUERY cpuQuery;
1340 static PDH_HCOUNTER cpuTotal;
1341 static int num_processors;
1342 static bool initialized =
false;
1344 if( ! initialized ) {
1345 SYSTEM_INFO sysInfo;
1346 GetSystemInfo(&sysInfo);
1347 num_processors = sysInfo.dwNumberOfProcessors;
1349 PdhOpenQuery(NULL, NULL, &cpuQuery);
1352 PdhAddEnglishCounter(cpuQuery, L
"\\Processor(_Total)\\% Processor Time", NULL, &cpuTotal);
1353 PdhCollectQueryData(cpuQuery);
1358 PDH_FMT_COUNTERVALUE counterVal;
1359 PdhCollectQueryData(cpuQuery);
1360 PdhGetFormattedCounterValue(cpuTotal, PDH_FMT_DOUBLE, NULL, &counterVal);
1362 auto result = counterVal.doubleValue;
1364 result /= num_processors;
1372 #elif defined( __APPLE__ )
1382 struct task_basic_info t_info;
1383 mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
1385 auto const ret = task_info(
1386 mach_task_self(), TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count
1388 if( KERN_SUCCESS != ret ) {
1393 return t_info.resident_size;
1400 vm_size_t page_size;
1401 mach_port_t mach_port;
1402 mach_msg_type_number_t count;
1403 vm_statistics64_data_t vm_stats;
1405 mach_port = mach_host_self();
1406 count =
sizeof(vm_stats) /
sizeof(natural_t);
1408 KERN_SUCCESS == host_page_size(mach_port, &page_size) &&
1409 KERN_SUCCESS == host_statistics64(
1410 mach_port, HOST_VM_INFO, (host_info64_t)&vm_stats, &count
1414 auto const relevant_sum =
1415 static_cast<size_t>( vm_stats.active_count ) +
1416 static_cast<size_t>( vm_stats.inactive_count ) +
1417 static_cast<size_t>( vm_stats.wire_count )
1419 return relevant_sum *
static_cast<size_t>( page_size );
1429 vm_size_t page_size;
1430 mach_port_t mach_port;
1431 mach_msg_type_number_t count;
1432 vm_statistics64_data_t vm_stats;
1434 mach_port = mach_host_self();
1435 count =
sizeof(vm_stats) /
sizeof(natural_t);
1437 KERN_SUCCESS == host_page_size(mach_port, &page_size) &&
1438 KERN_SUCCESS == host_statistics64(
1439 mach_port, HOST_VM_INFO, (host_info64_t)&vm_stats, &count
1442 return static_cast<size_t>( vm_stats.free_count ) *
static_cast<size_t>( page_size );
1447 size_t info_process_number_of_processors_()
1452 host_basic_info_data_t hostInfo;
1453 mach_msg_type_number_t hostCount = HOST_BASIC_INFO_COUNT;
1454 auto const hi = host_info(
1455 mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &hostCount
1457 if( hi != KERN_SUCCESS ) {
1460 return hostInfo.avail_cpus;
1468 static uint64_t last_total_time = 0;
1469 static struct timeval last_time;
1470 static int num_processors = 0;
1471 static bool initialized =
false;
1473 struct task_thread_times_info thread_times;
1474 mach_msg_type_number_t count = TASK_THREAD_TIMES_INFO_COUNT;
1476 if( !initialized ) {
1477 gettimeofday(&last_time, NULL);
1478 auto const ti = task_info(
1479 mach_task_self(), TASK_THREAD_TIMES_INFO, (task_info_t)&thread_times, &count
1481 if( ti != KERN_SUCCESS ) {
1485 thread_times.user_time.seconds * 1000000 +
1486 thread_times.user_time.microseconds +
1487 thread_times.system_time.seconds * 1000000 +
1488 thread_times.system_time.microseconds
1492 num_processors = info_process_number_of_processors_();
1500 auto const ti = task_info(
1501 mach_task_self(), TASK_THREAD_TIMES_INFO, (task_info_t)&thread_times, &count
1503 if( ti != KERN_SUCCESS ) {
1506 uint64_t total_time =
1507 thread_times.user_time.seconds * 1000000 +
1508 thread_times.user_time.microseconds +
1509 thread_times.system_time.seconds * 1000000 +
1510 thread_times.system_time.microseconds
1512 uint64_t elapsed_time =
1516 double result = (total_time - last_total_time) /
static_cast<double>(elapsed_time);
1518 result *= num_processors;
1525 last_total_time = total_time;
1534 static size_t _previousTotalTicks = 0;
1535 static size_t _previousIdleTicks = 0;
1536 static int num_processors = 0;
1537 static bool initialized =
false;
1539 if( ! initialized ) {
1540 num_processors = info_process_number_of_processors_();
1545 auto calculate_CPU_load_ = [&](
size_t idleTicks,
size_t totalTicks)
1547 size_t totalTicksSinceLastTime = totalTicks - _previousTotalTicks;
1548 size_t idleTicksSinceLastTime = idleTicks - _previousIdleTicks;
1549 _previousTotalTicks = totalTicks;
1550 _previousIdleTicks = idleTicks;
1552 if( totalTicksSinceLastTime == 0 ) {
1555 return 1.0 -
static_cast<double>( idleTicksSinceLastTime ) / totalTicksSinceLastTime;
1561 host_cpu_load_info_data_t cpuinfo;
1562 mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
1565 mach_host_self(), HOST_CPU_LOAD_INFO, (host_info_t)&cpuinfo, &count
1571 size_t totalTicks = 0;
1572 for(
int i = 0; i < CPU_STATE_MAX; i++ ) {
1573 totalTicks += cpuinfo.cpu_ticks[i];
1575 auto result = calculate_CPU_load_( cpuinfo.cpu_ticks[CPU_STATE_IDLE], totalTicks );
1577 result *= num_processors;
1585 #elif defined( __GNUC__ ) || defined( __clang__ ) || defined(__linux__)
1591 size_t parse_proc_line_kb_(
char* line )
1599 auto length = strlen(line);
1605 if( line[
length - 1] ==
'\n' ) {
1620 while( *p && !isdigit(*p) ) {
1636 FILE* file = fopen(
"/proc/self/status",
"r");
1643 while( fgets( line, 128, file ) != NULL ) {
1644 if( strncmp( line,
"VmRSS:", 6 ) == 0 ) {
1645 result = parse_proc_line_kb_(line);
1650 return result * 1024;
1653 std::unordered_map<std::string, size_t> get_proc_meminfo_lines_()
1655 std::unordered_map<std::string, size_t> meminfo;
1656 std::ifstream file(
"/proc/meminfo");
1659 while (std::getline(file, line)) {
1660 std::istringstream iss(line);
1664 iss >> key >> value >> unit;
1666 meminfo[key] = value * 1024;
1672 size_t info_system_current_memory_helper_(
bool available )
1679 size_t mem_avail = 0;
1680 auto meminfo = get_proc_meminfo_lines_();
1681 if( meminfo.count(
"MemAvailable" ) > 0 ) {
1682 mem_avail = meminfo[
"MemAvailable"];
1685 size_t memFree = meminfo[
"MemFree"];
1686 size_t buffers = meminfo[
"Buffers"];
1687 size_t cached = meminfo[
"Cached"];
1688 mem_avail = memFree + buffers + cached;
1692 }
else if( meminfo.count(
"MemTotal" ) != 0 ) {
1694 return meminfo[
"MemTotal"] - mem_avail;
1703 struct sysinfo memInfo;
1704 if( sysinfo( &memInfo )) {
1707 auto const mem_unit =
static_cast<size_t>( memInfo.mem_unit );
1709 return static_cast<size_t>( memInfo.freeram ) * mem_unit;
1711 return static_cast<size_t>( memInfo.totalram - memInfo.freeram ) * mem_unit;
1718 return info_system_current_memory_helper_(
false );
1723 return info_system_current_memory_helper_(
true );
1726 size_t info_process_number_of_processors_()
1731 size_t num_processors = 0;
1732 FILE* file = fopen(
"/proc/cpuinfo",
"r");
1737 while( fgets(line, 128, file) != NULL) {
1738 if( strncmp(line,
"processor", 9) == 0 ) {
1743 return num_processors;
1750 static clock_t lastCPU, lastSysCPU, lastUserCPU;
1751 static int num_processors;
1752 static bool initialized =
false;
1754 if( ! initialized ) {
1755 struct tms timeSample;
1756 lastCPU = times(&timeSample);
1757 lastSysCPU = timeSample.tms_stime;
1758 lastUserCPU = timeSample.tms_utime;
1759 num_processors = info_process_number_of_processors_();
1765 struct tms timeSample;
1769 now = times(&timeSample);
1772 timeSample.tms_stime < lastSysCPU ||
1773 timeSample.tms_utime < lastUserCPU
1778 result = (timeSample.tms_stime - lastSysCPU) + (timeSample.tms_utime - lastUserCPU);
1779 result /= (now - lastCPU);
1781 result /= num_processors;
1788 lastSysCPU = timeSample.tms_stime;
1789 lastUserCPU = timeSample.tms_utime;
1798 static unsigned long long lastTotalUser, lastTotalUserLow, lastTotalSys, lastTotalIdle;
1799 static int num_processors;
1800 static bool initialized =
false;
1802 if( ! initialized ) {
1803 FILE* file = fopen(
"/proc/stat",
"r");
1808 auto const scanned = fscanf(
1809 file,
"cpu %llu %llu %llu %llu",
1810 &lastTotalUser, &lastTotalUserLow, &lastTotalSys, &lastTotalIdle
1813 num_processors = info_process_number_of_processors_();
1814 if( scanned != 4 ) {
1824 unsigned long long totalUser, totalUserLow, totalSys, totalIdle, total;
1826 file = fopen(
"/proc/stat",
"r");
1830 auto const scanned = fscanf(
1831 file,
"cpu %llu %llu %llu %llu",
1832 &totalUser, &totalUserLow, &totalSys, &totalIdle
1835 if( scanned != 4 ) {
1840 totalUser < lastTotalUser || totalUserLow < lastTotalUserLow ||
1841 totalSys < lastTotalSys || totalIdle < lastTotalIdle
1847 (totalUser - lastTotalUser) +
1848 (totalUserLow - lastTotalUserLow) +
1849 (totalSys - lastTotalSys)
1852 total += (totalIdle - lastTotalIdle);
1855 result *= num_processors;
1862 lastTotalUser = totalUser;
1863 lastTotalUserLow = totalUserLow;
1864 lastTotalSys = totalSys;
1865 lastTotalIdle = totalIdle;
1915 struct rusage r_usage;
1916 getrusage(RUSAGE_SELF, &r_usage);
1918 #if defined __APPLE__
1920 return static_cast<size_t>( r_usage.ru_maxrss );
1923 return static_cast<size_t>( r_usage.ru_maxrss * 1024 );
1930 struct rusage r_usage;
1931 getrusage(RUSAGE_SELF, &r_usage);
1934 auto const u_tmr = r_usage.ru_utime.tv_sec * 1.0 + (double)r_usage.ru_utime.tv_usec * 1.0e-6;
1935 auto const s_tmr = r_usage.ru_stime.tv_sec * 1.0 + r_usage.ru_stime.tv_usec * 1.0e-6;
1937 return std::make_pair( u_tmr, s_tmr );
1945 double energy = 0.0;
1946 auto const basepath =
"/sys/class/powercap/intel-rapl/intel-rapl:";
1949 size_t const max_packages = 32;
1950 for(
size_t i = 0; i < max_packages; ++i ) {
1956 std::ifstream fs(fname);
1958 energy += pkg_energy;
1979 std::stringstream ss;
1980 ss << std::setprecision(3) << std::fixed;
1981 ss <<
"Time: " << time.first <<
"s (user)\n";
1982 ss <<
"Time: " << time.second <<
"s (sys)\n";
1984 ss <<
"Energy: " << energy <<
"Wh\n";