mirror of
https://github.com/ClusterCockpit/cc-metric-collector.git
synced 2024-12-26 15:29:04 +01:00
2306 lines
97 KiB
C
2306 lines
97 KiB
C
/*
|
|
* =======================================================================================
|
|
*
|
|
* Filename: likwid.h
|
|
*
|
|
* Description: Header File of likwid API
|
|
*
|
|
* Version: <VERSION>
|
|
* Released: <DATE>
|
|
*
|
|
* Authors: Thomas Gruber (tr), thomas.roehl@googlemail.com
|
|
*
|
|
* Project: likwid
|
|
*
|
|
* Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify it under
|
|
* the terms of the GNU General Public License as published by the Free Software
|
|
* Foundation, either version 3 of the License, or (at your option) any later
|
|
* version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
|
* PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along with
|
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* =======================================================================================
|
|
*/
|
|
#ifndef LIKWID_H
|
|
#define LIKWID_H
|
|
|
|
#include <stdint.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
|
|
#include <bstrlib.h>
|
|
|
|
#define DEBUGLEV_ONLY_ERROR 0
|
|
#define DEBUGLEV_INFO 1
|
|
#define DEBUGLEV_DETAIL 2
|
|
#define DEBUGLEV_DEVELOP 3
|
|
|
|
#define LIKWID_VERSION "VERSION.RELEASE.MINORVERSION"
|
|
#define LIKWID_COMMIT GITCOMMIT
|
|
|
|
extern int perfmon_verbosity;
|
|
extern int likwid_nvmon_verbosity;
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#ifndef LIKWID_MARKER_INIT
|
|
#include <likwid-marker.h>
|
|
#endif
|
|
|
|
/*
|
|
################################################################################
|
|
# Marker API related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup MarkerAPI Marker API module
|
|
* @{
|
|
*/
|
|
/*! \brief Initialize LIKWID's marker API
|
|
|
|
Must be called in serial region of the application to set up basic data structures
|
|
of LIKWID.
|
|
Reads environment variables:
|
|
- LIKWID_MODE (access mode)
|
|
- LIKWID_MASK (event bitmask)
|
|
- LIKWID_EVENTS (event string)
|
|
- LIKWID_THREADS (cpu list separated by ,)
|
|
- LIKWID_GROUPS (amount of groups)
|
|
*/
|
|
extern void likwid_markerInit(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Initialize LIKWID's marker API for the current thread
|
|
|
|
Must be called in parallel region of the application to set up basic data structures
|
|
of LIKWID. Before you can call likwid_markerThreadInit() you have to call likwid_markerInit().
|
|
|
|
*/
|
|
extern void likwid_markerThreadInit(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Switch to next group to measure
|
|
|
|
Should be called in a serial region of code. If it is to be called from inside
|
|
a parallel region, ensure only one thread runs it by using "#pragma omp single"
|
|
or similar. Additionally, if this function is called in a parallel region,
|
|
ensure that the serial region is preceded by a barrier ("#pragma omp barrier"
|
|
or similar) to prevent race conditions.
|
|
*/
|
|
extern void likwid_markerNextGroup(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Close LIKWID's marker API
|
|
|
|
Must be called in serial region of the application. It gathers all data of regions and
|
|
writes them out to a file (filepath in env variable LIKWID_FILEPATH).
|
|
*/
|
|
extern void likwid_markerClose(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Register a measurement region
|
|
|
|
Initializes the hashTable entry in order to reduce execution time of likwid_markerStartRegion()
|
|
@param regionTag [in] Initialize data using this string
|
|
@return Error code
|
|
*/
|
|
extern int likwid_markerRegisterRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Start a measurement region
|
|
|
|
Reads the values of all configured counters and saves the results under the
|
|
name given in regionTag. Must be called on every thread that is to be measured,
|
|
e.g. if the code to be measured is run in a parallel region, this function must
|
|
also be called in a parallel region (typically the same parallel region as the
|
|
measured code). If this function is to be called multiple times in one parallel
|
|
region, place a barrier ("#pragma omp barrier" or similar) before each call to
|
|
likwid_markerStartRegion
|
|
@param regionTag [in] Store data using this string
|
|
@return Error code of start operation
|
|
*/
|
|
extern int likwid_markerStartRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Stop a measurement region
|
|
|
|
Reads the values of all configured counters and saves the results under the
|
|
name given in regionTag. The measurement data of the stopped region gets summed
|
|
up in global region counters. Must be called on every thread that is to be
|
|
measured, e.g. if the code to be measured is run in a parallel region, this
|
|
function must also be called in a parallel region (typically the same parallel
|
|
region as the measured code). If this function is called multiple times in one
|
|
parallel region, place a barrier ("#pragma omp barrier" or similar) after each
|
|
call to likwid_markerStopRegion
|
|
@param regionTag [in] Store data using this string
|
|
@return Error code of stop operation
|
|
*/
|
|
extern int likwid_markerStopRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Reset a measurement region
|
|
|
|
Reset the values of all configured counters and timers.
|
|
@param regionTag [in] Reset data using this string
|
|
@return Error code of reset operation
|
|
*/
|
|
extern int likwid_markerResetRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get accumulated data of a code region
|
|
|
|
Get the accumulated data of the current thread for the given regionTag.
|
|
@param regionTag [in] Print data using this string
|
|
@param nr_events [in,out] Length of events array
|
|
@param events [out] Events array for the intermediate results
|
|
@param time [out] Accumulated measurement time
|
|
@param count [out] Call count of the code region
|
|
*/
|
|
extern void likwid_markerGetRegion(const char* regionTag, int* nr_events, double* events, double *time, int *count) __attribute__ ((visibility ("default") ));
|
|
/* utility routines */
|
|
/*! \brief Get CPU ID of the current process/thread

Returns the ID of the CPU the current process or thread is running on.
Takes no arguments; declared with an explicit (void) prototype so that C
compilers reject calls that pass arguments.
@return current CPU ID
*/
extern int likwid_getProcessorId(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Pin the current process to given CPU
|
|
|
|
Pin the current process to the given CPU ID. The process cannot be scheduled to
|
|
another CPU after pinning but the pinning can be changed anytime with this function.
|
|
@param [in] processorId CPU ID to pin the current process to
|
|
@return error code (1 for success, 0 for error)
|
|
*/
|
|
extern int likwid_pinProcess(int processorId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Pin the current thread to given CPU
|
|
|
|
Pin the current thread to the given CPU ID. The thread cannot be scheduled to
|
|
another CPU after pinning but the pinning can be changed anytime with this function
|
|
@param [in] processorId CPU ID to pin the current thread to
|
|
@return error code (1 for success, 0 for error)
|
|
*/
|
|
extern int likwid_pinThread(int processorId) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# Access client related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup Access Access module
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Enum for the access modes

LIKWID supports multiple access modes to the MSR and PCI performance monitoring
registers. For direct access the user must have enough privileges to access the
MSR and PCI devices. The daemon mode forwards the operations to a daemon with
higher privileges.
*/
typedef enum {
    ACCESSMODE_PERF = -1, /*!< \brief Access performance monitoring through perf_event kernel interface */
    ACCESSMODE_DIRECT = 0, /*!< \brief Access performance monitoring registers directly */
    ACCESSMODE_DAEMON = 1 /*!< \brief Use the access daemon to access the registers */
} AccessMode;
|
|
|
|
/*! \brief Set access mode
|
|
|
|
Sets the mode how the MSR and PCI registers should be accessed. 0 for direct access (probably root privileges required) and 1 for accesses through the access daemon. It must be called before HPMinit()
|
|
@param [in] mode (0=direct, 1=daemon)
|
|
*/
|
|
extern void HPMmode(int mode) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Initialize access module

Initialize the module internals to either the MSR/PCI files or the access daemon.
Takes no arguments; declared with an explicit (void) prototype so that C
compilers reject calls that pass arguments.
@return error code (0 for success)
*/
extern int HPMinit(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Add CPU to access module
|
|
|
|
Add the given CPU to the access module. This opens the communication to either the MSR/PCI files or the access daemon.
|
|
@param [in] cpu_id CPU that should be enabled for measurements
|
|
@return error code (0 for success, -ENODEV if access cannot be initialized)
|
|
*/
|
|
extern int HPMaddThread(int cpu_id) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Close connections

Close the connections to the MSR/PCI files or the access daemon.
Takes no arguments and returns nothing; declared with an explicit (void)
prototype so that C compilers reject calls that pass arguments.
*/
extern void HPMfinalize(void) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# Config file related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup Config Config file module
|
|
* @{
|
|
*/
|
|
/*! \brief Structure holding values of the configuration file
|
|
|
|
LIKWID supports the definition of runtime values in a configuration file. The
|
|
most important configurations in most cases are the path the access daemon and
|
|
the corresponding access mode. In order to avoid reading in the system topology
|
|
at each start, a path to a topology file can be set. The other values are mostly
|
|
used internally.
|
|
*/
|
|
typedef struct {
|
|
char* configFileName; /*!< \brief Path to the configuration file */
|
|
char* topologyCfgFileName; /*!< \brief Path to the topology file */
|
|
char* daemonPath; /*!< \brief Path of the access daemon */
|
|
char* groupPath; /*!< \brief Path of default performance group directory */
|
|
AccessMode daemonMode; /*!< \brief Access mode to the MSR and PCI registers */
|
|
int maxNumThreads; /*!< \brief Maximum number of HW threads */
|
|
int maxNumNodes; /*!< \brief Maximum number of NUMA nodes */
|
|
} Likwid_Configuration;
|
|
|
|
/** \brief Pointer for exporting the Configuration data structure */
|
|
typedef Likwid_Configuration* Configuration_t;
|
|
/*! \brief Read the config file of LIKWID, if it exists
|
|
|
|
Search for LIKWID config file and read the values in
|
|
Currently the paths /usr/local/etc/likwid.cfg, /etc/likwid.cfg and the path
|
|
defined in config.mk are checked.
|
|
@return error code (0 for success, -EFAULT if no file can be found)
|
|
*/
|
|
extern int init_configuration(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Destroy the config structure
|
|
|
|
Destroys the current config structure and frees all allocated memory for path names
|
|
@return error code (0 for success, -EFAULT if config structure not initialized)
|
|
*/
|
|
extern int destroy_configuration(void) __attribute__ ((visibility ("default") ));
|
|
|
|
|
|
/*! \brief Retrieve the config structure
|
|
|
|
Get the initialized configuration
|
|
\sa Configuration_t
|
|
@return Configuration_t (pointer to internal Configuration structure)
|
|
*/
|
|
extern Configuration_t get_configuration(void) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Set group path in the config structure
|
|
|
|
Set group path in the config structure. The path must be a directory.
|
|
@param [in] path
|
|
@return error code (0 for success, -ENOMEM if reallocation failed, -ENOTDIR if no directory)
|
|
*/
|
|
extern int config_setGroupPath(const char* path) __attribute__ ((visibility ("default") ));
|
|
|
|
/** @}*/
|
|
/*
|
|
################################################################################
|
|
# CPU topology related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup CPUTopology CPU information module
|
|
* @{
|
|
*/
|
|
/*! \brief Structure with general CPU information

General information covers CPU family, model, name and current clock and vendor
specific information like the version of Intel's performance monitoring facility.
*/
typedef struct {
    uint32_t family; /*!< \brief CPU family ID */
    uint32_t model; /*!< \brief CPU model ID */
    uint32_t stepping; /*!< \brief Stepping (version) of the CPU */
    uint32_t vendor; /*!< \brief Vendor of the CPU */
    uint32_t part; /*!< \brief Part number of the CPU */
    uint64_t clock; /*!< \brief Current clock frequency of the executing CPU */
    int turbo; /*!< \brief Flag if CPU has a turbo mode */
    char* osname; /*!< \brief Name of the CPU reported by OS */
    char* name; /*!< \brief Name of the CPU as identified by LIKWID */
    char* short_name; /*!< \brief Short name of the CPU */
    char* features; /*!< \brief String with all features supported by the CPU */
    int isIntel; /*!< \brief Flag if it is an Intel CPU */
    char architecture[20]; /*!< \brief Name of the architecture like x86_64 or ppc64 (comparable with uname -m) */
    int supportUncore; /*!< \brief Flag if system has Uncore performance monitors */
    int supportClientmem; /*!< \brief Flag if system has mappable memory controllers */
    uint64_t featureFlags; /*!< \brief Mask of all features supported by the CPU */
    uint32_t perf_version; /*!< \brief Version of Intel's performance monitoring facility */
    uint32_t perf_num_ctr; /*!< \brief Number of general purpose HWthread-local performance monitoring counters */
    uint32_t perf_width_ctr; /*!< \brief Bit width of fixed and general purpose counters */
    uint32_t perf_num_fixed_ctr; /*!< \brief Number of fixed purpose HWthread-local performance monitoring counters */
} CpuInfo;
|
|
|
|
/*! \brief Structure with IDs of a HW thread

For each HW thread this structure stores the ID of the thread inside a CPU, the
CPU core ID of the HW thread and the CPU socket ID.
\extends CpuTopology
*/
typedef struct {
    uint32_t threadId; /*!< \brief ID of HW thread inside the CPU core */
    uint32_t coreId; /*!< \brief ID of CPU core that executes the HW thread */
    uint32_t packageId; /*!< \brief ID of CPU socket containing the HW thread */
    uint32_t apicId; /*!< \brief ID of HW thread retrieved through the Advanced Programmable Interrupt Controller */
    uint32_t inCpuSet; /*!< \brief Flag if HW thread is inside the CPUset */
} HWThread;
|
|
|
|
/*! \brief Enum of possible caches

CPU caches can have different tasks and hold different kinds of data. This enum lists all shapes used in all supported CPUs.
\extends CacheLevel
*/
typedef enum {
    NOCACHE=0, /*!< \brief No cache used as undef value */
    DATACACHE, /*!< \brief Cache holding data cache lines */
    INSTRUCTIONCACHE, /*!< \brief Cache holding instruction cache lines */
    UNIFIEDCACHE, /*!< \brief Cache holding both instruction and data cache lines */
    ITLB, /*!< \brief Translation Lookaside Buffer cache for instruction pages */
    DTLB /*!< \brief Translation Lookaside Buffer cache for data pages */
} CacheType;
|
|
|
|
/*! \brief Structure describing a cache level
|
|
|
|
CPUs are connected to a cache hierarchy with different amount of caches at each level. The CacheLevel structure holds general information about the cache.
|
|
\extends CpuTopology
|
|
*/
|
|
typedef struct {
|
|
uint32_t level; /*!< \brief Level of the cache in the hierarchy */
|
|
CacheType type; /*!< \brief Type of the cache */
|
|
uint32_t associativity; /*!< \brief Amount of cache lines hold by each set */
|
|
uint32_t sets; /*!< \brief Amount of sets */
|
|
uint32_t lineSize; /*!< \brief Size in bytes of one cache line */
|
|
uint32_t size; /*!< \brief Size in bytes of the cache */
|
|
uint32_t threads; /*!< \brief Number of HW thread connected to the cache */
|
|
uint32_t inclusive; /*!< \brief Flag if cache is inclusive (holds also cache lines available in caches nearer to the CPU) or exclusive */
|
|
} CacheLevel;
|
|
|
|
/*! \brief Structure describing the topology of the HW threads in the system
|
|
|
|
This structure describes the topology at HW thread level like the amount of HW threads, how they are distributed over the CPU sockets/packages and how the caching hierarchy is assembled.
|
|
*/
|
|
typedef struct {
|
|
uint32_t numHWThreads; /*!< \brief Amount of active HW threads in the system (e.g. in cpuset) */
|
|
uint32_t activeHWThreads; /*!< \brief Amount of HW threads in the system and length of \a threadPool */
|
|
uint32_t numSockets; /*!< \brief Amount of CPU sockets/packages in the system */
|
|
uint32_t numCoresPerSocket; /*!< \brief Amount of physical cores in one CPU socket/package */
|
|
uint32_t numThreadsPerCore; /*!< \brief Amount of HW threads in one physical CPU core */
|
|
uint32_t numCacheLevels; /*!< \brief Amount of caches for each HW thread and length of \a cacheLevels */
|
|
HWThread* threadPool; /*!< \brief List of all HW thread descriptions */
|
|
CacheLevel* cacheLevels; /*!< \brief List of all caches in the hierarchy */
|
|
struct treeNode* topologyTree; /*!< \brief Anchor for a tree structure describing the system topology */
|
|
} CpuTopology;
|
|
|
|
/*! \brief Variable holding the global cpu information structure */
|
|
extern CpuInfo cpuid_info;
|
|
/*! \brief Variable holding the global cpu topology structure */
|
|
extern CpuTopology cpuid_topology;
|
|
|
|
/** \brief Pointer for exporting the CpuInfo data structure */
|
|
typedef CpuInfo* CpuInfo_t;
|
|
/** \brief Pointer for exporting the CpuTopology data structure */
|
|
typedef CpuTopology* CpuTopology_t;
|
|
/*! \brief Initialize topology information
|
|
|
|
CpuInfo_t and CpuTopology_t are initialized by either HWLOC, CPUID/ProcFS or topology file if present. The topology file name can be configured in the configuration file. Furthermore, the paths /etc/likwid_topo.cfg and <PREFIX>/etc/likwid_topo.cfg are checked.
|
|
\sa CpuInfo_t and CpuTopology_t
|
|
@return always 0
|
|
*/
|
|
extern int topology_init(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Retrieve CPU topology of the current machine
|
|
|
|
\sa CpuTopology_t
|
|
@return CpuTopology_t (pointer to internal cpuid_topology structure)
|
|
*/
|
|
extern CpuTopology_t get_cpuTopology(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Retrieve CPU information of the current machine
|
|
|
|
Get the previously initialized CPU info structure containing number of CPUs/Threads
|
|
\sa CpuInfo_t
|
|
@return CpuInfo_t (pointer to internal cpuid_info structure)
|
|
*/
|
|
extern CpuInfo_t get_cpuInfo(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Destroy topology structures CpuInfo_t and CpuTopology_t.
|
|
|
|
Retrieved pointers to the structures are not valid anymore after this function call
|
|
\sa CpuInfo_t and CpuTopology_t
|
|
*/
|
|
extern void topology_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Print all supported architectures
|
|
*/
|
|
extern void print_supportedCPUs(void) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
/*
|
|
################################################################################
|
|
# NUMA related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup NumaTopology NUMA memory topology module
|
|
* @{
|
|
*/
|
|
/*! \brief CPUs in NUMA node and general information about a NUMA domain

The NumaNode structure describes the topology and holds general information of a
NUMA node. The structure is filled by calling numa_init() by either the HWLOC
library or by evaluating the /proc filesystem.
\extends NumaTopology
*/
typedef struct {
    uint32_t id; /*!< \brief ID of the NUMA node */
    uint64_t totalMemory; /*!< \brief Amount of memory in the NUMA node */
    uint64_t freeMemory; /*!< \brief Amount of free memory in the NUMA node */
    uint32_t numberOfProcessors; /*!< \brief Number of processors covered by the NUMA node and length of \a processors */
    uint32_t* processors; /*!< \brief List of HW threads in the NUMA node */
    uint32_t numberOfDistances; /*!< \brief Amount of distances to the other NUMA nodes in the system and self */
    uint32_t* distances; /*!< \brief List of distances to the other NUMA nodes and self */
} NumaNode;
|
|
|
|
|
|
/*! \brief The NumaTopology structure describes all NUMA nodes in the current system.
|
|
*/
|
|
typedef struct {
|
|
uint32_t numberOfNodes; /*!< \brief Number of NUMA nodes in the system and length of \a nodes */
|
|
NumaNode* nodes; /*!< \brief List of NUMA nodes */
|
|
} NumaTopology;
|
|
|
|
/*! \brief Variable holding the global NUMA information structure */
|
|
extern NumaTopology numa_info;
|
|
|
|
/** \brief Pointer for exporting the NumaTopology data structure */
|
|
typedef NumaTopology* NumaTopology_t;
|
|
|
|
/*! \brief Initialize NUMA information
|
|
|
|
Initialize NUMA information NumaTopology_t using either HWLOC or CPUID/ProcFS. If
|
|
a topology config file is present it is read at topology_init() and fills \a NumaTopology_t
|
|
\sa NumaTopology_t
|
|
@return error code (0 for success, -1 if initialization failed)
|
|
*/
|
|
extern int numa_init(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Retrieve NUMA information of the current machine
|
|
|
|
Get the previously initialized NUMA info structure
|
|
\sa NumaTopology_t
|
|
@return NumaTopology_t (pointer to internal numa_info structure)
|
|
*/
|
|
extern NumaTopology_t get_numaTopology(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Set memory allocation policy to interleaved
|
|
|
|
Set the memory allocation policy to interleaved for given list of CPUs
|
|
@param [in] processorList List of processors
|
|
@param [in] numberOfProcessors Length of processor list
|
|
*/
|
|
extern void numa_setInterleaved(const int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Allocate memory from a specific NUMA node
|
|
@param [in,out] ptr Start pointer of memory
|
|
@param [in] size Size for the allocation
|
|
@param [in] domainId ID of NUMA node for the allocation
|
|
*/
|
|
extern void numa_membind(void* ptr, size_t size, int domainId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Set memory allocation policy to membind
|
|
|
|
Set the memory allocation policy to membind for given list of CPUs. This forces
|
|
allocation to be placed in NUMA domains spanning the given processor list.
|
|
@param [in] processorList List of processors
|
|
@param [in] numberOfProcessors Length of processor list
|
|
*/
|
|
extern void numa_setMembind(const int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Destroy NUMA information structure
|
|
|
|
Destroys the NUMA information structure NumaTopology_t. Retrieved pointers
|
|
to the structures are not valid anymore after this function call
|
|
\sa NumaTopology_t
|
|
*/
|
|
extern void numa_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Retrieve the number of NUMA nodes
|
|
|
|
Returns the number of NUMA nodes of the current machine. Can also be read out of
|
|
NumaTopology_t
|
|
\sa NumaTopology_t
|
|
@return Number of NUMA nodes
|
|
*/
|
|
extern int likwid_getNumberOfNodes(void) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
/*
|
|
################################################################################
|
|
# Affinity domains related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup AffinityDomains Thread affinity module
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief The AffinityDomain data structure describes a single domain in the current system
|
|
|
|
The AffinityDomain data structure describes a single domain in the current system. Example domains are NUMA nodes, CPU sockets/packages or LLC (Last Level Cache) cache domains.
|
|
\extends AffinityDomains
|
|
*/
|
|
typedef struct {
|
|
bstring tag; /*!< \brief Bstring with the ID for the affinity domain. Currently possible values: N (node), SX (socket/package X), CX (LLC cache domain X) and MX (memory domain X) */
|
|
uint32_t numberOfProcessors; /*!< \brief Number of HW threads in the domain and length of \a processorList */
|
|
uint32_t numberOfCores; /*!< \brief Number of hardware threads in the domain */
|
|
int* processorList; /*!< \brief List of HW thread IDs in the domain */
|
|
} AffinityDomain;
|
|
|
|
/*! \brief The AffinityDomains data structure holds different count variables describing the
|
|
various system layers
|
|
|
|
Affinity domains are for example the amount of NUMA domains, CPU sockets/packages or LLC
|
|
(Last Level Cache) cache domains of the current machine. Moreover a list of
|
|
\a domains holds the processor lists for each domain that are used for
|
|
scheduling processes to domain specific HW threads. Some amounts are duplicates
|
|
or derivation of values in \a CpuInfo, \a CpuTopology and \a NumaTopology.
|
|
*/
|
|
typedef struct {
|
|
uint32_t numberOfSocketDomains; /*!< \brief Number of CPU sockets/packages in the system */
|
|
uint32_t numberOfNumaDomains; /*!< \brief Number of NUMA nodes in the system */
|
|
uint32_t numberOfProcessorsPerSocket; /*!< \brief Number of HW threads per socket/package in the system */
|
|
uint32_t numberOfCacheDomains; /*!< \brief Number of LLC caches in the system */
|
|
uint32_t numberOfCoresPerCache; /*!< \brief Number of CPU cores per LLC cache in the system */
|
|
uint32_t numberOfProcessorsPerCache; /*!< \brief Number of hardware threads per LLC cache in the system */
|
|
uint32_t numberOfAffinityDomains; /*!< \brief Number of affinity domains in the current system and length of \a domains array */
|
|
AffinityDomain* domains; /*!< \brief List of all domains in the system */
|
|
} AffinityDomains;
|
|
|
|
/** \brief Pointer for exporting the AffinityDomains data structure */
|
|
typedef AffinityDomains* AffinityDomains_t;
|
|
|
|
/*! \brief Initialize affinity information

Initialize affinity information AffinityDomains_t using the data of the structures
\a CpuInfo_t, CpuTopology_t and NumaTopology_t.
Takes no arguments and returns nothing; declared with an explicit (void)
prototype so that C compilers reject calls that pass arguments.
\sa AffinityDomains_t
*/
extern void affinity_init(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Retrieve affinity structure
|
|
|
|
Get the previously initialized affinity info structure
|
|
\sa AffinityDomains_t
|
|
@return AffinityDomains_t (pointer to internal affinityDomains structure)
|
|
*/
|
|
extern AffinityDomains_t get_affinityDomains(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Pin process to a CPU
|
|
|
|
Pin process to a CPU. Duplicate of likwid_pinProcess()
|
|
@param [in] processorId CPU ID for pinning
|
|
*/
|
|
extern void affinity_pinProcess(int processorId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Pin processes to a CPU
|
|
|
|
Pin processes to a CPU. Creates a cpuset with the given processor IDs
|
|
@param [in] cpu_count Number of processors in processorIds
|
|
@param [in] processorIds Array of processor IDs
|
|
*/
|
|
extern void affinity_pinProcesses(int cpu_count, const int* processorIds) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Pin thread to a CPU
|
|
|
|
Pin thread to a CPU. Duplicate of likwid_pinThread()
|
|
@param [in] processorId CPU ID for pinning
|
|
*/
|
|
extern void affinity_pinThread(int processorId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the CPU ID where the current process runs.

Takes no arguments; declared with an explicit (void) prototype so that C
compilers reject calls that pass arguments.
@return CPU ID
*/
extern int affinity_processGetProcessorId(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the CPU ID where the current thread runs.

Takes no arguments; declared with an explicit (void) prototype so that C
compilers reject calls that pass arguments.
@return CPU ID
*/
extern int affinity_threadGetProcessorId(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Destroy affinity information structure

Destroys the affinity information structure AffinityDomains_t. Retrieved pointers
to the structures are not valid anymore after this function call.
Takes no arguments and returns nothing; declared with an explicit (void)
prototype so that C compilers reject calls that pass arguments.
\sa AffinityDomains_t
*/
extern void affinity_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# CPU string parsing related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup CPUParse CPU string parser module
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Read CPU selection string and resolve to available CPU numbers
|
|
|
|
Reads the CPU selection string and fills the given list with the CPU numbers
|
|
defined in the selection string. This function is an interface function for the
|
|
different selection modes: scatter, expression, logical and physical.
|
|
@param [in] cpustring Selection string
|
|
@param [in,out] cpulist List of CPUs
|
|
@param [in] length Length of cpulist
|
|
@return error code (>0 on success for the returned list length, -ERRORCODE on failure)
|
|
*/
|
|
extern int cpustr_to_cpulist(const char* cpustring, int* cpulist, int length) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read NUMA node selection string and resolve to available NUMA node numbers
|
|
|
|
Reads the NUMA node selection string and fills the given list with the NUMA node numbers
|
|
defined in the selection string.
|
|
@param [in] nodestr Selection string
|
|
@param [out] nodes List of available NUMA nodes
|
|
@param [in] length Length of NUMA node list
|
|
@return error code (>0 on success for the returned list length, -ERRORCODE on failure)
|
|
*/
|
|
extern int nodestr_to_nodelist(const char* nodestr, int* nodes, int length) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read CPU socket selection string and resolve to available CPU socket numbers
|
|
|
|
Reads the CPU socket selection string and fills the given list with the CPU socket numbers
|
|
defined in the selection string.
|
|
@param [in] sockstr Selection string
|
|
@param [out] sockets List of available CPU sockets
|
|
@param [in] length Length of CPU socket list
|
|
@return error code (>0 on success for the returned list length, -ERRORCODE on failure)
|
|
*/
|
|
extern int sockstr_to_socklist(const char* sockstr, int* sockets, int length) __attribute__ ((visibility ("default") ));
|
|
|
|
#ifdef LIKWID_WITH_NVMON
|
|
/*! \brief Read GPU selection string and resolve to available GPU numbers
|
|
|
|
Reads the GPU selection string and fills the given list with the GPU numbers defined in the selection string.
|
|
@param [in] gpustr Selection string
|
|
@param [out] gpulist List of available GPUs
|
|
@param [in] length Length of GPU list
|
|
@return error code (>0 on success for the returned list length, -ERRORCODE on failure)
|
|
*/
|
|
extern int gpustr_to_gpulist(const char* gpustr, int* gpulist, int length) __attribute__ ((visibility ("default") ));
|
|
|
|
#endif /* LIKWID_WITH_NVMON */
|
|
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# Performance monitoring related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup PerfMon Performance monitoring module
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Get all groups
|
|
|
|
Checks the configured performance group path for the current architecture and
|
|
returns all found group names
|
|
@return Amount of found performance groups
|
|
*/
|
|
extern int perfmon_getGroups(char*** groups, char*** shortinfos, char*** longinfos) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Free all group information
|
|
|
|
@param [in] nrgroups Number of groups
|
|
@param [in] groups List of group names
|
|
@param [in] shortinfos List of short information string about group
|
|
@param [in] longinfos List of long information string about group
|
|
*/
|
|
extern void perfmon_returnGroups(int nrgroups, char** groups, char** shortinfos, char** longinfos) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Initialize performance monitoring facility
|
|
|
|
Initialize the performance monitoring feature by creating basic data structures.
|
|
The CPU ids for the threadsToCpu list can be found in cpuTopology->threadPool[thread_id]->apicId.
|
|
The access mode must already be set when calling perfmon_init().
|
|
\sa HPMmode() function and CpuTopology structure with HWThread list
|
|
|
|
@param [in] nrThreads Amount of threads
|
|
@param [in] threadsToCpu List of CPUs
|
|
@return error code (0 on success, -ERRORCODE on failure)
|
|
*/
|
|
extern int perfmon_init(int nrThreads, const int* threadsToCpu) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Initialize performance monitoring maps
|
|
|
|
Initialize the performance monitoring maps for counters, events and Uncore boxes
|
|
for the current architecture. topology_init() and numa_init() must be called before calling
|
|
perfmon_init_maps()
|
|
\sa RegisterMap list, PerfmonEvent list and BoxMap list
|
|
*/
|
|
extern void perfmon_init_maps(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Check the performance monitoring maps whether counters and events are available
|
|
|
|
Checks each counter and event in the performance monitoring maps for their availability on
|
|
the current system. topology_init(), numa_init() and perfmon_init_maps() must be called before calling
|
|
perfmon_check_counter_map().
|
|
\sa RegisterMap list, PerfmonEvent list and BoxMap list
|
|
*/
|
|
extern void perfmon_check_counter_map(int cpu_id) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Add an event string to LIKWID
|
|
|
|
An event string looks like Eventname:Countername(:Option1:Option2:...),...
|
|
The eventname, countername and options are checked if they are available.
|
|
@param [in] eventCString Event string
|
|
@return Returns the ID of the new eventSet
|
|
*/
|
|
extern int perfmon_addEventSet(const char* eventCString) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Setup all performance monitoring counters of an eventSet
|
|
|
|
@param [in] groupId ID of the eventSet (returned from perfmon_addEventSet())
|
|
@return error code (-ENOENT if groupId is invalid and -1 if the counters of one CPU cannot be set up)
|
|
*/
|
|
extern int perfmon_setupCounters(int groupId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Start performance monitoring counters
|
|
|
|
Start the counters that have been previously set up by perfmon_setupCounters().
|
|
The counter registers are zeroed before enabling the counters
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_startCounters(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Stop performance monitoring counters
|
|
|
|
Stop the counters that have been previously started by perfmon_startCounters().
|
|
This function reads the counters, so afterwards the results are available through
|
|
perfmon_getResult, perfmon_getLastResult, perfmon_getMetric and perfmon_getLastMetric.
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_stopCounters(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read the performance monitoring counters on all CPUs
|
|
|
|
Read the counters that have been previously started by perfmon_startCounters().
|
|
The counters are stopped directly to avoid interference of LIKWID with the measured
|
|
code. Before returning, the counters are started again.
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_readCounters(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read the performance monitoring counters on one CPU
|
|
|
|
Read the counters that have been previously started by perfmon_startCounters().
|
|
The counters are stopped directly to avoid interference of LIKWID with the measured
|
|
code. Before returning, the counters are started again. Only one CPU is read.
|
|
@param [in] cpu_id CPU ID of the CPU that should be read
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_readCountersCpu(int cpu_id) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read the performance monitoring counters of all threads in a group
|
|
|
|
Read the counters that have been previously started by perfmon_startCounters().
|
|
The counters are stopped directly to avoid interference of LIKWID with the measured
|
|
code. Before returning, the counters are started again.
|
|
@param [in] groupId Read the counters for all threads taking part in group
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_readGroupCounters(int groupId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read the performance monitoring counters of one thread in a group
|
|
|
|
Read the counters that have been previously started by perfmon_startCounters().
|
|
The counters are stopped directly to avoid interference of LIKWID with the measured
|
|
code. Before returning, the counters are started again. Only one thread's CPU is read.
|
|
@param [in] groupId Read the counters defined in group identified with groupId
|
|
@param [in] threadId Read the counters for the thread
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_readGroupThreadCounters(int groupId, int threadId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Switch the active eventSet to a new one
|
|
|
|
Stops the currently running counters, switches the eventSet by setting up the
|
|
counters and starts the counters.
|
|
@param [in] new_group ID of group that should be switched to.
|
|
@return 0 on success and -(thread_id+1) for error
|
|
*/
|
|
extern int perfmon_switchActiveGroup(int new_group) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Close the performance monitoring facility of LIKWID
|
|
|
|
Deallocates all internal data that is used during performance monitoring. Also
|
|
the counter values are not accessible after this function.
|
|
*/
|
|
extern void perfmon_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the results of the specified group, counter and thread
|
|
|
|
Get the result of all measurement cycles. The function takes care of counter
|
|
overflows and if the counter values need to be calculated with multipliers.
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] eventId ID of the event that should be read
|
|
@param [in] threadId ID of the thread/cpu that should be read
|
|
@return The counter result
|
|
*/
|
|
extern double perfmon_getResult(int groupId, int eventId, int threadId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the last results of the specified group, counter and thread
|
|
|
|
Get the result of the last measurement cycle. The function takes care of counter
|
|
overflows and if the counter values need to be calculated with multipliers.
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] eventId ID of the event that should be read
|
|
@param [in] threadId ID of the thread/cpu that should be read
|
|
@return The counter result
|
|
*/
|
|
extern double perfmon_getLastResult(int groupId, int eventId, int threadId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the metric result of the specified group, metric and thread
|
|
|
|
Get the metric result of all measurement cycles. It reads all raw results for the given groupId and threadId.
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] metricId ID of the metric that should be calculated
|
|
@param [in] threadId ID of the thread/cpu that should be read
|
|
@return The metric result
|
|
*/
|
|
extern double perfmon_getMetric(int groupId, int metricId, int threadId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the last metric result of the specified group, metric and thread
|
|
|
|
Get the metric result of the last measurement cycle. It reads all raw results for the given groupId and threadId.
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] metricId ID of the metric that should be calculated
|
|
@param [in] threadId ID of the thread/cpu that should be read
|
|
@return The metric result
|
|
*/
|
|
extern double perfmon_getLastMetric(int groupId, int metricId, int threadId) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the number of configured event groups
|
|
|
|
@return Number of groups
|
|
*/
|
|
extern int perfmon_getNumberOfGroups(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of configured eventSets in group
|
|
|
|
@param [in] groupId ID of group
|
|
@return Number of eventSets
|
|
*/
|
|
extern int perfmon_getNumberOfEvents(int groupId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the accumulated measurement time of a group
|
|
|
|
@param [in] groupId ID of group
|
|
@return Time in seconds the event group was measured
|
|
*/
|
|
extern double perfmon_getTimeOfGroup(int groupId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the ID of the currently set up event group
|
|
|
|
@return ID of the active group
|
|
*/
|
|
extern int perfmon_getIdOfActiveGroup(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of threads specified at perfmon_init()
|
|
|
|
@return Number of threads
|
|
*/
|
|
extern int perfmon_getNumberOfThreads(void) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Set verbosity of LIKWID library
|
|
|
|
*/
|
|
extern void perfmon_setVerbosity(int verbose) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the event name of the specified group and event
|
|
|
|
Get the event name as defined in the performance group file
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] eventId ID of the event that should be returned
|
|
@return The event name or NULL in case of failure
|
|
*/
|
|
extern char* perfmon_getEventName(int groupId, int eventId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the counter name of the specified group and event
|
|
|
|
Get the counter name as defined in the performance group file
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] eventId ID of the event of which the counter should be returned
|
|
@return The counter name or NULL in case of failure
|
|
*/
|
|
extern char* perfmon_getCounterName(int groupId, int eventId) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the name of a group
|
|
|
|
Get the name of the group. Either it is the name of the performance group or "Custom"
|
|
@param [in] groupId ID of the group that should be read
|
|
@return The group name or NULL in case of failure
|
|
*/
|
|
extern char* perfmon_getGroupName(int groupId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the metric name of the specified group and metric
|
|
|
|
Get the metric name as defined in the performance group file
|
|
@param [in] groupId ID of the group that should be read
|
|
@param [in] metricId ID of the metric that should be calculated
|
|
@return The metric name or NULL in case of failure
|
|
*/
|
|
extern char* perfmon_getMetricName(int groupId, int metricId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the short informational string of the specified group
|
|
|
|
Returns the short information string as defined by performance groups or "Custom"
|
|
in case of custom event sets
|
|
@param [in] groupId ID of the group that should be read
|
|
@return The short information or NULL in case of failure
|
|
*/
|
|
extern char* perfmon_getGroupInfoShort(int groupId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the long descriptive string of the specified group
|
|
|
|
Returns the long descriptive string as defined by performance groups or NULL
|
|
in case of custom event sets
|
|
@param [in] groupId ID of the group that should be read
|
|
@return The long description or NULL in case of failure
|
|
*/
|
|
extern char* perfmon_getGroupInfoLong(int groupId) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the number of configured metrics for group
|
|
|
|
@param [in] groupId ID of group
|
|
@return Number of metrics
|
|
*/
|
|
extern int perfmon_getNumberOfMetrics(int groupId) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the last measurement time of a group
|
|
|
|
@param [in] groupId ID of group
|
|
@return Time in seconds the event group was measured the last time
|
|
*/
|
|
extern double perfmon_getLastTimeOfGroup(int groupId) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Read the output file of the Marker API
|
|
@param [in] filename Filename with Marker API results
|
|
@return 0 or negative error number
|
|
*/
|
|
extern int perfmon_readMarkerFile(const char* filename) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Free the space used for a read-in Marker API file

Takes no arguments and returns nothing.
*/
extern void perfmon_destroyMarkerResults(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of regions listed in Marker API result file

Takes no arguments.
@return Number of regions
*/
extern int perfmon_getNumberOfRegions(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the groupID of a region
|
|
|
|
@param [in] region ID of region
|
|
@return Group ID of region
|
|
*/
|
|
extern int perfmon_getGroupOfRegion(int region) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the tag of a region
|
|
@param [in] region ID of region
|
|
@return tag of region
|
|
*/
|
|
extern char* perfmon_getTagOfRegion(int region) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of events of a region
|
|
@param [in] region ID of region
|
|
@return Number of events of region
|
|
*/
|
|
extern int perfmon_getEventsOfRegion(int region) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of metrics of a region
|
|
@param [in] region ID of region
|
|
@return Number of metrics of region
|
|
*/
|
|
extern int perfmon_getMetricsOfRegion(int region) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of threads of a region
|
|
@param [in] region ID of region
|
|
@return Number of threads of region
|
|
*/
|
|
extern int perfmon_getThreadsOfRegion(int region) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the cpulist of a region
|
|
@param [in] region ID of region
|
|
@param [in] count Length of cpulist array
|
|
@param [in,out] cpulist cpulist array
|
|
@return Number of threads of region or count, whichever is lower
|
|
*/
|
|
extern int perfmon_getCpulistOfRegion(int region, int count, int* cpulist) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the accumulated measurement time of a region for a thread
|
|
@param [in] region ID of region
|
|
@param [in] thread ID of thread
|
|
@return Measurement time of a region for a thread
|
|
*/
|
|
extern double perfmon_getTimeOfRegion(int region, int thread) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the call count of a region for a thread
|
|
@param [in] region ID of region
|
|
@param [in] thread ID of thread
|
|
@return Call count of a region for a thread
|
|
*/
|
|
extern int perfmon_getCountOfRegion(int region, int thread) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the event result of a region for an event and thread
|
|
@param [in] region ID of region
|
|
@param [in] event ID of event
|
|
@param [in] thread ID of thread
|
|
@return Result of a region for an event and thread
|
|
*/
|
|
extern double perfmon_getResultOfRegionThread(int region, int event, int thread) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the metric result of a region for a metric and thread
|
|
@param [in] region ID of region
|
|
@param [in] metricId ID of metric
|
|
@param [in] threadId ID of thread
|
|
@return Metric result of a region for a thread
|
|
*/
|
|
extern double perfmon_getMetricOfRegionThread(int region, int metricId, int threadId) __attribute__ ((visibility ("default") ));
|
|
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# Performance group related functions
|
|
################################################################################
|
|
*/
|
|
|
|
/** \addtogroup PerfGroup performance group module
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief The GroupInfo data structure describes a performance group

Groups can either be read in from a file or be a group with a custom event set.
For performance groups commonly all values are set. For groups with a custom
event set, the fields groupname and shortinfo are set to 'Custom', longinfo is
NULL and in general the nmetrics value is 0.
*/
typedef struct {
    char* groupname; /*!< \brief Name of the group: performance group name or 'Custom' */
    char* shortinfo; /*!< \brief Short info string for the group or 'Custom' */
    int nevents; /*!< \brief Number of event/counter combinations */
    char** events; /*!< \brief List of events */
    char** counters; /*!< \brief List of counter registers */
    int nmetrics; /*!< \brief Number of metrics */
    char** metricnames; /*!< \brief Metric names */
    char** metricformulas; /*!< \brief Metric formulas */
    char* longinfo; /*!< \brief Descriptive text about the group (NULL for custom event sets) */
} GroupInfo;
|
|
|
|
/*! \brief Initialize values in GroupInfo struct
|
|
|
|
Initialize values in GroupInfo struct. The function does NOT allocate the GroupInfo struct
|
|
*/
|
|
int perfgroup_new(GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Add a counter and event combination to the group
|
|
|
|
Add a counter and event combination to the group.
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] counter String with counter name
|
|
@param [in] event String with event name
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_addEvent(GroupInfo* ginfo, char* counter, char* event) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Remove a counter and event combination from a group
|
|
|
|
Remove a counter and event combination from a group
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] counter String with counter name
|
|
*/
|
|
void perfgroup_removeEvent(GroupInfo* ginfo, char* counter) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Add a metric to the group
|
|
|
|
Add a metric to the group
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] mname String with metric name/description
|
|
@param [in] mcalc String with metric formula. No spaces in string.
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_addMetric(GroupInfo* ginfo, char* mname, char* mcalc) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Remove a metric from a group
|
|
|
|
Remove a metric from a group
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] mname String with metric name/description
|
|
*/
|
|
void perfgroup_removeMetric(GroupInfo* ginfo, char* mname) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the event string of a group needed for perfmon_addEventSet
|
|
|
|
Get the event string of a group needed for perfmon_addEventSet
|
|
@param [in] ginfo GroupInfo struct
|
|
@return String with eventset or NULL
|
|
*/
|
|
char* perfgroup_getEventStr(GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the eventset string of a group
|
|
|
|
Return the event string of a group
|
|
@param [in] eventStr Eventset string
|
|
*/
|
|
void perfgroup_returnEventStr(char* eventStr) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the group name of a group
|
|
|
|
Get the group name of a group
|
|
@param [in] ginfo GroupInfo struct
|
|
@return String with group name or NULL
|
|
*/
|
|
char* perfgroup_getGroupName(GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Set the group name of a group
|
|
|
|
Set the group name of a group. String must be zero-terminated
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] groupName String with group name
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_setGroupName(GroupInfo* ginfo, char* groupName) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the group name string of a group
|
|
|
|
Return the group name string of a group
|
|
@param [in] gname Group name string
|
|
*/
|
|
void perfgroup_returnGroupName(char* gname) __attribute__ ((visibility ("default") ));
|
|
|
|
|
|
/*! \brief Set the short information string of a group
|
|
|
|
Set the short information string of a group. String must be zero-terminated
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] shortInfo String with short information
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_setShortInfo(GroupInfo* ginfo, char* shortInfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the short information string of a group
|
|
|
|
Get the short information string of a group
|
|
@param [in] ginfo GroupInfo struct
|
|
@return String with short information or NULL
|
|
*/
|
|
char* perfgroup_getShortInfo(GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the short information string of a group
|
|
|
|
Return the short information string of a group
|
|
@param [in] sinfo Short information string
|
|
*/
|
|
void perfgroup_returnShortInfo(char* sinfo) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Set the long information string of a group
|
|
|
|
Set the long information string of a group. String must be zero-terminated
|
|
@param [in] ginfo GroupInfo struct
|
|
@param [in] longInfo String with long information
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_setLongInfo(GroupInfo* ginfo, char* longInfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the long information string of a group
|
|
|
|
Get the long information string of a group
|
|
@param [in] ginfo GroupInfo struct
|
|
@return String with long information or NULL
|
|
*/
|
|
char* perfgroup_getLongInfo(GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the long information string of a group
|
|
|
|
Return the long information string of a group
|
|
@param [in] linfo Long information string
|
|
*/
|
|
void perfgroup_returnLongInfo(char* linfo) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Merge two groups
|
|
|
|
Merge two groups (group2 into group1).
|
|
@param [in,out] grp1 Group1
|
|
@param [in] grp2 Group2
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_mergeGroups(GroupInfo* grp1, GroupInfo* grp2) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Read group from file
|
|
|
|
Read group from file
|
|
@param [in] grouppath Base path to all groups
|
|
@param [in] architecture Architecture string (e.g. short_info in cpuid_info)
|
|
@param [in] groupname Group name
|
|
@param [in,out] ginfo Group filled with data from file
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_readGroup(const char* grouppath, const char* architecture, const char* groupname, GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Create group from event string
|
|
|
|
Create group from event string (list of event:counter(:opts)).
|
|
@param [in] eventStr event string
|
|
@param [in,out] ginfo Group filled with data from event string
|
|
@return 0 for success, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_customGroup(const char* eventStr, GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Return group
|
|
|
|
Return group (frees internal lists)
|
|
@param [in] ginfo Performance group info
|
|
*/
|
|
void perfgroup_returnGroup(GroupInfo* ginfo) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get all groups available in the system (base + user home)
|
|
|
|
Get all groups available in the system (base + user home)
|
|
@param [in] grouppath Base path to all groups
|
|
@param [in] architecture Architecture string (e.g. short_info in cpuid_info)
|
|
@param [out] groupnames List of group names
|
|
@param [out] groupshort List of groups' short information string
|
|
@param [out] grouplong List of groups' long information string
|
|
@return number of groups, -EINVAL or -ENOMEM in case of error.
|
|
*/
|
|
int perfgroup_getGroups( const char* grouppath, const char* architecture, char*** groupnames, char*** groupshort, char*** grouplong) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return list of all groups
|
|
|
|
Return list of all groups
|
|
@param [in] groups Number of groups
|
|
@param [in] groupnames List of group names
|
|
@param [in] groupshort List of groups' short information string
|
|
@param [in] grouplong List of groups' long information string
|
|
*/
|
|
void perfgroup_returnGroups(int groups, char** groupnames, char** groupshort, char** grouplong) __attribute__ ((visibility ("default") ));
|
|
|
|
|
|
|
|
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# Time measurements related functions
|
|
################################################################################
|
|
*/
|
|
|
|
/** \addtogroup TimerMon Time measurement module
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Union holding a cycle count, accessible as one 64 bit value or as two 32 bit fields
\extends TimerData
*/
typedef union
{
    uint64_t int64; /*!< \brief Cycle count in 64 bit */
    struct {uint32_t lo, hi;} int32; /*!< \brief Cycle count stored in two 32 bit fields */
} TscCounter;
|
|
|
|
/*! \brief Struct defining the start and stop time of a time interval
|
|
*/
|
|
typedef struct {
|
|
TscCounter start; /*!< \brief Cycles at start */
|
|
TscCounter stop; /*!< \brief Cycles at stop */
|
|
} TimerData;
|
|
|
|
/*! \brief Initialize timer by retrieving baseline frequency and cpu clock
|
|
*/
|
|
extern void timer_init( void ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the measured interval in seconds
|
|
|
|
@param [in] time Structure holding the cycle count at start and stop
|
|
@return Time in seconds
|
|
*/
|
|
extern double timer_print( const TimerData* time) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the measured interval in cycles
|
|
|
|
@param [in] time Structure holding the cycle count at start and stop
|
|
@return Time in cycles
|
|
*/
|
|
extern uint64_t timer_printCycles( const TimerData* time) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Reset values in TimerData
|
|
|
|
@param [in] time Structure holding the cycle count at start and stop
|
|
*/
|
|
extern void timer_reset( TimerData* time ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the CPU clock determined at timer_init
|
|
|
|
@return CPU clock
|
|
*/
|
|
extern uint64_t timer_getCpuClock( void ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the current CPU clock read from sysfs
|
|
|
|
@return CPU clock
|
|
*/
|
|
extern uint64_t timer_getCpuClockCurrent( int cpu_id ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the cycles clock determined at timer_init
|
|
|
|
@return cycle clock
|
|
*/
|
|
extern uint64_t timer_getCycleClock( void ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Return the baseline CPU clock determined at timer_init
|
|
|
|
@return Baseline CPU clock
|
|
*/
|
|
extern uint64_t timer_getBaseline( void ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Start time measurement
|
|
|
|
@param [in,out] time Structure holding the cycle count at start
|
|
*/
|
|
extern void timer_start( TimerData* time ) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Stop time measurement
|
|
|
|
@param [in,out] time Structure holding the cycle count at stop
|
|
*/
|
|
extern void timer_stop ( TimerData* time) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Sleep for specified usecs
|
|
|
|
@param [in] usec Amount of usecs to sleep
|
|
*/
|
|
extern int timer_sleep(unsigned long usec) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Finalize timer module
|
|
|
|
*/
|
|
extern void timer_finalize(void) __attribute__ ((visibility ("default") ));
|
|
|
|
/** @}*/
|
|
|
|
/*
|
|
################################################################################
|
|
# Power measurements related functions
|
|
################################################################################
|
|
*/
|
|
/** \addtogroup PowerMon Power and Energy monitoring module
|
|
* @{
|
|
*/
|
|
|
|
/*!
\def NUM_POWER_DOMAINS
Amount of currently supported RAPL domains. Used as the length of the
power_names array and of the domains list in PowerInfo.
*/
#define NUM_POWER_DOMAINS 5
|
|
/*! \brief List of all RAPL domain names
|
|
*/
|
|
extern const char* power_names[NUM_POWER_DOMAINS] __attribute__ ((visibility ("default") ));
|
|
|
|
/*!
\def POWER_DOMAIN_SUPPORT_STATUS
Flag to check in PowerDomain's supportFlags if the status msr registers are available
*/
#define POWER_DOMAIN_SUPPORT_STATUS (1ULL<<0)
|
|
/*!
\def POWER_DOMAIN_SUPPORT_LIMIT
Flag to check in PowerDomain's supportFlags if the limit msr registers are available
*/
#define POWER_DOMAIN_SUPPORT_LIMIT (1ULL<<1)
|
|
/*!
\def POWER_DOMAIN_SUPPORT_POLICY
Flag to check in PowerDomain's supportFlags if the policy msr registers are available
*/
#define POWER_DOMAIN_SUPPORT_POLICY (1ULL<<2)
|
|
/*!
\def POWER_DOMAIN_SUPPORT_PERF
Flag to check in PowerDomain's supportFlags if the perf msr registers are available
*/
#define POWER_DOMAIN_SUPPORT_PERF (1ULL<<3)
|
|
/*!
\def POWER_DOMAIN_SUPPORT_INFO
Flag to check in PowerDomain's supportFlags if the info msr registers are available
*/
#define POWER_DOMAIN_SUPPORT_INFO (1ULL<<4)
|
|
|
|
|
|
/*! \brief Information structure of CPU's turbo mode
\extends PowerInfo
*/
typedef struct {
    int numSteps; /*!< \brief Amount of turbo mode steps/frequencies */
    double* steps; /*!< \brief List of turbo mode steps */
} TurboBoost;
|
|
|
|
/*! \brief Enum for all supported RAPL domains
\extends PowerDomain

The enumerators are numbered consecutively from 0 to 4.
*/
typedef enum {
    PKG = 0, /*!< \brief PKG domain, mostly one CPU socket/package */
    PP0 = 1, /*!< \brief PP0 domain, not clearly defined by Intel */
    PP1 = 2, /*!< \brief PP1 domain, not clearly defined by Intel */
    DRAM = 3, /*!< \brief DRAM domain, the memory modules */
    PLATFORM = 4 /*!< \brief PLATFORM domain, the whole system (if powered through the main board) */
} PowerType;
|
|
|
|
/*! \brief Structure describing an RAPL power domain
|
|
\extends PowerInfo
|
|
*/
|
|
typedef struct {
|
|
PowerType type; /*!< \brief Identifier which RAPL domain is managed by this struct */
|
|
uint32_t supportFlags; /*!< \brief Bitmask which features are supported by the power domain */
|
|
double energyUnit; /*!< \brief Multiplier for energy measurements */
|
|
double tdp; /*!< \brief Thermal Design Power (maximum amount of heat generated by the CPU) */
|
|
double minPower; /*!< \brief Minimal power consumption of the CPU */
|
|
double maxPower; /*!< \brief Maximal power consumption of the CPU */
|
|
double maxTimeWindow; /*!< \brief Minimal power measurement interval */
|
|
} PowerDomain;
|
|
|
|
/*! \brief Information structure of CPU's power measurement facility
|
|
*/
|
|
typedef struct {
|
|
double baseFrequency; /*!< \brief Base frequency of the CPU */
|
|
double minFrequency; /*!< \brief Minimal frequency of the CPU */
|
|
TurboBoost turbo; /*!< \brief Turbo boost information */
|
|
int hasRAPL; /*!< \brief RAPL support flag */
|
|
double powerUnit; /*!< \brief Multiplier for power measurements */
|
|
double timeUnit; /*!< \brief Multiplier for time information */
|
|
double uncoreMinFreq; /*!< \brief Minimal uncore frequency */
|
|
double uncoreMaxFreq; /*!< \brief Maximal uncore frequency */
|
|
uint8_t perfBias; /*!< \brief Performance energy bias */
|
|
PowerDomain domains[NUM_POWER_DOMAINS]; /*!< \brief List of power domains */
|
|
} PowerInfo;
|
|
|
|
/*! \brief Power measurement data for start/stop measurements
*/
typedef struct {
    int domain; /*!< \brief RAPL domain identifier */
    uint32_t before; /*!< \brief Counter state at start */
    uint32_t after; /*!< \brief Counter state at stop */
} PowerData;
|
|
|
|
/*! \brief Variable holding the global power information structure */
|
|
extern PowerInfo power_info;
|
|
|
|
/** \brief Pointer for exporting the PowerInfo data structure */
|
|
typedef PowerInfo* PowerInfo_t;
|
|
/** \brief Pointer for exporting the PowerData data structure */
|
|
typedef PowerData* PowerData_t;
|
|
|
|
/*! \brief Initialize energy measurements on specific CPU

Additionally, it reads basic information about the energy measurements like
minimal measurement time.
@param [in] cpuId Initialize energy facility for this CPU
@return RAPL status (0=No RAPL, 1=RAPL working)
*/
extern int power_init(int cpuId) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get a pointer to the energy facility information
|
|
|
|
@return PowerInfo_t pointer
|
|
\sa PowerInfo_t
|
|
*/
|
|
extern PowerInfo_t get_powerInfo(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Read the current energy value

@param [in] cpuId Read energy facility for this CPU
@param [in] reg Energy register
@param [out] data Energy data
*/
extern int power_read(int cpuId, uint64_t reg, uint32_t *data) __attribute__ ((visibility ("default") ));
/*! \brief Read the current energy value using a specific communication socket

@param [in] socket_fd Communication socket for the read operation
@param [in] cpuId Read energy facility for this CPU
@param [in] reg Energy register
@param [out] data Energy data
*/
extern int power_tread(int socket_fd, int cpuId, uint64_t reg, uint32_t *data) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Start energy measurements
|
|
|
|
@param [in,out] data Data structure holding start and stop values for energy measurements
|
|
@param [in] cpuId Start energy facility for this CPU
|
|
@param [in] type Which type should be measured
|
|
@return error code
|
|
*/
|
|
extern int power_start(PowerData_t data, int cpuId, PowerType type) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Stop energy measurements
|
|
|
|
@param [in,out] data Data structure holding start and stop values for energy measurements
|
|
@param [in] cpuId Start energy facility for this CPU
|
|
@param [in] type Which type should be measured
|
|
@return error code
|
|
*/
|
|
extern int power_stop(PowerData_t data, int cpuId, PowerType type) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Print energy measurements gathered by power_start() and power_stop()
|
|
|
|
@param [in] data Data structure holding start and stop values for energy measurements
|
|
@return Consumed energy in Joules
|
|
*/
|
|
extern double power_printEnergy(const PowerData* data) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get energy unit

@param [in] domain RAPL domain ID
@return Energy unit of the given RAPL domain
*/
extern double power_getEnergyUnit(int domain) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the values of the limit register of a domain
|
|
NOT IMPLEMENTED
|
|
|
|
@param [in] cpuId CPU ID
|
|
@param [in] domain RAPL domain ID
|
|
@param [out] power Energy limit
|
|
@param [out] time Time limit
|
|
@return error code
|
|
*/
|
|
int power_limitGet(int cpuId, PowerType domain, double* power, double* time) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Set the values of the limit register of a domain
|
|
NOT IMPLEMENTED
|
|
|
|
@param [in] cpuId CPU ID
|
|
@param [in] domain RAPL domain ID
|
|
@param [in] power Energy limit
|
|
@param [in] time Time limit
|
|
@param [in] doClamping Activate clamping (going below OS-requested power level)
|
|
@return error code
|
|
*/
|
|
int power_limitSet(int cpuId, PowerType domain, double power, double time, int doClamping) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the state of a energy limit, activated or deactivated
|
|
NOT IMPLEMENTED
|
|
|
|
@param [in] cpuId CPU ID
|
|
@param [in] domain RAPL domain ID
|
|
@return state, 1 for active, 0 for inactive
|
|
*/
|
|
int power_limitState(int cpuId, PowerType domain) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Free space of power_unit
*/
extern void power_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
/*
################################################################################
# Thermal measurements related functions
################################################################################
*/
/** \addtogroup ThermalMon Thermal monitoring module
 *  @{
 */
|
|
/*! \brief Initialize thermal measurements on specific CPU

@param [in] cpuId Initialize thermal facility for this CPU
*/
extern void thermal_init(int cpuId) __attribute__ ((visibility ("default") ));
/*! \brief Read the current thermal value

@param [in] cpuId Read thermal facility for this CPU
@param [out] data Thermal data
*/
extern int thermal_read(int cpuId, uint32_t *data) __attribute__ ((visibility ("default") ));
/*! \brief Read the current thermal value using a specific communication socket

@param [in] socket_fd Communication socket for the read operation
@param [in] cpuId Read thermal facility for this CPU
@param [out] data Thermal data
*/
extern int thermal_tread(int socket_fd, int cpuId, uint32_t *data) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
|
|
/*
################################################################################
# Memory sweeping related functions
################################################################################
*/
/** \addtogroup MemSweep Memory sweeping module
 *  @{
 */
|
|
/*! \brief Sweeping the memory of a NUMA node

Sweeps (zeros) the memory of NUMA node with ID \a domainId
@param [in] domainId NUMA node ID
*/
extern void memsweep_domain(int domainId) __attribute__ ((visibility ("default") ));
/*! \brief Sweeping the memory of all NUMA nodes covered by CPU list

Sweeps (zeros) the memory of all NUMA nodes containing the CPUs in \a processorList
@param [in] processorList List of CPU IDs
@param [in] numberOfProcessors Number of CPUs in list
*/
extern void memsweep_threadGroup(const int* processorList, int numberOfProcessors) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
/*
################################################################################
# CPU feature related functions
################################################################################
*/
/** \addtogroup CpuFeatures Retrieval and manipulation of processor features
 *  @{
 */
|
|
/*! \brief Enumeration of all CPU related features.
*/
typedef enum {
    FEAT_HW_PREFETCHER=0, /*!< \brief Hardware prefetcher */
    FEAT_CL_PREFETCHER, /*!< \brief Adjacent cache line prefetcher */
    FEAT_DCU_PREFETCHER, /*!< \brief DCU L1 data cache prefetcher */
    FEAT_IP_PREFETCHER, /*!< \brief IP L1 data cache prefetcher */
    FEAT_FAST_STRINGS, /*!< \brief Fast-strings feature */
    FEAT_THERMAL_CONTROL, /*!< \brief Automatic Thermal Control Circuit */
    FEAT_PERF_MON, /*!< \brief Hardware performance monitoring */
    FEAT_FERR_MULTIPLEX, /*!< \brief FERR# Multiplexing, must be 1 for XAPIC interrupt model */
    FEAT_BRANCH_TRACE_STORAGE, /*!< \brief Branch Trace Storage */
    FEAT_XTPR_MESSAGE, /*!< \brief xTPR Message to set processor priority */
    FEAT_PEBS, /*!< \brief Precise Event Based Sampling (PEBS) */
    FEAT_SPEEDSTEP, /*!< \brief Enhanced Intel SpeedStep Technology to reduce energy consumption */
    FEAT_MONITOR, /*!< \brief MONITOR/MWAIT feature to monitor write-back stores */
    FEAT_SPEEDSTEP_LOCK, /*!< \brief Enhanced Intel SpeedStep Technology Select Lock */
    FEAT_CPUID_MAX_VAL, /*!< \brief Limit CPUID Maxval */
    FEAT_XD_BIT, /*!< \brief Execute Disable Bit */
    FEAT_DYN_ACCEL, /*!< \brief Intel Dynamic Acceleration */
    FEAT_TURBO_MODE, /*!< \brief Intel Turbo Mode */
    FEAT_TM2, /*!< \brief Thermal Monitoring 2 */
    CPUFEATURES_MAX /*!< \brief Number of CPU features (one past the last valid feature) */
} CpuFeature;
|
|
|
|
/*! \brief Initialize the internal feature variables for all CPUs

Initialize the internal feature variables for all CPUs
*/
extern void cpuFeatures_init(void) __attribute__ ((visibility ("default") ));
/*! \brief Print state of all CPU features for a given CPU

Print state of all CPU features for a given CPU
@param [in] cpu CPU ID
*/
extern void cpuFeatures_print(int cpu) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get state of a CPU feature for a given CPU
|
|
|
|
Get state of a CPU feature for a given CPU
|
|
@param [in] cpu CPU ID
|
|
@param [in] type CPU feature
|
|
@return State of CPU feature (1=enabled, 0=disabled)
|
|
*/
|
|
extern int cpuFeatures_get(int cpu, CpuFeature type) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the name of a CPU feature
|
|
|
|
Get the name of a CPU feature
|
|
@param [in] type CPU feature
|
|
@return Name of the CPU feature or NULL if feature is not available
|
|
*/
|
|
extern char* cpuFeatures_name(CpuFeature type) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Enable a CPU feature for a specific CPU
|
|
|
|
Enable a CPU feature for a specific CPU. Only the state of the prefetchers can be changed, all other features return -EINVAL
|
|
@param [in] cpu CPU ID
|
|
@param [in] type CPU feature
|
|
@param [in] print Print outcome of operation
|
|
@return Status of operation (0=success, all others are erros, either by MSR access or invalid feature)
|
|
*/
|
|
extern int cpuFeatures_enable(int cpu, CpuFeature type, int print) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Disable a CPU feature for a specific CPU
|
|
|
|
Disable a CPU feature for a specific CPU. Only the state of the prefetchers can be changed, all other features return -EINVAL
|
|
@param [in] cpu CPU ID
|
|
@param [in] type CPU feature
|
|
@param [in] print Print outcome of operation
|
|
@return Status of operation (0=success, all others are erros, either by MSR access or invalid feature)
|
|
*/
|
|
extern int cpuFeatures_disable(int cpu, CpuFeature type, int print) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
|
|
/*
################################################################################
# CPU frequency related functions
################################################################################
*/
/** \addtogroup CpuFreq Retrieval and manipulation of processor clock frequencies
 *  @{
 */
|
|
/*! \brief Initialize cpu frequency module

Initialize cpu frequency module
@return returns 0 if successful and 1 if invalid accessmode
*/
extern int freq_init(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the base clock frequency of a hardware thread

Get the base clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_getCpuClockBase(const int cpu_id) __attribute__ ((visibility ("default") ));
/*! \brief Get the current clock frequency of a hardware thread

Get the current clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_getCpuClockCurrent(const int cpu_id ) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Get the maximal clock frequency of a hardware thread

Get the maximal clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_getCpuClockMax(const int cpu_id ) __attribute__ ((visibility ("default") ));
/*! \brief Get the maximal available clock frequency of a hardware thread

Get the maximal available clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_getConfCpuClockMax(const int cpu_id) __attribute__ ((visibility ("default") ));
/*! \brief Set the maximal clock frequency of a hardware thread

Set the maximal clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@param [in] freq Frequency in kHz
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_setCpuClockMax(const int cpu_id, const uint64_t freq) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the minimal clock frequency of a hardware thread

Get the minimal clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_getCpuClockMin(const int cpu_id ) __attribute__ ((visibility ("default") ));
/*! \brief Get the minimal available clock frequency of a hardware thread

Get the minimal available clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_getConfCpuClockMin(const int cpu_id) __attribute__ ((visibility ("default") ));
/*! \brief Set the minimal clock frequency of a hardware thread

Set the minimal clock frequency of a hardware thread
@param [in] cpu_id CPU ID
@param [in] freq Frequency in kHz
@return Frequency or 0 in case of errors
*/
extern uint64_t freq_setCpuClockMin(const int cpu_id, const uint64_t freq) __attribute__ ((visibility ("default") ));
|
|
/*! \brief De/Activate turbo mode for a hardware thread

De/Activate turbo mode for a hardware thread
@param [in] cpu_id CPU ID
@param [in] turbo (0=off, 1=on)
@return 1 or 0 in case of errors
*/
extern int freq_setTurbo(const int cpu_id, int turbo) __attribute__ ((visibility ("default") ));
/*! \brief Get state of turbo mode for a hardware thread

Get state of turbo mode for a hardware thread
@param [in] cpu_id CPU ID
@return 1=Turbo active or 0=Turbo inactive
*/
extern int freq_getTurbo(const int cpu_id) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the frequency governor of a hardware thread

Get the frequency governor of a hardware thread. The returned string must be freed by the caller.
@param [in] cpu_id CPU ID
@return Governor or NULL in case of errors
*/
extern char * freq_getGovernor(const int cpu_id ) __attribute__ ((visibility ("default") ));
/*! \brief Set the frequency governor of a hardware thread

Set the frequency governor of a hardware thread.
@param [in] cpu_id CPU ID
@param [in] gov Governor
@return 1 or 0 in case of errors
*/
extern int freq_setGovernor(const int cpu_id, const char* gov) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the available frequencies of a hardware thread

Get the available frequencies of a hardware thread. The returned string must be freed by the caller.
@param [in] cpu_id CPU ID
@return String with available frequencies or NULL in case of errors
*/
extern char * freq_getAvailFreq(const int cpu_id ) __attribute__ ((visibility ("default") ));
/*! \brief Get the available frequency governors of a hardware thread

Get the available frequency governors of a hardware thread. The returned string must be freed by the caller.
@param [in] cpu_id CPU ID
@return String with available frequency governors or NULL in case of errors
*/
extern char * freq_getAvailGovs(const int cpu_id ) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Set the minimal Uncore frequency

Set the minimal Uncore frequency. Since the ranges are not documented, valid frequencies are from minimal CPU clock to maximal Turbo clock. If selecting a frequency at the borders, please check the result with the UNCORE_CLOCK event to be effective.
@param [in] socket_id ID of socket
@param [in] freq Frequency in MHz
@return 0 for success, -ERROR at failure
*/
extern int freq_setUncoreFreqMin(const int socket_id, const uint64_t freq) __attribute__ ((visibility ("default") ));

/*! \brief Get the minimal Uncore frequency

Get the minimal Uncore frequency.
@param [in] socket_id ID of socket
@return frequency in MHz or 0 at failure
*/
extern uint64_t freq_getUncoreFreqMin(const int socket_id) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Set the maximal Uncore frequency

Set the maximal Uncore frequency. Since the ranges are not documented, valid frequencies are from minimal CPU clock to maximal Turbo clock. If selecting a frequency at the borders, please check the result with the UNCORE_CLOCK event to be effective.
@param [in] socket_id ID of socket
@param [in] freq Frequency in MHz
@return 0 for success, -ERROR at failure
*/
extern int freq_setUncoreFreqMax(const int socket_id, const uint64_t freq) __attribute__ ((visibility ("default") ));

/*! \brief Get the maximal Uncore frequency

Get the maximal Uncore frequency.
@param [in] socket_id ID of socket
@return frequency in MHz or 0 at failure
*/
extern uint64_t freq_getUncoreFreqMax(const int socket_id) __attribute__ ((visibility ("default") ));
/*! \brief Get the current Uncore frequency

Get the current Uncore frequency.
@param [in] socket_id ID of socket
@return frequency in MHz or 0 at failure
*/
extern uint64_t freq_getUncoreFreqCur(const int socket_id) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Finalize cpu frequency module

Finalize cpu frequency module
*/
extern void freq_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/** @}*/
|
|
|
|
|
|
/*
################################################################################
# Performance monitoring for NVIDIA GPUs related functions
################################################################################
*/
/** \addtogroup Nvmon Performance monitoring for NVIDIA GPUs
 *  @{
 */
|
|
|
|
#if defined(LIKWID_WITH_NVMON) || defined(LIKWID_NVMON)
|
|
/*! \brief Structure with general GPU information for each device

General information covers GPU devid, name and clock and memory specific information.
Most information comes from cuDeviceGetProperties() and cuDeviceGetAttribute().
*/
typedef struct {
    int devid; /*!< \brief Device ID */
    int numaNode; /*!< \brief Closest NUMA domain to the device */
    char* name; /*!< \brief Name of the device */
    char* short_name; /*!< \brief Short name of the device */
    uint64_t mem; /*!< \brief Total memory of device */
    int ccapMajor; /*!< \brief Major number of device's compute capability */
    int ccapMinor; /*!< \brief Minor number of device's compute capability */
    int maxThreadsPerBlock; /*!< \brief Maximum number of threads per block */
    int maxThreadsDim[3]; /*!< \brief Maximum sizes of each dimension of a block */
    int maxGridSize[3]; /*!< \brief Maximum sizes of each dimension of a grid */
    int sharedMemPerBlock; /*!< \brief Total amount of shared memory available per block */
    int totalConstantMemory; /*!< \brief Total amount of constant memory available on the device */
    int simdWidth; /*!< \brief SIMD width of arithmetic units = warp size */
    int memPitch; /*!< \brief Maximum pitch allowed by the memory copy functions that involve memory regions allocated through cuMemAllocPitch() */
    int regsPerBlock; /*!< \brief Total number of registers available per block */
    int clockRatekHz; /*!< \brief Clock frequency in kilohertz */
    int textureAlign; /*!< \brief Alignment requirement for textures */
    int surfaceAlign; /*!< \brief Alignment requirement for surfaces */
    int l2Size; /*!< \brief L2 cache in bytes. 0 if the device doesn't have L2 cache */
    int memClockRatekHz; /*!< \brief Peak memory clock frequency in kilohertz */
    int pciBus; /*!< \brief PCI bus identifier of the device */
    int pciDev; /*!< \brief PCI device (also known as slot) identifier of the device */
    int pciDom; /*!< \brief PCI domain identifier of the device */
    int maxBlockRegs; /*!< \brief Maximum number of 32-bit registers available to a thread block */
    int numMultiProcs; /*!< \brief Number of multiprocessors on the device */
    int maxThreadPerMultiProc; /*!< \brief Maximum resident threads per multiprocessor */
    int memBusWidth; /*!< \brief Global memory bus width in bits */
    int unifiedAddrSpace; /*!< \brief 1 if the device shares a unified address space with the host, or 0 if not */
    int ecc; /*!< \brief 1 if error correction is enabled on the device, 0 if error correction is disabled or not supported by the device */
    int asyncEngines; /*!< \brief Number of asynchronous engines */
    int mapHostMem; /*!< \brief 1 if the device can map host memory into the CUDA address space */
    int integrated; /*!< \brief 1 if the device is an integrated (motherboard) GPU and 0 if it is a discrete (card) component */
} GpuDevice;
|
|
|
|
|
|
/*! \brief Structure holding information of all GPUs
|
|
|
|
*/
|
|
typedef struct {
|
|
int numDevices; /*!< \brief Number of detected devices */
|
|
GpuDevice* devices; /*!< \brief List with GPU-specific topology information */
|
|
} GpuTopology;
|
|
|
|
/*! \brief Variable holding the global gpu information structure */
|
|
extern GpuTopology gpuTopology;
|
|
/** \brief Pointer for exporting the GpuTopology data structure */
|
|
typedef GpuTopology* GpuTopology_t;
|
|
|
|
|
|
/*! \brief Initialize GPU topology information
|
|
|
|
Reads in the topology information from the CUDA library (if found).
|
|
\sa GpuTopology_t
|
|
@return 0 or -errno in case of error
|
|
*/
|
|
extern int topology_gpu_init(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Destroy GPU topology structure GpuTopology_t
|
|
|
|
Retrieved pointers to the structures are not valid anymore after this function call
|
|
\sa GpuTopology_t
|
|
*/
|
|
extern void topology_gpu_finalize(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Retrieve GPU topology of the current machine
|
|
|
|
\sa GpuTopology_t
|
|
@return GpuTopology_t (pointer to internal gpuTopology structure)
|
|
*/
|
|
extern GpuTopology_t get_gpuTopology(void) __attribute__ ((visibility ("default") ));
|
|
|
|
|
|
/*
################################################################################
# NvMarker API related functions
################################################################################
*/
/** \addtogroup NvMarkerAPI Marker API module for GPUs
 *  @{
 */
|
|
/*! \brief Initialize NvLIKWID's marker API

Must be called in serial region of the application to set up basic data structures
of LIKWID.
Reads environment variables:
- LIKWID_GEVENTS (GPU event string)
- LIKWID_GPUS (GPU list separated by ,)
- LIKWID_GPUFILEPATH (Outputpath for NvMarkerAPI file)
*/
extern void likwid_gpuMarkerInit(void) __attribute__ ((visibility ("default") ));
/*! \brief Select next group to measure

Must be called in parallel region of the application to switch group on every CPU.
*/
extern void likwid_gpuMarkerNextGroup(void) __attribute__ ((visibility ("default") ));
/*! \brief Close LIKWID's NvMarker API

Must be called in serial region of the application. It gathers all data of regions and
writes them out to a file (filepath in env variable LIKWID_GPUFILEPATH, the output path
read by likwid_gpuMarkerInit()).
*/
extern void likwid_gpuMarkerClose(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Register a measurement region

Initializes the hashTable entry in order to reduce execution time of likwid_gpuMarkerStartRegion()
@param [in] regionTag Initialize data using this string
@return Error code
*/
extern int likwid_gpuMarkerRegisterRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
/*! \brief Start a measurement region

Reads the values of all configured counters and saves the results under the name given
in regionTag.
@param [in] regionTag Store data using this string
@return Error code of start operation
*/
extern int likwid_gpuMarkerStartRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
/*! \brief Stop a measurement region

Reads the values of all configured counters and saves the results under the name given
in regionTag. The measurement data of the stopped region gets summed up in global region counters.
@param [in] regionTag Store data using this string
@return Error code of stop operation
*/
extern int likwid_gpuMarkerStopRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
/*! \brief Reset a measurement region

Reset the values of all configured counters and timers.
@param [in] regionTag Reset data using this string
@return Error code of reset operation
*/
extern int likwid_gpuMarkerResetRegion(const char* regionTag) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get accumulated data of a code region

Get the accumulated data of the current thread for the given regionTag.
@param [in] regionTag Print data using this string
@param [in,out] nr_gpus Length of first dimension of the arrays. Afterwards the actual count of GPUs.
@param [in,out] nr_events Length of events array
@param [out] events Events array for the intermediate results
@param [out] time Accumulated measurement time
@param [out] count Call count of the code region
*/
extern void likwid_gpuMarkerGetRegion(const char* regionTag, int* nr_gpus, int* nr_events, double** events, double **time, int **count) __attribute__ ((visibility ("default") ));
|
|
|
|
/*! \brief Read the output file of the NvMarker API
@param [in] filename Filename with NvMarker API results
@return 0 or negative error number
*/
int nvmon_readMarkerFile(const char* filename) __attribute__ ((visibility ("default") ));
/*! \brief Free space for read in NvMarker API file
*/
void nvmon_destroyMarkerResults(void) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of regions listed in NvMarker API result file

@return Number of regions
*/
int nvmon_getNumberOfRegions(void) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the number of metrics of a region
@param [in] region ID of region
@return Number of metrics of region
*/
int nvmon_getMetricsOfRegion(int region) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of GPUs of a region
@param [in] region ID of region
@return Number of GPUs of region
*/
int nvmon_getGpusOfRegion(int region) __attribute__ ((visibility ("default") ));
/*! \brief Get the GPU list of a region
@param [in] region ID of region
@param [in] count Length of gpulist array
@param [in,out] gpulist gpulist array
@return Number of GPUs of region or count, whatever is lower
*/
int nvmon_getGpulistOfRegion(int region, int count, int* gpulist) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the accumulated measurement time of a region for a GPU
@param [in] region ID of region
@param [in] gpu ID of GPU
@return Measurement time of a region for a GPU
*/
double nvmon_getTimeOfRegion(int region, int gpu) __attribute__ ((visibility ("default") ));
/*! \brief Get the call count of a region for a GPU
@param [in] region ID of region
@param [in] gpu ID of GPU
@return Call count of a region for a GPU
*/
int nvmon_getCountOfRegion(int region, int gpu) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the groupID of a region

@param [in] region ID of region
@return Group ID of region
*/
int nvmon_getGroupOfRegion(int region) __attribute__ ((visibility ("default") ));
/*! \brief Get the tag of a region
@param [in] region ID of region
@return tag of region
*/
char* nvmon_getTagOfRegion(int region) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of events of a region
@param [in] region ID of region
@return Number of events of region
*/
int nvmon_getEventsOfRegion(int region) __attribute__ ((visibility ("default") ));
|
|
/*! \brief Get the event result of a region for an event and GPU
@param [in] region ID of region
@param [in] eventId ID of event
@param [in] gpuId ID of GPU
@return Result of a region for an event and GPU
*/
double nvmon_getResultOfRegionGpu(int region, int eventId, int gpuId) __attribute__ ((visibility ("default") ));
/*! \brief Get the metric result of a region for a metric and GPU
@param [in] region ID of region
@param [in] metricId ID of metric
@param [in] gpuId ID of GPU
@return Metric result of a region for a GPU
*/
double nvmon_getMetricOfRegionGpu(int region, int metricId, int gpuId) __attribute__ ((visibility ("default") ));
|
|
|
|
/** @}*/
|
|
|
|
/*
################################################################################
# Nvmon related functions (Nvidia GPU monitoring)
################################################################################
*/

/** \addtogroup Nvmon Nvidia GPU monitoring API module for GPUs
 *  @{
 */
|
|
|
|
/*! \brief Element in the output list from nvmon_getEventsOfGpu

It holds the name, the description and the limitation string for one event.
*/
typedef struct {
    char* name; /*!< \brief Name of the event */
    char* desc; /*!< \brief Description of the event */
    char* limit; /*!< \brief Limitation string of the event, commonly 'GPU' */
} NvmonEventListEntry;
|
|
|
|
/*! \brief Output list from nvmon_getEventsOfGpu with all supported events
|
|
|
|
Output list from nvmon_getEventsOfGpu with all supported events
|
|
*/
|
|
typedef struct {
|
|
int numEvents; /*!< \brief Number of events */
|
|
NvmonEventListEntry *events; /*!< \brief List of events */
|
|
} NvmonEventList;
|
|
/** \brief Pointer for exporting the NvmonEventList data structure */
|
|
typedef NvmonEventList* NvmonEventList_t;
|
|
|
|
|
|
/*! \brief Get the list of supported event of a GPU
|
|
|
|
@param [in] gpuId ID of GPU (from GPU topology)
|
|
@param [out] list List of events
|
|
@return Number of supported events or -errno
|
|
*/
|
|
int nvmon_getEventsOfGpu(int gpuId, NvmonEventList_t* list);
|
|
/*! \brief Return the list of supported event of a GPU
|
|
|
|
Return the list of supported event of a GPU from nvmon_getEventsOfGpu()
|
|
@param [in] list List of events
|
|
*/
|
|
void nvmon_returnEventsOfGpu(NvmonEventList_t list);
|
|
|
|
|
|
/*! \brief Initialize the Nvidia GPU performance monitoring facility (Nvmon)

Initialize the Nvidia GPU performance monitoring feature by creating basic data structures.
The CUDA and CUPTI library paths need to be in LD_LIBRARY_PATH to be found by dlopen.

@param [in] nrGpus Number of GPUs
@param [in] gpuIds List of GPUs
@return error code (0 on success, -ERRORCODE on failure)
*/
int nvmon_init(int nrGpus, const int* gpuIds) __attribute__ ((visibility ("default") ));
/*! \brief Close the Nvidia GPU performance monitoring facility of LIKWID (Nvmon)

Deallocates all internal data that is used during Nvmon performance monitoring. Also
the counter values are not accessible anymore after calling this function.
*/
void nvmon_finalize(void) __attribute__ ((visibility ("default") ));
/*! \brief Add an event string to LIKWID Nvmon

An event string looks like Eventname:Countername,...
The eventname and countername are checked if they are available.

@param [in] eventCString Event string
@return Returns the ID of the new eventSet
*/
int nvmon_addEventSet(const char* eventCString) __attribute__ ((visibility ("default") ));
/*! \brief Setup all Nvmon performance monitoring counters of an eventSet

@param [in] gid ID of the eventSet (the original text says "returned from perfmon_addEventSet()";
            NOTE(review): presumably nvmon_addEventSet() is meant - confirm)
@return error code (-ENOENT if groupId is invalid and -1 if the counters of one CPU cannot be set up;
        NOTE(review): "CPU" looks carried over from the perfmon documentation, presumably GPU - confirm)
*/
int nvmon_setupCounters(int gid) __attribute__ ((visibility ("default") ));
/*! \brief Start Nvmon performance monitoring counters

Start the counters that have been previously set up by nvmon_setupCounters().
The counter registers are zeroed before enabling the counters.

@return 0 on success and -(gpuid+1) for error
*/
int nvmon_startCounters(void) __attribute__ ((visibility ("default") ));
/*! \brief Stop Nvmon performance monitoring counters

Stop the counters that have been previously started by nvmon_startCounters().

@return 0 on success and -(gpuid+1) for error
*/
int nvmon_stopCounters(void) __attribute__ ((visibility ("default") ));
/*! \brief Read the Nvmon performance monitoring counters on all GPUs

Read the counters that have been previously started by nvmon_startCounters().

@return 0 on success and -(gpuid+1) for error
*/
int nvmon_readCounters(void) __attribute__ ((visibility ("default") ));
/*! \brief Switch the active eventSet to a new one (Nvmon)

Stops the currently running counters, switches the eventSet by setting up the
counters and starts the counters.

@param [in] new_group ID of group that should be switched to.
@return 0 on success and -(thread_id+1) for error
        (NOTE(review): the sibling start/stop/read functions document -(gpuid+1);
        "thread_id" is presumably carried over from the perfmon documentation - confirm)
*/
int nvmon_switchActiveGroup(int new_group) __attribute__ ((visibility ("default") ));
/*! \brief Set verbosity of LIKWID Nvmon library

@param [in] level Verbosity level
            (NOTE(review): presumably one of the DEBUGLEV_* values defined at the top of this header - confirm)
*/
void nvmon_setVerbosity(int level) __attribute__ ((visibility ("default") ));
/*! \brief Get the results of the specified group, counter and GPU (Nvmon)

Get the result of all measurement cycles.

@param [in] groupId ID of the group that should be read
@param [in] eventId ID of the event that should be read
@param [in] gpuId ID of the GPU that should be read
@return The counter result
*/
double nvmon_getResult(int groupId, int eventId, int gpuId) __attribute__ ((visibility ("default") ));
/*! \brief Get the last results of the specified group, counter and GPU (Nvmon)

Get the result of the last measurement cycle (between start/stop, start/read, read/read or read/stop).

@param [in] groupId ID of the group that should be read
@param [in] eventId ID of the event that should be read
@param [in] gpuId ID of the GPU that should be read
@return The counter result
*/
double nvmon_getLastResult(int groupId, int eventId, int gpuId) __attribute__ ((visibility ("default") ));
/*! \brief Get the metric result of the specified group, counter and GPU (Nvmon)

Get the metric result of all measurement cycles. It reads all raw results for the given groupId and gpuId.

@param [in] groupId ID of the group that should be read
@param [in] metricId ID of the metric that should be calculated
@param [in] gpuId ID of the GPU that should be read
@return The metric result
*/
double nvmon_getMetric(int groupId, int metricId, int gpuId) __attribute__ ((visibility ("default") ));
/*! \brief Get the last metric result of the specified group, counter and GPU (Nvmon)

Get the metric result of the last measurement cycle. It reads all raw results for the given groupId and gpuId.

@param [in] groupId ID of the group that should be read
@param [in] metricId ID of the metric that should be calculated
@param [in] gpuId ID of the GPU that should be read
@return The metric result
*/
double nvmon_getLastMetric(int groupId, int metricId, int gpuId) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of configured event groups (Nvmon)

@return Number of groups
*/
int nvmon_getNumberOfGroups(void) __attribute__ ((visibility ("default") ));
/*! \brief Get the ID of the currently set up event group (Nvmon)

@return ID of the active group
*/
int nvmon_getIdOfActiveGroup(void) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of GPUs specified at nvmon_init() (Nvmon)

@return Number of GPUs
*/
int nvmon_getNumberOfGPUs(void) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of configured eventSets in group (Nvmon)

NOTE(review): the function name suggests this is the number of events configured
in the given group rather than a number of eventSets - confirm.

@param [in] groupId ID of group
@return Number of eventSets
*/
int nvmon_getNumberOfEvents(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get the number of configured metrics for a group (Nvmon)

@param [in] groupId ID of group
@return Number of metrics
*/
int nvmon_getNumberOfMetrics(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get the accumulated measurement time of a group (Nvmon)

@param [in] groupId ID of group
@return Time in seconds the event group was measured
*/
double nvmon_getTimeOfGroup(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get the last measurement time of a group (Nvmon)

@param [in] groupId ID of group
@return Time in seconds the event group was measured the last time
*/
double nvmon_getLastTimeOfGroup(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get the event name of the specified group and event (Nvmon)

Get the event name as defined in the performance group file

@param [in] groupId ID of the group that should be read
@param [in] eventId ID of the event that should be returned
@return The event name or NULL in case of failure
*/
char* nvmon_getEventName(int groupId, int eventId) __attribute__ ((visibility ("default") ));
/*! \brief Get the counter name of the specified group and event (Nvmon)

Get the counter name as defined in the performance group file

@param [in] groupId ID of the group that should be read
@param [in] eventId ID of the event of which the counter should be returned
@return The counter name or NULL in case of failure
*/
char* nvmon_getCounterName(int groupId, int eventId) __attribute__ ((visibility ("default") ));
/*! \brief Get the metric name of the specified group and metric (Nvmon)

Get the metric name as defined in the performance group file

@param [in] groupId ID of the group that should be read
@param [in] metricId ID of the metric whose name should be returned
@return The metric name or NULL in case of failure
*/
char* nvmon_getMetricName(int groupId, int metricId) __attribute__ ((visibility ("default") ));
/*! \brief Get the name of a group (Nvmon)

Get the name of a group. Either it is the name of the performance group or "Custom"

@param [in] groupId ID of the group that should be read
@return The group name or NULL in case of failure
*/
char* nvmon_getGroupName(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get the short informational string of the specified group (Nvmon)

Returns the short information string as defined by performance groups or "Custom"
in case of custom event sets

@param [in] groupId ID of the group that should be read
@return The short information or NULL in case of failure
*/
char* nvmon_getGroupInfoShort(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get the long descriptive string of the specified group (Nvmon)

Returns the long descriptive string as defined by performance groups or NULL
in case of custom event sets

@param [in] groupId ID of the group that should be read
@return The long description or NULL in case of failure
*/
char* nvmon_getGroupInfoLong(int groupId) __attribute__ ((visibility ("default") ));
/*! \brief Get all groups (Nvmon)

Checks the configured performance group path for the current GPU and
returns all found group names

@param [in] gpuId Get groups for a specific GPU
@param [out] groups List of group names
@param [out] shortinfos List of short information strings about the groups
@param [out] longinfos List of long information strings about the groups
@return Number of found performance groups
*/
int nvmon_getGroups(int gpuId, char*** groups, char*** shortinfos, char*** longinfos) __attribute__ ((visibility ("default") ));
/*! \brief Free all group information (Nvmon)

@param [in] nrgroups Number of groups
@param [in] groups List of group names
@param [in] shortinfos List of short information strings about the groups
@param [in] longinfos List of long information strings about the groups
@return NOTE(review): the meaning of the int return value is not documented here - confirm against the implementation
*/
int nvmon_returnGroups(int nrgroups, char** groups, char** shortinfos, char** longinfos) __attribute__ ((visibility ("default") ));
/** @}*/
#endif /* LIKWID_WITH_NVMON */

/* Closes the extern "C" block opened near the top of this header for C++ consumers. */
#ifdef __cplusplus
}
#endif

#endif /*LIKWID_H*/