Merge branch 'main' into message_processor

This commit is contained in:
Thomas Gruber 2024-12-11 19:02:59 +01:00 committed by GitHub
commit f7c7a52f60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 57 additions and 29 deletions

View File

@ -195,7 +195,7 @@ jobs:
Release:
runs-on: ubuntu-latest
# We need the RPMs, so add dependency
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build]
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-jammy-build]
steps:
# See: https://github.com/actions/download-artifact

View File

@ -88,13 +88,11 @@ jobs:
submodules: recursive
fetch-depth: 0
# Use dnf to install build dependencies
- name: Install build dependencies
run: |
wget -q https://go.dev/dl/go1.22.4.linux-amd64.tar.gz --output-document=- | \
tar --directory=/usr/local --extract --gzip
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
go version
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.21'
- name: RPM build MetricCollector
id: rpmbuild
@ -126,13 +124,11 @@ jobs:
submodules: recursive
fetch-depth: 0
# Use dnf to install build dependencies
- name: Install build dependencies
run: |
wget -q https://go.dev/dl/go1.22.4.linux-amd64.tar.gz --output-document=- | \
tar --directory=/usr/local --extract --gzip
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
go version
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.21'
- name: RPM build MetricCollector
id: rpmbuild
@ -163,12 +159,13 @@ jobs:
submodules: recursive
fetch-depth: 0
# Use official golang package
- name: Install Golang
run: |
wget -q https://go.dev/dl/go1.22.4.linux-amd64.tar.gz --output-document=- | \
tar --directory=/usr/local --extract --gzip
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
go version
# See: https://github.com/marketplace/actions/setup-go-environment
- name: Setup Golang
uses: actions/setup-go@v4
with:
go-version: '1.21'
- name: DEB build MetricCollector
id: dpkg-build
run: |

View File

@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"time"
cclog "github.com/ClusterCockpit/cc-metric-collector/pkg/ccLogger"
lp "github.com/ClusterCockpit/cc-metric-collector/pkg/ccMetric"
)
@ -54,15 +55,30 @@ func (m *IpmiCollector) Init(config json.RawMessage) error {
// Check if executables ipmitool or ipmisensors are found
p, err := exec.LookPath(m.config.IpmitoolPath)
if err == nil {
m.ipmitool = p
command := exec.Command(p)
err := command.Run()
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %v", p, err.Error()))
m.ipmitool = ""
} else {
m.ipmitool = p
}
}
p, err = exec.LookPath(m.config.IpmisensorsPath)
if err == nil {
m.ipmisensors = p
command := exec.Command(p)
err := command.Run()
if err != nil {
cclog.ComponentError(m.name, fmt.Sprintf("Failed to execute %s: %v", p, err.Error()))
m.ipmisensors = ""
} else {
m.ipmisensors = p
}
}
if len(m.ipmitool) == 0 && len(m.ipmisensors) == 0 {
return errors.New("no IPMI reader found")
return errors.New("no usable IPMI reader found")
}
m.init = true
return nil
}
@ -119,8 +135,8 @@ func (m *IpmiCollector) readIpmiTool(cmd string, output chan lp.CCMetric) {
cclog.ComponentError(
m.name,
fmt.Sprintf("readIpmiTool(): Failed to wait for the end of command \"%s\": %v\n", command.String(), err),
fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", string(errMsg)),
)
cclog.ComponentError(m.name, fmt.Sprintf("readIpmiTool(): command stderr: \"%s\"\n", strings.TrimSpace(string(errMsg))))
return
}
}

View File

@ -374,10 +374,21 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
}
defer watcher.Close()
if len(m.config.LockfilePath) > 0 {
// Check if the lock file exists
info, err := os.Stat(m.config.LockfilePath)
if os.IsNotExist(err) {
// Create the lock file if it does not exist
file, createErr := os.Create(m.config.LockfilePath)
if createErr != nil {
return true, fmt.Errorf("failed to create lock file: %v", createErr)
}
file.Close()
info, err = os.Stat(m.config.LockfilePath) // Recheck the file after creation
}
if err != nil {
return true, err
}
// Check file ownership
uid := info.Sys().(*syscall.Stat_t).Uid
if uid != uint32(os.Getuid()) {
usr, err := user.LookupId(fmt.Sprint(uid))
@ -387,6 +398,7 @@ func (m *LikwidCollector) takeMeasurement(evidx int, evset LikwidEventsetConfig,
return true, fmt.Errorf("Access to performance counters locked by %d", uid)
}
}
// Add the lock file to the watcher
err = watcher.Add(m.config.LockfilePath)
if err != nil {
cclog.ComponentError(m.name, err.Error())

View File

@ -15,7 +15,7 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
{
"events" : {
"COUNTER0": "EVENT0",
"COUNTER1": "EVENT1",
"COUNTER1": "EVENT1"
},
"metrics" : [
{
@ -27,7 +27,7 @@ The `likwid` collector is probably the most complicated collector. The LIKWID li
}
]
}
]
],
"globalmetrics" : [
{
"name": "global_sum",
@ -132,6 +132,9 @@ In some cases LIKWID returns `0.0` for some events that are further used in proc
One might think this does not happen often, but frequently used metrics in the world of performance engineering, like Instructions-per-Cycle (IPC) or, more frequently, the actual CPU clock, are derived from events like `CPU_CLK_UNHALTED_CORE` (Intel), which do not increment in the halted state (as the name implies). There are different power management systems in a chip which can cause a hardware thread to enter such a state. Moreover, if no cycles are executed by the core, many other events are not incremented either (like `INSTR_RETIRED_ANY` for retired instructions, which is part of IPC).
### `lockfile_path` option
LIKWID can be configured with a lock file with which the access to the performance monitoring registers can be disabled (only the owner of the lock file is allowed to access the registers). When the `lockfile_path` option is set, the collector subscribes to changes to this file to stop monitoring if the owner of the lock file changes. This feature is useful when users should be able to perform their own hardware performance counter measurements through LIKWID or any other tool.
### `send_*_total_values` option
- `send_core_total_values`: Metrics, which are usually collected on a per hardware thread basis, are additionally summed up per CPU core.

View File

@ -17,7 +17,7 @@ This folder contains the SinkManager and sink implementations for the cc-metric-
The configuration file for the sinks is a JSON object that maps a name for each sink to its configuration. The `type` field in each configuration specifies which sink to initialize.
```json
[
{
"mystdout" : {
"type" : "stdout",
"meta_as_tags" : [
@ -31,7 +31,7 @@ The configuration file for the sinks is a list of configurations. The `type` fie
"database" : "ccmetric",
"password" : "<jwt token>"
}
]
}
```