From 2e0860a0645ee0872070e65e515f364892176aa9 Mon Sep 17 00:00:00 2001 From: Thomas Roehl Date: Tue, 15 Mar 2022 15:43:53 +0100 Subject: [PATCH] Add ccUnits code --- LICENSE | 21 ++++++ README.md | 152 +++++++++++++++++++++++++++++++++++++++ ccUnitMeasure.go | 184 +++++++++++++++++++++++++++++++++++++++++++++++ ccUnitPrefix.go | 144 +++++++++++++++++++++++++++++++++++++ ccUnits.go | 123 +++++++++++++++++++++++++++++++ ccUnits_test.go | 114 +++++++++++++++++++++++++++++ 6 files changed, 738 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 ccUnitMeasure.go create mode 100644 ccUnitPrefix.go create mode 100644 ccUnits.go create mode 100644 ccUnits_test.go diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b3f9fbd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 ClusterCockpit + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..eea0dee --- /dev/null +++ b/README.md @@ -0,0 +1,152 @@ +# ccUnits - A unit system for ClusterCockpit + +When working with metrics, the problem comes up that they may use different unit names although they in fact refer to the same unit. There are a lot of real world examples like 'kB' and 'Kbyte'. In CC Metric Collector, the Collectors read data from different sources which may use different units or the programmer specifies a unit for a metric by hand. The ccUnits system is not comparable with the SI unit system. If you are looking for a package for the SI units, see [here](https://pkg.go.dev/github.com/gurre/si). + +In order to enable unit comparison and conversion, the ccUnits package provides some helpers: +There are basically two important functions: +```go +NewUnit(unit string) Unit +GetUnitPrefixFactor(in Unit, out Unit) (func(value float64) float64, error) // Get conversion function for the value + +type Unit interface { + Valid() bool + String() string + Short() string + AddDivisorUnit(div Measure) +} +``` + +In order to get the "normalized" string unit back or test for validity, you can use: +```go +u := NewUnit("MB") +fmt.Println(u.Valid()) // true +fmt.Printf("Long string %q", u.String()) // MegaBytes +fmt.Printf("Short string %q", u.Short()) // MBytes +v := NewUnit("foo") +fmt.Println(v.Valid()) // false +``` + +If you have two units and need the conversion function: +```go +u1 := NewUnit("kB") +u2 := NewUnit("MBytes") +convFunc, err := GetUnitPrefixFactor(u1, u2) // Returns an error if the units have different measures +if err == nil { + v2 := convFunc(v1) +} +``` + +(In the ClusterCockpit ecosystem the separation between values and units is useful since they are commonly not stored as a single entity: the value is a field in the CCMetric while the unit is a tag or meta information). 
+ +If you have a metric and want to derive a bandwidth or an events-per-second metric from it, you can use the original unit: + +```go +in_unit, err := metric.GetMeta("unit") +if err == nil { + value, ok := metric.GetField("value") + if ok { + out_unit = NewUnit(in_unit) + out_unit.AddDivisorUnit("seconds") + seconds := timeDiff.Seconds() + y, err := lp.New(metric.Name()+"_bw", + metric.Tags(), + metric.Meta(), + map[string]interface{"value": value/seconds}, + metric.Time()) + if err == nil { + y.AddMeta("unit", out_unit.Short()) + } + } +} +``` + +## Special unit detection + +Some measures like Bytes and Flops are non-dividable. Consequently, prefixes like Milli, Micro and Nano are not useful for them. The parser takes advantage of this: a unit `mb` for `MBytes` is not uncommon but would by default be parsed as "MilliBytes". + +Special parsing rules for the following measures: if `prefix==Milli`, use `prefix==Mega` + - `Bytes` + - `Flops` + - `Packets` + - `Events` + - `Cycles` + - `Requests` + +This means the prefixes `Micro` (like `ubytes`) and `Nano` (like `nflops/sec`) are not allowed and return an invalid unit. But you can specify `mflops` and `mb`. + +Prefixes for `%` or `percent` are ignored. + +## Supported prefixes + +```go +const ( + Base Prefix = 1 + Exa = 1e18 + Peta = 1e15 + Tera = 1e12 + Giga = 1e9 + Mega = 1e6 + Kilo = 1e3 + Milli = 1e-3 + Micro = 1e-6 + Nano = 1e-9 + Kibi = 1024 + Mebi = 1024 * 1024 + Gibi = 1024 * 1024 * 1024 + Tebi = 1024 * 1024 * 1024 * 1024 +) +``` + +The prefixes are detected using a regular expression `^([kKmMgGtTpP]?[i]?)(.*)` that splits the prefix from the measure. You probably don't need to deal with the prefixes in the code. 
+ +## Supported measures + +```go +const ( + None Measure = iota + Bytes + Flops + Percentage + TemperatureC + TemperatureF + Rotation + Hertz + Time + Watt + Joule + Cycles + Requests + Packets + Events +) +``` + +There is a regular expression for each of the measures like `^([bB][yY]?[tT]?[eE]?[sS]?)` for the `Bytes` measure. + + +## New units + +If the selected units are not suitable for your metric, feel free to send a PR. + +### New prefix + +For a new prefix, add it to the big `const` in `ccUnitPrefix.go` and adjust the prefix-unit-splitting regular expression. Afterwards, you have to add cases to the three functions `String()`, `Prefix()` and `NewPrefix()`. `NewPrefix()` contains the parser (`k` or `K` -> `Kilo`). The other two are used for output. `String()` outputs a longer version of the prefix (`Kilo`), while `Prefix()` returns only the short notation (`K`). + +### New measure + +Adding new prefixes is probably rare but adding a new measure is a more common task. At first, add it to the big `const` in `ccUnitMeasure.go`. Moreover, create a regular expression matching the measure (and pre-compile it like the others). Add the expression matching to `NewMeasure()`. The `String()` and `Short()` functions return descriptive strings for the measure in long form (like `Hertz`) and short form (like `Hz`). + +If there are special conversion rules between measures and you want to convert one measure to another, like temperatures in Celsius to Fahrenheit, a special case in `GetUnitPrefixFactor()` is required. + +### Special parsing rules + +The two parsers for prefix and measure are called under the hood by `NewUnit()` and some special rules might apply. Like in the above section about 'special unit detection', special rules for your new measure might be required. Currently there are two special cases: + +- Measures that are non-dividable like Flops, Bytes, Events, ... cannot use `Milli`, `Micro` and `Nano`. 
// Go sources added by this patch. The patch spreads them over the files
// ccUnitMeasure.go, ccUnitPrefix.go, ccUnits.go and ccUnits_test.go, all in
// package ccunits; together they import fmt, regexp, strings and testing.

// ---------------------------------------------------------------------------
// ccUnitMeasure.go
// ---------------------------------------------------------------------------

// Measure identifies what a unit measures (bytes, time, power, ...),
// independent of any prefix.
type Measure int

// Supported measures. None is the zero value and marks an unknown measure.
const (
	None Measure = iota
	Bytes
	Flops
	Percentage
	TemperatureC
	TemperatureF
	Rotation
	Hertz
	Time
	Watt
	Joule
	Cycles
	Requests
	Packets
	Events
)

// String returns the long name of the measure (e.g. "Hertz"). None and any
// value outside the list of supported measures yield "Unknown".
func (m *Measure) String() string {
	switch *m {
	case Bytes:
		return "Bytes"
	case Flops:
		return "Flops"
	case Percentage:
		return "Percent"
	case TemperatureC:
		return "DegreeC"
	case TemperatureF:
		return "DegreeF"
	case Rotation:
		return "RPM"
	case Hertz:
		return "Hertz"
	case Time:
		return "Seconds"
	case Watt:
		return "Watts"
	case Joule:
		return "Joules"
	case Cycles:
		return "Cycles"
	case Requests:
		return "Requests"
	case Packets:
		return "Packets"
	case Events:
		return "Events"
	default:
		return "Unknown"
	}
}

// Short returns the short notation of the measure (e.g. "Hz"). None and any
// value outside the list of supported measures yield "Unknown".
func (m *Measure) Short() string {
	switch *m {
	case Bytes:
		return "Bytes"
	case Flops:
		return "Flops"
	case Percentage:
		return "Percent"
	case TemperatureC:
		return "degC"
	case TemperatureF:
		return "degF"
	case Rotation:
		return "RPM"
	case Hertz:
		return "Hz"
	case Time:
		return "s"
	case Watt:
		return "W"
	case Joule:
		return "J"
	case Cycles:
		return "cyc"
	case Requests:
		return "requests"
	case Packets:
		return "packets"
	case Events:
		return "events"
	default:
		return "Unknown"
	}
}

// Expressions recognizing the accepted spellings of each measure. They only
// anchor the start of the input; mustCompileMeasure adds the end anchor.
const bytesRegexStr = `^([bB][yY]?[tT]?[eE]?[sS]?)`
const flopsRegexStr = `^([fF][lL]?[oO]?[pP]?[sS]?)`
const percentRegexStr = `^(%|[pP]ercent)`
const degreeCRegexStr = `^(deg[Cc]|°[cC])`
const degreeFRegexStr = `^(deg[fF]|°[fF])`
const rpmRegexStr = `^([rR][pP][mM])`
const hertzRegexStr = `^([hH][eE]?[rR]?[tT]?[zZ])`
const timeRegexStr = `^([sS][eE]?[cC]?[oO]?[nN]?[dD]?[sS]?)`
const wattRegexStr = `^([wW][aA]?[tT]?[tT]?[sS]?)`
const jouleRegexStr = `^([jJ][oO]?[uU]?[lL]?[eE]?[sS]?)`
const cyclesRegexStr = `^([cC][yY][cC]?[lL]?[eE]?[sS]?)`
const requestsRegexStr = `^([rR][eE][qQ][uU]?[eE]?[sS]?[tT]?[sS]?)`
const packetsRegexStr = `^([pP][aA]?[cC]?[kK][eE]?[tT][sS]?)`
const eventsRegexStr = `^([eE][vV]?[eE]?[nN][tT][sS]?)`

// mustCompileMeasure compiles a measure expression and anchors it at the end
// of the input so that only complete measure names are accepted. Without the
// end anchor a single leading letter is enough to match: "foo" is detected
// as Flops, "bit" as Bytes and "writes" as Watt.
func mustCompileMeasure(expr string) *regexp.Regexp {
	return regexp.MustCompile(expr + "$")
}

var bytesRegex = mustCompileMeasure(bytesRegexStr)
var flopsRegex = mustCompileMeasure(flopsRegexStr)
var percentRegex = mustCompileMeasure(percentRegexStr)
var degreeCRegex = mustCompileMeasure(degreeCRegexStr)
var degreeFRegex = mustCompileMeasure(degreeFRegexStr)
var rpmRegex = mustCompileMeasure(rpmRegexStr)
var hertzRegex = mustCompileMeasure(hertzRegexStr)
var timeRegex = mustCompileMeasure(timeRegexStr)
var wattRegex = mustCompileMeasure(wattRegexStr)
var jouleRegex = mustCompileMeasure(jouleRegexStr)
var cyclesRegex = mustCompileMeasure(cyclesRegexStr)
var requestsRegex = mustCompileMeasure(requestsRegexStr)
var packetsRegex = mustCompileMeasure(packetsRegexStr)
var eventsRegex = mustCompileMeasure(eventsRegexStr)

// measureMatchers lists the measure expressions in the order in which
// NewMeasure tries them; the first match wins.
var measureMatchers = []struct {
	regex   *regexp.Regexp
	measure Measure
}{
	{bytesRegex, Bytes},
	{flopsRegex, Flops},
	{percentRegex, Percentage},
	{degreeCRegex, TemperatureC},
	{degreeFRegex, TemperatureF},
	{rpmRegex, Rotation},
	{hertzRegex, Hertz},
	{timeRegex, Time},
	{cyclesRegex, Cycles},
	{wattRegex, Watt},
	{jouleRegex, Joule},
	{requestsRegex, Requests},
	{packetsRegex, Packets},
	{eventsRegex, Events},
}

// NewMeasure parses a measure name without prefix (e.g. "byte", "Hz", "%").
// The whole string has to be one of the accepted spellings; otherwise None
// is returned.
func NewMeasure(unit string) Measure {
	for _, m := range measureMatchers {
		if m.regex.MatchString(unit) {
			return m.measure
		}
	}
	return None
}

// ---------------------------------------------------------------------------
// ccUnitPrefix.go
// ---------------------------------------------------------------------------

// Prefix is the scaling factor of a unit relative to its base measure.
type Prefix float64

// Supported prefixes. Only Base carries the Prefix type explicitly; the other
// names are untyped numeric constants that convert to Prefix where needed.
const (
	Base  Prefix = 1
	Exa          = 1e18
	Peta         = 1e15
	Tera         = 1e12
	Giga         = 1e9
	Mega         = 1e6
	Kilo         = 1e3
	Milli        = 1e-3
	Micro        = 1e-6
	Nano         = 1e-9
	Kibi         = 1024
	Mebi         = 1024 * 1024
	Gibi         = 1024 * 1024 * 1024
	Tebi         = 1024 * 1024 * 1024 * 1024
)

// prefixRegexStr splits a unit string into an optional prefix (group 1) and
// the remainder (group 2). The character class contains no 'e', 'u' or 'n',
// so NewUnit never derives Exa, Micro or Nano from it.
const prefixRegexStr = `^([kKmMgGtTpP]?[i]?)(.*)`

var prefixRegex = regexp.MustCompile(prefixRegexStr)

// String returns the long name of the prefix (e.g. "Kilo"). Base yields the
// empty string, an unsupported value yields "Unkn".
func (s *Prefix) String() string {
	switch *s {
	case Base:
		return ""
	case Kilo:
		return "Kilo"
	case Mega:
		return "Mega"
	case Giga:
		return "Giga"
	case Tera:
		return "Tera"
	case Peta:
		return "Peta"
	case Exa:
		return "Exa"
	case Milli:
		return "Milli"
	case Micro:
		return "Micro"
	case Nano:
		return "Nano"
	case Kibi:
		return "Kibi"
	case Mebi:
		return "Mebi"
	case Gibi:
		return "Gibi"
	case Tebi:
		return "Tebi"
	default:
		return "Unkn"
	}
}

// Prefix returns the short notation of the prefix (e.g. "K"). Base and
// unsupported values yield the empty string.
func (s *Prefix) Prefix() string {
	switch *s {
	case Base:
		return ""
	case Kilo:
		return "K"
	case Mega:
		return "M"
	case Giga:
		return "G"
	case Tera:
		return "T"
	case Peta:
		return "P"
	case Exa:
		return "E"
	case Milli:
		return "m"
	case Micro:
		return "u"
	case Nano:
		return "n"
	case Kibi:
		return "Ki"
	case Mebi:
		return "Mi"
	case Gibi:
		return "Gi"
	case Tebi:
		return "Ti"
	default:
		return ""
	}
}

// NewPrefix parses the short notation of a prefix. Decimal prefixes are
// accepted in lower and upper case, except that "m" is Milli while "M" is
// Mega. Binary prefixes are accepted as "ki"/"Ki", "mi"/"Mi", "gi"/"Gi" and
// "ti"/"Ti". The empty string and every unknown notation resolve to Base.
func NewPrefix(prefix string) Prefix {
	switch prefix {
	case "k", "K":
		return Kilo
	case "m":
		return Milli
	case "M":
		return Mega
	case "g", "G":
		return Giga
	case "t", "T":
		return Tera
	case "p", "P":
		return Peta
	case "e", "E":
		return Exa
	case "u":
		return Micro
	case "n":
		return Nano
	case "ki", "Ki":
		return Kibi
	// "mi" and "ti" are accepted like "ki" and "gi"; without them "mib" and
	// "tib" would silently lose their prefix and be treated as plain Bytes.
	case "mi", "Mi":
		return Mebi
	case "gi", "Gi":
		return Gibi
	case "ti", "Ti":
		return Tebi
	default:
		return Base
	}
}

// ---------------------------------------------------------------------------
// ccUnits.go
// ---------------------------------------------------------------------------

// unit is the implementation of Unit: a prefixed measure with an optional
// divisor measure (e.g. "MBytes/s").
type unit struct {
	prefix     Prefix
	measure    Measure
	divMeasure Measure
}

// Unit is a parsed unit string. The unexported methods keep implementations
// inside this package.
type Unit interface {
	// Valid reports whether a measure was detected.
	Valid() bool
	// String returns the long form, e.g. "MegaBytes/Seconds".
	String() string
	// Short returns the short form, e.g. "MBytes/s".
	Short() string
	// AddDivisorUnit sets (or replaces) the divisor measure.
	AddDivisorUnit(div Measure)
	getPrefix() Prefix
	getMeasure() Measure
	getDivMeasure() Measure
}

var _ Unit = (*unit)(nil)

// Valid reports whether the unit has a known measure.
func (u *unit) Valid() bool {
	return u.measure != None
}

// String returns the long form of the unit; the divisor is appended after a
// slash when one is set.
func (u *unit) String() string {
	if u.divMeasure != None {
		return fmt.Sprintf("%s%s/%s", u.prefix.String(), u.measure.String(), u.divMeasure.String())
	}
	return fmt.Sprintf("%s%s", u.prefix.String(), u.measure.String())
}

// Short returns the short form of the unit; the divisor is appended after a
// slash when one is set.
func (u *unit) Short() string {
	if u.divMeasure != None {
		return fmt.Sprintf("%s%s/%s", u.prefix.Prefix(), u.measure.Short(), u.divMeasure.Short())
	}
	return fmt.Sprintf("%s%s", u.prefix.Prefix(), u.measure.Short())
}

// AddDivisorUnit sets the divisor measure, replacing any previous one.
func (u *unit) AddDivisorUnit(div Measure) {
	u.divMeasure = div
}

func (u *unit) getPrefix() Prefix {
	return u.prefix
}

func (u *unit) getMeasure() Measure {
	return u.measure
}

func (u *unit) getDivMeasure() Measure {
	return u.divMeasure
}

// GetPrefixFactor returns a function converting a value given with prefix in
// to the equivalent value with prefix out, i.e. it multiplies the value by
// in/out. Example: GetPrefixFactor(Kilo, Base)(2) == 2000.
func GetPrefixFactor(in Prefix, out Prefix) func(value float64) float64 {
	factor := float64(in) / float64(out)
	// The value has to be scaled; returning the bare factor would make the
	// function useless as a converter for anything but value == 1.
	return func(value float64) float64 { return value * factor }
}

// GetUnitPrefixFactor returns the function converting values of unit in to
// unit out.
//
// Celsius <-> Fahrenheit is converted with the usual formula; prefixes and
// divisors are not looked at in that case. For all other units both measure
// and divisor measure have to be equal and the conversion is the prefix
// scaling of GetPrefixFactor. If they differ, an error is returned together
// with a placeholder function that always yields 1.0; callers must check the
// error before using the function.
func GetUnitPrefixFactor(in Unit, out Unit) (func(value float64) float64, error) {
	inMeasure, outMeasure := in.getMeasure(), out.getMeasure()
	switch {
	case inMeasure == TemperatureC && outMeasure == TemperatureF:
		return func(value float64) float64 { return (value * 1.8) + 32 }, nil
	case inMeasure == TemperatureF && outMeasure == TemperatureC:
		return func(value float64) float64 { return (value - 32) / 1.8 }, nil
	case inMeasure != outMeasure || in.getDivMeasure() != out.getDivMeasure():
		return func(value float64) float64 { return 1.0 }, fmt.Errorf("invalid measures in in and out Unit")
	}
	return GetPrefixFactor(in.getPrefix(), out.getPrefix()), nil
}

// NewUnit parses a unit string of the form [prefix]measure[/measure], e.g.
// "kB", "MHz" or "GFlops/s". Use Valid on the result to check whether the
// measure was recognized.
//
// Special parsing rules:
//
//   - Measures that are non-dividable (Bytes, Flops, Packets, Events, Cycles,
//     Requests) cannot use Milli; the prefix "m" is forced to Mega for them,
//     so "mb" is read as MBytes.
//   - Prefixes of Percentage are ignored.
//   - If the prefix is Peta or Exa and the remainder is not a known measure,
//     detection is retried with the prefix letter as part of the measure.
//     "packets" is first split into prefix "p" and measure "ackets", which
//     fails, and then detected as Packets with the Base prefix.
//
// Limitation: the divisor (like "s" in "Mbyte/s") is parsed as a bare
// measure, so it can only have the Base prefix; "Byte/ms" does not yield
// bytes per millisecond. Only the first divisor after a slash is used.
func NewUnit(unitStr string) Unit {
	u := &unit{
		prefix:     Base,
		measure:    None,
		divMeasure: None,
	}
	matches := prefixRegex.FindStringSubmatch(unitStr)
	if len(matches) <= 2 {
		return u
	}
	pre := NewPrefix(matches[1])
	measures := strings.Split(matches[2], "/")
	m := NewMeasure(measures[0])
	// Special case for prefix 'p' or 'P' (Peta) and measures starting with
	// 'p' or 'P' like 'packets' or 'percent'. Same for 'e' or 'E' (Exa) for
	// measures starting with 'e' or 'E' like 'events'.
	if m == None && (pre == Peta || pre == Exa) {
		if t := NewMeasure(matches[1] + measures[0]); t != None {
			m = t
			pre = Base
		}
	}
	div := None
	if len(measures) > 1 {
		div = NewMeasure(measures[1])
	}

	switch m {
	// Special case for 'm' as prefix for Bytes and some others as there is
	// no unit like MilliBytes.
	case Bytes, Flops, Packets, Events, Cycles, Requests:
		if pre == Milli {
			pre = Mega
		}
	// Special case for percentage. No/ignore prefix.
	case Percentage:
		pre = Base
	}
	u.prefix = pre
	u.measure = m
	u.divMeasure = div
	return u
}

// ---------------------------------------------------------------------------
// ccUnits_test.go
// ---------------------------------------------------------------------------

// TestUnitsExact checks that different spellings of a unit parse to exactly
// the same prefix, measure and divisor measure.
func TestUnitsExact(t *testing.T) {
	testCases := []struct {
		in   string
		want Unit
	}{
		{"b", NewUnit("Bytes")},
		{"B", NewUnit("Bytes")},
		{"byte", NewUnit("Bytes")},
		{"bytes", NewUnit("Bytes")},
		{"BYtes", NewUnit("Bytes")},
		{"Mb", NewUnit("MBytes")},
		{"MB", NewUnit("MBytes")},
		{"Mbyte", NewUnit("MBytes")},
		{"Mbytes", NewUnit("MBytes")},
		{"MbYtes", NewUnit("MBytes")},
		{"Gb", NewUnit("GBytes")},
		{"GB", NewUnit("GBytes")},
		{"Hz", NewUnit("Hertz")},
		{"MHz", NewUnit("MHertz")},
		{"GHertz", NewUnit("GHertz")},
		{"pkts", NewUnit("Packets")},
		{"packets", NewUnit("Packets")},
		{"packet", NewUnit("Packets")},
		{"flop", NewUnit("Flops")},
		{"flops", NewUnit("Flops")},
		{"floPS", NewUnit("Flops")},
		{"Mflop", NewUnit("MFlops")},
		{"Gflop", NewUnit("GFlops")},
		{"gflop", NewUnit("GFlops")},
		{"%", NewUnit("Percent")},
		{"percent", NewUnit("Percent")},
		{"degc", NewUnit("degC")},
		{"degC", NewUnit("degC")},
		{"degf", NewUnit("degF")},
		{"°f", NewUnit("degF")},
		{"events", NewUnit("events")},
		{"event", NewUnit("events")},
		{"EveNts", NewUnit("events")},
		{"reqs", NewUnit("requests")},
		{"requests", NewUnit("requests")},
		{"Requests", NewUnit("requests")},
		{"cyc", NewUnit("cycles")},
		{"cy", NewUnit("cycles")},
		{"Cycles", NewUnit("cycles")},
		{"J", NewUnit("Joules")},
		{"Joule", NewUnit("Joules")},
		{"joule", NewUnit("Joules")},
		{"W", NewUnit("Watt")},
		{"Watts", NewUnit("Watt")},
		{"watt", NewUnit("Watt")},
		{"s", NewUnit("seconds")},
		{"sec", NewUnit("seconds")},
		{"secs", NewUnit("seconds")},
		{"RPM", NewUnit("rpm")},
		{"rPm", NewUnit("rpm")},
		{"watt/byte", NewUnit("W/B")},
		{"watts/bytes", NewUnit("W/B")},
		{"flop/byte", NewUnit("flops/Bytes")},
		{"F/B", NewUnit("flops/Bytes")},
	}
	compareUnitExact := func(in, out Unit) bool {
		return in.getMeasure() == out.getMeasure() &&
			in.getDivMeasure() == out.getDivMeasure() &&
			in.getPrefix() == out.getPrefix()
	}
	for _, c := range testCases {
		u := NewUnit(c.in)
		if (!u.Valid()) || (!compareUnitExact(u, c.want)) {
			t.Errorf("func NewUnit(%q) == %q, want %q", c.in, u.String(), c.want.String())
		}
	}
}

// TestUnitsDifferentPrefix checks the scaling between units that share the
// measure but differ in the prefix.
func TestUnitsDifferentPrefix(t *testing.T) {
	testCases := []struct {
		in           string
		want         Unit
		prefixFactor float64
	}{
		{"kb", NewUnit("Bytes"), 1000},
		{"Mb", NewUnit("Bytes"), 1000000},
		{"Mb/s", NewUnit("Bytes/s"), 1000000},
		{"Flops/s", NewUnit("MFlops/s"), 1e-6},
		{"Flops/s", NewUnit("GFlops/s"), 1e-9},
		{"MHz", NewUnit("Hertz"), 1e6},
		{"kb", NewUnit("Kib"), 1000.0 / 1024},
		{"Mib", NewUnit("MBytes"), (1024 * 1024.0) / (1e6)},
		{"mb", NewUnit("MBytes"), 1.0},
	}
	compareUnitWithPrefix := func(in, out Unit, factor float64) bool {
		if in.getMeasure() != out.getMeasure() || in.getDivMeasure() != out.getDivMeasure() {
			return false
		}
		// Converting the value 1.0 yields the prefix factor itself.
		f := GetPrefixFactor(in.getPrefix(), out.getPrefix())
		if f(1.0) == factor {
			return true
		}
		fmt.Println(f(1.0))
		return false
	}
	for _, c := range testCases {
		u := NewUnit(c.in)
		if (!u.Valid()) || (!compareUnitWithPrefix(u, c.want, c.prefixFactor)) {
			t.Errorf("func NewUnit(%q) == %q, want %q with factor %f", c.in, u.String(), c.want.String(), c.prefixFactor)
		}
	}
}