mirror of
				https://github.com/ClusterCockpit/cc-metric-collector.git
				synced 2025-10-31 00:55:06 +01:00 
			
		
		
		
	Fixed topology detection
This commit is contained in:
		| @@ -32,10 +32,19 @@ func readOneLine(filename string) (text string, ok bool) { | |||||||
| 	return | 	return | ||||||
| } | } | ||||||
|  |  | ||||||
| type CPUFreqCollectorCPU struct { | type CPUFreqCollectorTopology struct { | ||||||
| 	// coreID, packageID, num_cores, num_package | 	processor          string // logical processor number (continuous, starting at 0) | ||||||
| 	tagSet             map[string]string | 	coreID             string // socket local core ID | ||||||
|  | 	coreID_int         int | ||||||
|  | 	physicalID         string // socket / package ID | ||||||
|  | 	physicalID_int     int | ||||||
|  | 	numPhysicalID      string // number of  sockets / packages | ||||||
|  | 	numPhysicalID_int  int | ||||||
|  | 	isHT               bool | ||||||
|  | 	numNonHT           string // number of non hyperthreading processors | ||||||
|  | 	numNonHT_int       int | ||||||
| 	scalingCurFreqFile string | 	scalingCurFreqFile string | ||||||
|  | 	tagSet             map[string]string | ||||||
| } | } | ||||||
|  |  | ||||||
| // | // | ||||||
| @@ -48,10 +57,10 @@ type CPUFreqCollectorCPU struct { | |||||||
| // | // | ||||||
| type CPUFreqCollector struct { | type CPUFreqCollector struct { | ||||||
| 	MetricCollector | 	MetricCollector | ||||||
| 	config struct { | 	topology []CPUFreqCollectorTopology | ||||||
|  | 	config   struct { | ||||||
| 		ExcludeMetrics []string `json:"exclude_metrics,omitempty"` | 		ExcludeMetrics []string `json:"exclude_metrics,omitempty"` | ||||||
| 	} | 	} | ||||||
| 	cpus []CPUFreqCollectorCPU |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func (m *CPUFreqCollector) Init(config []byte) error { | func (m *CPUFreqCollector) Init(config []byte) error { | ||||||
| @@ -64,9 +73,6 @@ func (m *CPUFreqCollector) Init(config []byte) error { | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Initialize CPU list |  | ||||||
| 	m.cpus = make([]CPUFreqCollectorCPU, 0) |  | ||||||
|  |  | ||||||
| 	// Loop for all CPU directories | 	// Loop for all CPU directories | ||||||
| 	baseDir := "/sys/devices/system/cpu" | 	baseDir := "/sys/devices/system/cpu" | ||||||
| 	globPattern := filepath.Join(baseDir, "cpu[0-9]*") | 	globPattern := filepath.Join(baseDir, "cpu[0-9]*") | ||||||
| @@ -78,82 +84,98 @@ func (m *CPUFreqCollector) Init(config []byte) error { | |||||||
| 		return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern) | 		return fmt.Errorf("CPUFreqCollector.Init() unable to find any files with pattern %s", globPattern) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	maxPackageID := 0 | 	// Initialize CPU topology | ||||||
| 	maxCoreID := 0 | 	m.topology = make([]CPUFreqCollectorTopology, len(cpuDirs)) | ||||||
| 	for _, cpuDir := range cpuDirs { | 	for _, cpuDir := range cpuDirs { | ||||||
| 		cpuID := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") | 		processor := strings.TrimPrefix(cpuDir, "/sys/devices/system/cpu/cpu") | ||||||
|  | 		processor_int, err := strconv.Atoi(processor) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return fmt.Errorf("CPUFreqCollector.Init() unable to convert cpuID to int: %v", err) | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		// Read thread sibling list | 		// Read package ID | ||||||
| 		threadSiblingListFile := filepath.Join(cpuDir, "topology", "thread_siblings_list") | 		packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") | ||||||
| 		threadSiblingList, ok := readOneLine(threadSiblingListFile) | 		packageID, ok := readOneLine(packageIDFile) | ||||||
| 		if !ok { | 		if !ok { | ||||||
| 			return fmt.Errorf("CPUFreqCollector.Init() unable to read thread siblings list from %s", threadSiblingListFile) | 			return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) | ||||||
|  | 		} | ||||||
|  | 		packageID_int, err := strconv.Atoi(packageID) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		// Read frequency only from first hardware thread | 		// Read core ID | ||||||
| 		// Ignore Simultaneous Multithreading (SMT) / Hyper-Threading | 		coreIDFile := filepath.Join(cpuDir, "topology", "core_id") | ||||||
| 		if strings.Split(threadSiblingList, ",")[0] == cpuID { | 		coreID, ok := readOneLine(coreIDFile) | ||||||
| 			// Read package ID | 		if !ok { | ||||||
| 			packageIDFile := filepath.Join(cpuDir, "topology", "physical_package_id") | 			return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) | ||||||
| 			packageID, ok := readOneLine(packageIDFile) | 		} | ||||||
| 			if !ok { | 		coreID_int, err := strconv.Atoi(coreID) | ||||||
| 				return fmt.Errorf("CPUFreqCollector.Init() unable to read physical package ID from %s", packageIDFile) | 		if err != nil { | ||||||
| 			} | 			return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) | ||||||
| 			packageID_int, err := strconv.Atoi(packageID) | 		} | ||||||
| 			if err != nil { |  | ||||||
| 				return fmt.Errorf("CPUFreqCollector.Init() unable to convert packageID to int: %v", err) |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			// Update maxPackageID | 		// Check access to current frequency file | ||||||
| 			if packageID_int > maxPackageID { | 		scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") | ||||||
| 				maxPackageID = packageID_int | 		err = unix.Access(scalingCurFreqFile, unix.R_OK) | ||||||
| 			} | 		if err != nil { | ||||||
|  | 			return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 			// Read core ID | 		t := &m.topology[processor_int] | ||||||
| 			coreIDFile := filepath.Join(cpuDir, "topology", "core_id") | 		t.processor = processor | ||||||
| 			coreID, ok := readOneLine(coreIDFile) | 		t.physicalID = packageID | ||||||
| 			if !ok { | 		t.physicalID_int = packageID_int | ||||||
| 				return fmt.Errorf("CPUFreqCollector.Init() unable to read core ID from %s", coreIDFile) | 		t.coreID = coreID | ||||||
| 			} | 		t.coreID_int = coreID_int | ||||||
| 			coreID_int, err := strconv.Atoi(coreID) | 		t.scalingCurFreqFile = scalingCurFreqFile | ||||||
| 			if err != nil { | 	} | ||||||
| 				return fmt.Errorf("CPUFreqCollector.Init() unable to convert coreID to int: %v", err) |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			// Update maxCoreID | 	// is processor a hyperthread? | ||||||
| 			if coreID_int > maxCoreID { | 	coreSeenBefore := make(map[string]bool) | ||||||
| 				maxCoreID = coreID_int | 	for i := range m.topology { | ||||||
| 			} | 		t := &m.topology[i] | ||||||
|  |  | ||||||
| 			// Check access to current frequency file | 		globalID := t.physicalID + ":" + t.coreID | ||||||
| 			scalingCurFreqFile := filepath.Join(cpuDir, "cpufreq", "scaling_cur_freq") | 		t.isHT = coreSeenBefore[globalID] | ||||||
| 			err = unix.Access(scalingCurFreqFile, unix.R_OK) | 		coreSeenBefore[globalID] = true | ||||||
| 			if err != nil { | 	} | ||||||
| 				return fmt.Errorf("CPUFreqCollector.Init() unable to access %s: %v", scalingCurFreqFile, err) |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			m.cpus = append( | 	// number of non hyper thread cores and packages / sockets | ||||||
| 				m.cpus, | 	numNonHT_int := 0 | ||||||
| 				CPUFreqCollectorCPU{ | 	maxPhysicalID := 0 | ||||||
| 					tagSet: map[string]string{ | 	for i := range m.topology { | ||||||
| 						"type":      "cpu", | 		t := &m.topology[i] | ||||||
| 						"type-id":   strings.TrimSpace(coreID), |  | ||||||
| 						"packageID": strings.TrimSpace(packageID), | 		// Update maxPackageID | ||||||
| 					}, | 		if t.physicalID_int > maxPhysicalID { | ||||||
| 					scalingCurFreqFile: scalingCurFreqFile, | 			maxPhysicalID = t.physicalID_int | ||||||
| 				}) | 		} | ||||||
|  |  | ||||||
|  | 		if !t.isHT { | ||||||
|  | 			numNonHT_int++ | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Add num packages and num cores as tags | 	numPhysicalID_int := maxPhysicalID + 1 | ||||||
| 	numPackages := strconv.Itoa(maxPackageID + 1) | 	numPhysicalID := fmt.Sprint(numPhysicalID_int) | ||||||
| 	numCores := strconv.Itoa(maxCoreID + 1) | 	numNonHT := fmt.Sprint(numNonHT_int) | ||||||
| 	for i := range m.cpus { | 	for i := range m.topology { | ||||||
| 		c := &m.cpus[i] | 		t := &m.topology[i] | ||||||
| 		c.tagSet["num_core"] = numCores | 		t.numPhysicalID = numPhysicalID | ||||||
| 		c.tagSet["num_package"] = numPackages | 		t.numPhysicalID_int = numPhysicalID_int | ||||||
|  | 		t.numNonHT = numNonHT | ||||||
|  | 		t.numNonHT_int = numNonHT_int | ||||||
|  | 		t.tagSet = map[string]string{ | ||||||
|  | 			"type":        "cpu", | ||||||
|  | 			"type-id":     t.processor, | ||||||
|  | 			"num_core":    t.numNonHT, | ||||||
|  | 			"package_id":  t.physicalID, | ||||||
|  | 			"num_package": t.numPhysicalID, | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	fmt.Printf("%+v\n", m.topology) | ||||||
| 	m.init = true | 	m.init = true | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
| @@ -164,13 +186,18 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	now := time.Now() | 	now := time.Now() | ||||||
| 	for i := range m.cpus { | 	for i := range m.topology { | ||||||
| 		cpu := &m.cpus[i] | 		t := &m.topology[i] | ||||||
|  |  | ||||||
|  | 		// skip hyperthreads | ||||||
|  | 		if t.isHT { | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		// Read current frequency | 		// Read current frequency | ||||||
| 		line, ok := readOneLine(cpu.scalingCurFreqFile) | 		line, ok := readOneLine(t.scalingCurFreqFile) | ||||||
| 		if !ok { | 		if !ok { | ||||||
| 			log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", cpu.scalingCurFreqFile) | 			log.Printf("CPUFreqCollector.Read(): Failed to read one line from file '%s'", t.scalingCurFreqFile) | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
| 		cpuFreq, err := strconv.Atoi(line) | 		cpuFreq, err := strconv.Atoi(line) | ||||||
| @@ -179,7 +206,7 @@ func (m *CPUFreqCollector) Read(interval time.Duration, out *[]lp.MutableMetric) | |||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		y, err := lp.New("cpufreq", cpu.tagSet, map[string]interface{}{"value": cpuFreq}, now) | 		y, err := lp.New("cpufreq", t.tagSet, map[string]interface{}{"value": cpuFreq}, now) | ||||||
| 		if err == nil { | 		if err == nil { | ||||||
| 			*out = append(*out, y) | 			*out = append(*out, y) | ||||||
| 		} | 		} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user