Initial commit

This commit is contained in:
Bole Ma
2025-12-10 12:17:41 +01:00
commit 739563f916
12 changed files with 3428 additions and 0 deletions

221
xgb_local_example.py Normal file
View File

@@ -0,0 +1,221 @@
#!/usr/bin/env python3
"""
XGBoost Multi-Label Inference Usage Examples
===================================================
This script demonstrates how to use the XGBoostMultiLabelPredictor class
for multi-label classification with confidence scores.
Sample data is from real HPC workloads extracted from roofline_features.h5:
- TurTLE: Turbulence simulation (memory-bound, low arithmetic intensity ~0.84)
- SCALEXA: Scaling benchmarks (high bw-flops correlation ~0.995)
- Chroma: Lattice QCD (compute-intensive, high arithmetic intensity ~2.6)
"""
import json
from xgb_local import XGBoostMultiLabelPredictor
# ============================================================================
# Realistic Sample Data from roofline_features.h5
# ============================================================================
# TurTLE application - turbulence simulation workload
# (low arithmetic intensity, per the module docstring: ~0.84 FLOPs/byte)
SAMPLE_TURTLE = dict(
    # Memory-bandwidth distribution: percentiles plus robust spread (MAD, range, IQR).
    bandwidth_raw_p10=186.33,
    bandwidth_raw_median=205.14,
    bandwidth_raw_p90=210.83,
    bandwidth_raw_mad=3.57,
    bandwidth_raw_range=24.5,
    bandwidth_raw_iqr=12.075,
    # FLOP-rate distribution, same statistics as above.
    flops_raw_p10=162.024,
    flops_raw_median=171.45,
    flops_raw_p90=176.48,
    flops_raw_mad=3.08,
    flops_raw_range=14.456,
    flops_raw_iqr=8.29,
    # Arithmetic-intensity distribution.
    arith_intensity_p10=0.7906,
    arith_intensity_median=0.837,
    arith_intensity_p90=0.9109,
    arith_intensity_mad=0.02,
    arith_intensity_range=0.12,
    arith_intensity_iqr=0.0425,
    # Joint bandwidth/FLOPs statistics.
    bw_flops_covariance=60.86,
    bw_flops_correlation=0.16,
    # Aggregate performance (GFLOP/s, per the field names).
    avg_performance_gflops=168.1,
    median_performance_gflops=171.45,
    performance_gflops_mad=3.08,
    # Node hardware characteristics.
    avg_memory_bw_gbs=350.0,
    scalar_peak_gflops=432.0,
    simd_peak_gflops=9216.0,
    # Job metadata.
    node_num=0,
    duration=19366,
)
# SCALEXA application - scaling benchmark workload
# (bandwidth and FLOPs move together: correlation ~0.995, per the docstring)
SAMPLE_SCALEXA = dict(
    # Memory-bandwidth distribution: percentiles plus robust spread (MAD, range, IQR).
    bandwidth_raw_p10=13.474,
    bandwidth_raw_median=32.57,
    bandwidth_raw_p90=51.466,
    bandwidth_raw_mad=23.62,
    bandwidth_raw_range=37.992,
    bandwidth_raw_iqr=23.745,
    # FLOP-rate distribution, same statistics as above.
    flops_raw_p10=4.24,
    flops_raw_median=16.16,
    flops_raw_p90=24.584,
    flops_raw_mad=10.53,
    flops_raw_range=20.344,
    flops_raw_iqr=12.715,
    # Arithmetic-intensity distribution.
    arith_intensity_p10=0.211,
    arith_intensity_median=0.475,
    arith_intensity_p90=0.492,
    arith_intensity_mad=0.021,
    arith_intensity_range=0.281,
    arith_intensity_iqr=0.176,
    # Joint bandwidth/FLOPs statistics.
    bw_flops_covariance=302.0,
    bw_flops_correlation=0.995,
    # Aggregate performance (GFLOP/s, per the field names).
    avg_performance_gflops=14.7,
    median_performance_gflops=16.16,
    performance_gflops_mad=10.53,
    # Node hardware characteristics.
    avg_memory_bw_gbs=350.0,
    scalar_peak_gflops=432.0,
    simd_peak_gflops=9216.0,
    # Job metadata.
    node_num=18,
    duration=165,
)
# Chroma application - lattice QCD workload (compute-intensive)
# (high arithmetic intensity, per the docstring: ~2.6 FLOPs/byte)
SAMPLE_CHROMA = dict(
    # Memory-bandwidth distribution: percentiles plus robust spread (MAD, range, IQR).
    bandwidth_raw_p10=154.176,
    bandwidth_raw_median=200.57,
    bandwidth_raw_p90=259.952,
    bandwidth_raw_mad=5.12,
    bandwidth_raw_range=105.776,
    bandwidth_raw_iqr=10.215,
    # FLOP-rate distribution, same statistics as above.
    flops_raw_p10=327.966,
    flops_raw_median=519.8,
    flops_raw_p90=654.422,
    flops_raw_mad=16.97,
    flops_raw_range=326.456,
    flops_raw_iqr=34.88,
    # Arithmetic-intensity distribution.
    arith_intensity_p10=1.55,
    arith_intensity_median=2.595,
    arith_intensity_p90=3.445,
    arith_intensity_mad=0.254,
    arith_intensity_range=1.894,
    arith_intensity_iqr=0.512,
    # Joint bandwidth/FLOPs statistics.
    bw_flops_covariance=382.76,
    bw_flops_correlation=0.063,
    # Aggregate performance (GFLOP/s, per the field names).
    avg_performance_gflops=503.26,
    median_performance_gflops=519.8,
    performance_gflops_mad=16.97,
    # Node hardware characteristics.
    avg_memory_bw_gbs=350.0,
    scalar_peak_gflops=432.0,
    simd_peak_gflops=9216.0,
    # Job metadata.
    node_num=3,
    duration=31133,
)
# Raw JSON roofline data (before aggregation, as produced by monitoring).
# Three measurements from the same node (node_num=1) of one job, kept as a
# plain list first so the structure is readable, then serialized once.
_RAW_ROOFLINE_RECORDS = [
    {"node_num": 1, "bandwidth_raw": 150.5, "flops_raw": 2500.0, "arith_intensity": 16.6,
     "performance_gflops": 1200.0, "memory_bw_gbs": 450, "scalar_peak_gflops": 600, "duration": 3600},
    {"node_num": 1, "bandwidth_raw": 155.2, "flops_raw": 2600.0, "arith_intensity": 16.8,
     "performance_gflops": 1250.0, "memory_bw_gbs": 450, "scalar_peak_gflops": 600, "duration": 3600},
    {"node_num": 1, "bandwidth_raw": 148.0, "flops_raw": 2450.0, "arith_intensity": 16.5,
     "performance_gflops": 1180.0, "memory_bw_gbs": 450, "scalar_peak_gflops": 600, "duration": 3600},
]
SAMPLE_JSON_ROOFLINE = json.dumps(_RAW_ROOFLINE_RECORDS)
def _example_single_prediction(predictor):
    """Example 1: multi-label prediction for one aggregated feature dict."""
    print("\n=== Example 1: Single Prediction (TurTLE workload) ===")
    result = predictor.predict(SAMPLE_TURTLE, threshold=0.3)
    print(f"Predictions: {result['predictions']}")
    print(f"Confidences: {result['confidences']}")
    print("\nTop 5 probabilities:")
    # Rank classes by predicted probability, highest first.
    sorted_probs = sorted(result['probabilities'].items(), key=lambda x: x[1], reverse=True)
    for cls, prob in sorted_probs[:5]:
        print(f" {cls}: {prob:.4f}")


def _example_workload_comparison(predictor):
    """Example 2: compare the top prediction across three workload types."""
    print("\n=== Example 2: Compare Different Workloads ===")
    workloads = [
        ("TurTLE (turbulence)", SAMPLE_TURTLE),
        ("SCALEXA (benchmark)", SAMPLE_SCALEXA),
        ("Chroma (lattice QCD)", SAMPLE_CHROMA),
    ]
    for name, features in workloads:
        result = predictor.predict(features, threshold=0.3)
        # Guard against an empty prediction list / probability dict.
        top_pred = result['predictions'][0] if result['predictions'] else "None"
        top_prob = max(result['probabilities'].values()) if result['probabilities'] else 0
        print(f"{name:25} -> Top prediction: {top_pred:20} (prob: {top_prob:.4f})")


def _example_top_k(predictor):
    """Example 3: top-k ranked predictions for the Chroma workload."""
    print("\n=== Example 3: Top-5 Predictions (Chroma workload) ===")
    top_k_result = predictor.predict_top_k(SAMPLE_CHROMA, k=5)
    for i, cls in enumerate(top_k_result['top_predictions'], 1):
        prob = top_k_result['top_probabilities'][cls]
        print(f" {i}. {cls}: {prob:.4f}")


def _example_batch_prediction(predictor):
    """Example 4: batch prediction over several feature dicts at once."""
    print("\n=== Example 4: Batch Prediction ===")
    batch_data = [SAMPLE_TURTLE, SAMPLE_SCALEXA, SAMPLE_CHROMA]
    batch_results = predictor.batch_predict(batch_data, threshold=0.3)
    for i, result in enumerate(batch_results, 1):
        # batch_predict reports per-sample failures via an 'error' key.
        if 'error' not in result:
            preds = result['predictions'][:2]  # Show top 2
            print(f" Sample {i}: {preds}")
        else:
            print(f" Sample {i}: Error - {result['error']}")


def _example_json_input(predictor):
    """Example 5: predict from raw (pre-aggregation) JSON roofline data."""
    print("\n=== Example 5: Prediction from Raw JSON Data ===")
    result = predictor.predict(
        SAMPLE_JSON_ROOFLINE,
        is_json=True,
        job_id="example_job_001",
        threshold=0.3
    )
    print(f"Predictions: {result['predictions'][:3]}")
    print(f"(Aggregated from {len(json.loads(SAMPLE_JSON_ROOFLINE))} roofline samples)")


def _example_model_info(predictor):
    """Example 6: print model metadata (class and feature counts/names)."""
    print("\n=== Example 6: Model Information ===")
    info = predictor.get_class_info()
    print(f"Number of classes: {info['n_classes']}")
    print(f"Number of features: {info['n_features']}")
    print(f"Sample classes: {info['classes'][:5]}...")
    print(f"Sample features: {info['feature_columns'][:3]}...")


def main():
    """Run every XGBoostMultiLabelPredictor usage example in order.

    All examples print to stdout; there is no return value. A single
    predictor instance is created once and shared by all examples.
    """
    print("XGBoost Multi-Label Inference Examples")
    print("=" * 50)

    # Initialize the predictor
    predictor = XGBoostMultiLabelPredictor()

    _example_single_prediction(predictor)
    _example_workload_comparison(predictor)
    _example_top_k(predictor)
    _example_batch_prediction(predictor)
    _example_json_input(predictor)
    _example_model_info(predictor)


if __name__ == "__main__":
    main()