slurm-application-detection…/xgb_local_example.py

#!/usr/bin/env python3
"""
XGBoost Multi-Label Inference Usage Examples
===================================================

This script demonstrates how to use the XGBoostMultiLabelPredictor class
for multi-label classification with confidence scores.

Sample data is from real HPC workloads extracted from roofline_features.h5:
- TurTLE: Turbulence simulation (memory-bound, low arithmetic intensity ~0.84)
- SCALEXA: Scaling benchmarks (high bw-flops correlation ~0.995)
- Chroma: Lattice QCD (compute-intensive, high arithmetic intensity ~2.6)
"""

import json
from xgb_local import XGBoostMultiLabelPredictor

# ============================================================================
# Realistic Sample Data from roofline_features.h5
# ============================================================================

# TurTLE application - turbulence simulation workload.
# One already-aggregated feature record (flat feature-name -> number mapping).
# main() passes it unchanged to predictor.predict() and batch_predict().
SAMPLE_TURTLE = {
    # Summary statistics of the bandwidth signal (units not stated in this file).
    # For this sample range == p90 - p10 (210.83 - 186.33 = 24.5).
    # NOTE(review): "mad" is presumably median absolute deviation and "iqr" the
    # interquartile range - confirm against the feature-extraction code.
    "bandwidth_raw_p10": 186.33,
    "bandwidth_raw_median": 205.14,
    "bandwidth_raw_p90": 210.83,
    "bandwidth_raw_mad": 3.57,
    "bandwidth_raw_range": 24.5,
    "bandwidth_raw_iqr": 12.075,
    # Same six statistics for the FLOPS signal (range == p90 - p10 here too).
    "flops_raw_p10": 162.024,
    "flops_raw_median": 171.45,
    "flops_raw_p90": 176.48,
    "flops_raw_mad": 3.08,
    "flops_raw_range": 14.456,
    "flops_raw_iqr": 8.29,
    # Same six statistics for arithmetic intensity; the median (0.837) is the
    # "~0.84" quoted in the module docstring.
    "arith_intensity_p10": 0.7906,
    "arith_intensity_median": 0.837,
    "arith_intensity_p90": 0.9109,
    "arith_intensity_mad": 0.02,
    "arith_intensity_range": 0.12,
    "arith_intensity_iqr": 0.0425,
    # Joint bandwidth/FLOPS statistics.
    "bw_flops_covariance": 60.86,
    "bw_flops_correlation": 0.16,
    # Performance summary. The median and MAD repeat flops_raw_median and
    # flops_raw_mad above. Presumably GFLOP/s, going by the key names - confirm.
    "avg_performance_gflops": 168.1,
    "median_performance_gflops": 171.45,
    "performance_gflops_mad": 3.08,
    # Memory-bandwidth and peak-FLOPS reference values; all three sample records
    # in this file carry the same numbers.
    # NOTE(review): presumably hardware limits of the node type in GB/s and
    # GFLOP/s - confirm.
    "avg_memory_bw_gbs": 350.0,
    "scalar_peak_gflops": 432.0,
    "simd_peak_gflops": 9216.0,
    # Job metadata. NOTE(review): duration is presumably in seconds - confirm.
    "node_num": 0,
    "duration": 19366,
}

# SCALEXA application - scaling benchmark workload.
# One already-aggregated feature record (flat feature-name -> number mapping).
# main() passes it unchanged to predictor.predict() and batch_predict().
SAMPLE_SCALEXA = {
    # Summary statistics of the bandwidth signal (units not stated in this file).
    # For this sample range == p90 - p10 (51.466 - 13.474 = 37.992).
    # NOTE(review): "mad" is presumably median absolute deviation and "iqr" the
    # interquartile range - confirm against the feature-extraction code.
    "bandwidth_raw_p10": 13.474,
    "bandwidth_raw_median": 32.57,
    "bandwidth_raw_p90": 51.466,
    "bandwidth_raw_mad": 23.62,
    "bandwidth_raw_range": 37.992,
    "bandwidth_raw_iqr": 23.745,
    # Same six statistics for the FLOPS signal (range == p90 - p10 here too).
    "flops_raw_p10": 4.24,
    "flops_raw_median": 16.16,
    "flops_raw_p90": 24.584,
    "flops_raw_mad": 10.53,
    "flops_raw_range": 20.344,
    "flops_raw_iqr": 12.715,
    # Same six statistics for arithmetic intensity (range == p90 - p10).
    "arith_intensity_p10": 0.211,
    "arith_intensity_median": 0.475,
    "arith_intensity_p90": 0.492,
    "arith_intensity_mad": 0.021,
    "arith_intensity_range": 0.281,
    "arith_intensity_iqr": 0.176,
    # Joint bandwidth/FLOPS statistics; the correlation is the "~0.995" quoted
    # in the module docstring.
    "bw_flops_covariance": 302.0,
    "bw_flops_correlation": 0.995,
    # Performance summary. The median and MAD repeat flops_raw_median and
    # flops_raw_mad above. Presumably GFLOP/s, going by the key names - confirm.
    "avg_performance_gflops": 14.7,
    "median_performance_gflops": 16.16,
    "performance_gflops_mad": 10.53,
    # Memory-bandwidth and peak-FLOPS reference values; all three sample records
    # in this file carry the same numbers.
    # NOTE(review): presumably hardware limits of the node type in GB/s and
    # GFLOP/s - confirm.
    "avg_memory_bw_gbs": 350.0,
    "scalar_peak_gflops": 432.0,
    "simd_peak_gflops": 9216.0,
    # Job metadata. NOTE(review): duration is presumably in seconds - confirm.
    "node_num": 18,
    "duration": 165,
}

# Chroma application - lattice QCD workload (compute-intensive).
# One already-aggregated feature record (flat feature-name -> number mapping).
# main() passes it unchanged to predict(), predict_top_k() and batch_predict().
SAMPLE_CHROMA = {
    # Summary statistics of the bandwidth signal (units not stated in this file).
    # For this sample range == p90 - p10 (259.952 - 154.176 = 105.776).
    # NOTE(review): "mad" is presumably median absolute deviation and "iqr" the
    # interquartile range - confirm against the feature-extraction code.
    "bandwidth_raw_p10": 154.176,
    "bandwidth_raw_median": 200.57,
    "bandwidth_raw_p90": 259.952,
    "bandwidth_raw_mad": 5.12,
    "bandwidth_raw_range": 105.776,
    "bandwidth_raw_iqr": 10.215,
    # Same six statistics for the FLOPS signal (range == p90 - p10 here too).
    "flops_raw_p10": 327.966,
    "flops_raw_median": 519.8,
    "flops_raw_p90": 654.422,
    "flops_raw_mad": 16.97,
    "flops_raw_range": 326.456,
    "flops_raw_iqr": 34.88,
    # Same six statistics for arithmetic intensity; the median (2.595) is the
    # "~2.6" quoted in the module docstring.
    # NOTE(review): range is 1.894 while p90 - p10 is 1.895; probably rounding
    # of the source values - confirm.
    "arith_intensity_p10": 1.55,
    "arith_intensity_median": 2.595,
    "arith_intensity_p90": 3.445,
    "arith_intensity_mad": 0.254,
    "arith_intensity_range": 1.894,
    "arith_intensity_iqr": 0.512,
    # Joint bandwidth/FLOPS statistics.
    "bw_flops_covariance": 382.76,
    "bw_flops_correlation": 0.063,
    # Performance summary. The median and MAD repeat flops_raw_median and
    # flops_raw_mad above. Presumably GFLOP/s, going by the key names - confirm.
    "avg_performance_gflops": 503.26,
    "median_performance_gflops": 519.8,
    "performance_gflops_mad": 16.97,
    # Memory-bandwidth and peak-FLOPS reference values; all three sample records
    # in this file carry the same numbers.
    # NOTE(review): presumably hardware limits of the node type in GB/s and
    # GFLOP/s - confirm.
    "avg_memory_bw_gbs": 350.0,
    "scalar_peak_gflops": 432.0,
    "simd_peak_gflops": 9216.0,
    # Job metadata. NOTE(review): duration is presumably in seconds - confirm.
    "node_num": 3,
    "duration": 31133,
}

# Raw JSON roofline data (before aggregation, as produced by monitoring).
# Three per-sample records serialized to one JSON string. Only the four measured
# values differ between records; node number, memory bandwidth, scalar peak and
# duration are the same in all of them, so the records are generated from a
# table of the varying values.
SAMPLE_JSON_ROOFLINE = json.dumps([
    {
        "node_num": 1,
        "bandwidth_raw": bandwidth,
        "flops_raw": flops,
        "arith_intensity": intensity,
        "performance_gflops": performance,
        "memory_bw_gbs": 450,
        "scalar_peak_gflops": 600,
        "duration": 3600,
    }
    for bandwidth, flops, intensity, performance in (
        (150.5, 2500.0, 16.6, 1200.0),
        (155.2, 2600.0, 16.8, 1250.0),
        (148.0, 2450.0, 16.5, 1180.0),
    )
])


def main():
    """Run the six usage demos against one predictor and print the results.

    The demos, in order: a single prediction, a side-by-side comparison of the
    three sample workloads, a top-k query, a batch prediction, a prediction
    from the raw JSON roofline string, and a dump of model metadata. Nothing is
    returned; all output goes to stdout.
    """
    print("XGBoost Multi-Label Inference Examples")
    print("=" * 50)

    # One predictor instance is shared by every demo below.
    predictor = XGBoostMultiLabelPredictor()

    # ---- Example 1: single prediction from aggregated features -------------
    print("\n=== Example 1: Single Prediction (TurTLE workload) ===")

    turtle_result = predictor.predict(SAMPLE_TURTLE, threshold=0.3)

    print(f"Predictions: {turtle_result['predictions']}")
    print(f"Confidences: {turtle_result['confidences']}")
    print("\nTop 5 probabilities:")
    # Rank every class by probability, highest first, and show the first five.
    ranked = sorted(
        turtle_result['probabilities'].items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for label, probability in ranked[:5]:
        print(f"  {label}: {probability:.4f}")

    # ---- Example 2: compare the three sample workloads ----------------------
    print("\n=== Example 2: Compare Different Workloads ===")

    named_samples = (
        ("TurTLE (turbulence)", SAMPLE_TURTLE),
        ("SCALEXA (benchmark)", SAMPLE_SCALEXA),
        ("Chroma (lattice QCD)", SAMPLE_CHROMA),
    )
    for title, sample in named_samples:
        outcome = predictor.predict(sample, threshold=0.3)

        # First returned prediction, or the text "None" when the list is empty.
        if outcome['predictions']:
            best_label = outcome['predictions'][0]
        else:
            best_label = "None"

        # Largest probability over all classes, or 0 when there are none.
        if outcome['probabilities']:
            best_prob = max(outcome['probabilities'].values())
        else:
            best_prob = 0

        print(f"{title:25} -> Top prediction: {best_label:20} (prob: {best_prob:.4f})")

    # ---- Example 3: top-k query ---------------------------------------------
    print("\n=== Example 3: Top-5 Predictions (Chroma workload) ===")

    top_k = predictor.predict_top_k(SAMPLE_CHROMA, k=5)

    for rank, label in enumerate(top_k['top_predictions'], start=1):
        print(f"  {rank}. {label}: {top_k['top_probabilities'][label]:.4f}")

    # ---- Example 4: batch prediction ----------------------------------------
    print("\n=== Example 4: Batch Prediction ===")

    batch_outcomes = predictor.batch_predict(
        [SAMPLE_TURTLE, SAMPLE_SCALEXA, SAMPLE_CHROMA],
        threshold=0.3,
    )

    for position, entry in enumerate(batch_outcomes, start=1):
        # An entry carrying an 'error' key is reported instead of its predictions.
        if 'error' in entry:
            print(f"  Sample {position}: Error - {entry['error']}")
            continue
        print(f"  Sample {position}: {entry['predictions'][:2]}")  # Show top 2

    # ---- Example 5: prediction from the raw JSON roofline string ------------
    print("\n=== Example 5: Prediction from Raw JSON Data ===")

    json_result = predictor.predict(
        SAMPLE_JSON_ROOFLINE,
        is_json=True,
        job_id="example_job_001",
        threshold=0.3,
    )

    print(f"Predictions: {json_result['predictions'][:3]}")
    # The sample count is read back from the JSON string itself.
    raw_sample_count = len(json.loads(SAMPLE_JSON_ROOFLINE))
    print(f"(Aggregated from {raw_sample_count} roofline samples)")

    # ---- Example 6: model metadata ------------------------------------------
    print("\n=== Example 6: Model Information ===")

    model_info = predictor.get_class_info()
    print(f"Number of classes: {model_info['n_classes']}")
    print(f"Number of features: {model_info['n_features']}")
    print(f"Sample classes: {model_info['classes'][:5]}...")
    print(f"Sample features: {model_info['feature_columns'][:3]}...")


# Run the demos only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()