Initial commit

xgb_local_example.py (new file, 221 lines)

@@ -0,0 +1,221 @@
#!/usr/bin/env python3
"""
XGBoost Multi-Label Inference Usage Examples
============================================

This script demonstrates how to use the XGBoostMultiLabelPredictor class
for multi-label classification with confidence scores.

Sample data is from real HPC workloads extracted from roofline_features.h5:
- TurTLE: Turbulence simulation (memory-bound, low arithmetic intensity ~0.84)
- SCALEXA: Scaling benchmarks (high bw-flops correlation ~0.995)
- Chroma: Lattice QCD (compute-intensive, high arithmetic intensity ~2.6)
"""

import json
from xgb_local import XGBoostMultiLabelPredictor
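
# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the xgb_local API): how the roofline
# quantities used in the samples below relate to each other. Arithmetic
# intensity is work per byte moved (FLOP/byte), and the classic roofline bound
# on attainable performance is min(peak compute, intensity * memory bandwidth).
# The helper name and signature are hypothetical and exist only to document
# the feature semantics; nothing in this script calls it.
# ----------------------------------------------------------------------------
def roofline_sketch(flops_gflops, bandwidth_gbs, peak_gflops, mem_bw_gbs):
    """Return (arithmetic intensity, attainable GFLOP/s) for one measurement."""
    arith_intensity = flops_gflops / bandwidth_gbs
    attainable_gflops = min(peak_gflops, arith_intensity * mem_bw_gbs)
    return arith_intensity, attainable_gflops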

# ============================================================================
# Realistic Sample Data from roofline_features.h5
# ============================================================================

# TurTLE application - turbulence simulation workload
SAMPLE_TURTLE = {
    "bandwidth_raw_p10": 186.33,
    "bandwidth_raw_median": 205.14,
    "bandwidth_raw_p90": 210.83,
    "bandwidth_raw_mad": 3.57,
    "bandwidth_raw_range": 24.5,
    "bandwidth_raw_iqr": 12.075,
    "flops_raw_p10": 162.024,
    "flops_raw_median": 171.45,
    "flops_raw_p90": 176.48,
    "flops_raw_mad": 3.08,
    "flops_raw_range": 14.456,
    "flops_raw_iqr": 8.29,
    "arith_intensity_p10": 0.7906,
    "arith_intensity_median": 0.837,
    "arith_intensity_p90": 0.9109,
    "arith_intensity_mad": 0.02,
    "arith_intensity_range": 0.12,
    "arith_intensity_iqr": 0.0425,
    "bw_flops_covariance": 60.86,
    "bw_flops_correlation": 0.16,
    "avg_performance_gflops": 168.1,
    "median_performance_gflops": 171.45,
    "performance_gflops_mad": 3.08,
    "avg_memory_bw_gbs": 350.0,
    "scalar_peak_gflops": 432.0,
    "simd_peak_gflops": 9216.0,
    "node_num": 0,
    "duration": 19366,
}
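
# Sanity check of the roofline sketch above against this TurTLE sample
# (illustrative only): 171.45 GFLOP/s / 205.14 GB/s ~= 0.84 FLOP/byte, which
# matches arith_intensity_median and the ~0.84 quoted in the module docstring.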

# SCALEXA application - scaling benchmark workload
SAMPLE_SCALEXA = {
    "bandwidth_raw_p10": 13.474,
    "bandwidth_raw_median": 32.57,
    "bandwidth_raw_p90": 51.466,
    "bandwidth_raw_mad": 23.62,
    "bandwidth_raw_range": 37.992,
    "bandwidth_raw_iqr": 23.745,
    "flops_raw_p10": 4.24,
    "flops_raw_median": 16.16,
    "flops_raw_p90": 24.584,
    "flops_raw_mad": 10.53,
    "flops_raw_range": 20.344,
    "flops_raw_iqr": 12.715,
    "arith_intensity_p10": 0.211,
    "arith_intensity_median": 0.475,
    "arith_intensity_p90": 0.492,
    "arith_intensity_mad": 0.021,
    "arith_intensity_range": 0.281,
    "arith_intensity_iqr": 0.176,
    "bw_flops_covariance": 302.0,
    "bw_flops_correlation": 0.995,
    "avg_performance_gflops": 14.7,
    "median_performance_gflops": 16.16,
    "performance_gflops_mad": 10.53,
    "avg_memory_bw_gbs": 350.0,
    "scalar_peak_gflops": 432.0,
    "simd_peak_gflops": 9216.0,
    "node_num": 18,
    "duration": 165,
}

# Chroma application - lattice QCD workload (compute-intensive)
SAMPLE_CHROMA = {
    "bandwidth_raw_p10": 154.176,
    "bandwidth_raw_median": 200.57,
    "bandwidth_raw_p90": 259.952,
    "bandwidth_raw_mad": 5.12,
    "bandwidth_raw_range": 105.776,
    "bandwidth_raw_iqr": 10.215,
    "flops_raw_p10": 327.966,
    "flops_raw_median": 519.8,
    "flops_raw_p90": 654.422,
    "flops_raw_mad": 16.97,
    "flops_raw_range": 326.456,
    "flops_raw_iqr": 34.88,
    "arith_intensity_p10": 1.55,
    "arith_intensity_median": 2.595,
    "arith_intensity_p90": 3.445,
    "arith_intensity_mad": 0.254,
    "arith_intensity_range": 1.894,
    "arith_intensity_iqr": 0.512,
    "bw_flops_covariance": 382.76,
    "bw_flops_correlation": 0.063,
    "avg_performance_gflops": 503.26,
    "median_performance_gflops": 519.8,
    "performance_gflops_mad": 16.97,
    "avg_memory_bw_gbs": 350.0,
    "scalar_peak_gflops": 432.0,
    "simd_peak_gflops": 9216.0,
    "node_num": 3,
    "duration": 31133,
}

# Raw JSON roofline data (before aggregation, as produced by monitoring)
SAMPLE_JSON_ROOFLINE = json.dumps([
    {"node_num": 1, "bandwidth_raw": 150.5, "flops_raw": 2500.0, "arith_intensity": 16.6,
     "performance_gflops": 1200.0, "memory_bw_gbs": 450, "scalar_peak_gflops": 600, "duration": 3600},
    {"node_num": 1, "bandwidth_raw": 155.2, "flops_raw": 2600.0, "arith_intensity": 16.8,
     "performance_gflops": 1250.0, "memory_bw_gbs": 450, "scalar_peak_gflops": 600, "duration": 3600},
    {"node_num": 1, "bandwidth_raw": 148.0, "flops_raw": 2450.0, "arith_intensity": 16.5,
     "performance_gflops": 1180.0, "memory_bw_gbs": 450, "scalar_peak_gflops": 600, "duration": 3600},
])

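# ----------------------------------------------------------------------------
# Illustrative sketch of the aggregation step: raw per-sample records like
# SAMPLE_JSON_ROOFLINE are condensed into robust statistics such as the
# *_p10 / *_median / *_mad / *_iqr fields in the dicts above. The helper below
# is hypothetical and not part of xgb_local; when is_json=True the predictor
# performs its own aggregation internally (see Example 5 in main()). It only
# documents what the feature names mean.
# ----------------------------------------------------------------------------
def aggregate_metric_sketch(values, prefix):
    """Summarise one raw metric series into the robust statistics used above."""
    import numpy as np  # numpy is assumed to be available alongside xgboost

    arr = np.asarray(values, dtype=float)
    p10, p25, p50, p75, p90 = np.percentile(arr, [10, 25, 50, 75, 90])
    return {
        f"{prefix}_p10": float(p10),
        f"{prefix}_median": float(p50),
        f"{prefix}_p90": float(p90),
        f"{prefix}_mad": float(np.median(np.abs(arr - p50))),  # median absolute deviation
        f"{prefix}_range": float(arr.max() - arr.min()),
        f"{prefix}_iqr": float(p75 - p25),
    }
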
def main():
    print("XGBoost Multi-Label Inference Examples")
    print("=" * 50)

    # Initialize the predictor
    predictor = XGBoostMultiLabelPredictor()

    # =========================================================================
    # Example 1: Single prediction with aggregated features
    # =========================================================================
    print("\n=== Example 1: Single Prediction (TurTLE workload) ===")

    result = predictor.predict(SAMPLE_TURTLE, threshold=0.3)

    print(f"Predictions: {result['predictions']}")
    print(f"Confidences: {result['confidences']}")
    print("\nTop 5 probabilities:")
    sorted_probs = sorted(result['probabilities'].items(), key=lambda x: x[1], reverse=True)
    for cls, prob in sorted_probs[:5]:
        print(f"  {cls}: {prob:.4f}")

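    # Illustrative only: in multi-label classification each class receives an
    # independent probability, and classes whose probability clears the
    # threshold are reported as predictions. The recomputation below is assumed
    # to mirror what the predictor already does internally when building
    # result['predictions']; it exists purely to make that relationship visible.
    manual_labels = [cls for cls, prob in result['probabilities'].items() if prob >= 0.3]
    print(f"Labels with probability >= 0.3: {manual_labels}")
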
# =========================================================================
|
||||
# Example 2: Compare different workload types
|
||||
# =========================================================================
|
||||
print("\n=== Example 2: Compare Different Workloads ===")
|
||||
|
||||
workloads = [
|
||||
("TurTLE (turbulence)", SAMPLE_TURTLE),
|
||||
("SCALEXA (benchmark)", SAMPLE_SCALEXA),
|
||||
("Chroma (lattice QCD)", SAMPLE_CHROMA),
|
||||
]
|
||||
|
||||
for name, features in workloads:
|
||||
result = predictor.predict(features, threshold=0.3)
|
||||
top_pred = result['predictions'][0] if result['predictions'] else "None"
|
||||
top_prob = max(result['probabilities'].values()) if result['probabilities'] else 0
|
||||
print(f"{name:25} -> Top prediction: {top_pred:20} (prob: {top_prob:.4f})")
|
||||
|
||||
# =========================================================================
|
||||
# Example 3: Top-K predictions
|
||||
# =========================================================================
|
||||
print("\n=== Example 3: Top-5 Predictions (Chroma workload) ===")
|
||||
|
||||
top_k_result = predictor.predict_top_k(SAMPLE_CHROMA, k=5)
|
||||
|
||||
for i, cls in enumerate(top_k_result['top_predictions'], 1):
|
||||
prob = top_k_result['top_probabilities'][cls]
|
||||
print(f" {i}. {cls}: {prob:.4f}")
|
||||
|
||||
# =========================================================================
|
||||
# Example 4: Batch prediction
|
||||
# =========================================================================
|
||||
print("\n=== Example 4: Batch Prediction ===")
|
||||
|
||||
batch_data = [SAMPLE_TURTLE, SAMPLE_SCALEXA, SAMPLE_CHROMA]
|
||||
batch_results = predictor.batch_predict(batch_data, threshold=0.3)
|
||||
|
||||
for i, result in enumerate(batch_results, 1):
|
||||
if 'error' not in result:
|
||||
preds = result['predictions'][:2] # Show top 2
|
||||
print(f" Sample {i}: {preds}")
|
||||
else:
|
||||
print(f" Sample {i}: Error - {result['error']}")
|
||||
|
||||
# =========================================================================
|
||||
# Example 5: Prediction from raw JSON roofline data
|
||||
# =========================================================================
|
||||
print("\n=== Example 5: Prediction from Raw JSON Data ===")
|
||||
|
||||
result = predictor.predict(
|
||||
SAMPLE_JSON_ROOFLINE,
|
||||
is_json=True,
|
||||
job_id="example_job_001",
|
||||
threshold=0.3
|
||||
)
|
||||
|
||||
print(f"Predictions: {result['predictions'][:3]}")
|
||||
print(f"(Aggregated from {len(json.loads(SAMPLE_JSON_ROOFLINE))} roofline samples)")
|
||||
|
||||
# =========================================================================
|
||||
# Example 6: Model information
|
||||
# =========================================================================
|
||||
print("\n=== Example 6: Model Information ===")
|
||||
|
||||
info = predictor.get_class_info()
|
||||
print(f"Number of classes: {info['n_classes']}")
|
||||
print(f"Number of features: {info['n_features']}")
|
||||
print(f"Sample classes: {info['classes'][:5]}...")
|
||||
print(f"Sample features: {info['feature_columns'][:3]}...")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||