feat: performance analytics panel on dashboard
dashboard/dashboard.py (+61 lines):
- New /api/performance endpoint that proxies to the router's /metrics/performance
- Performance Analytics row with 4 panels:
  - Latency distribution (p50/p95/p99 per model) with stacked bars
  - Throughput comparison (avg + p50 tokens/sec per model)
  - Routing effectiveness table by reason
  - Agent performance bars with latency
- 1h/24h window toggle, auto-refresh every 15s
- Color-coded per model (purple=MoE, amber=Dense, green=VLM)
This commit is contained in:
+63
-2
@@ -101,7 +101,19 @@ body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMac
|
||||
<div class="col-md-4"><div class="chart-card"><div class="title">Model Distribution</div><div id="route-bars"></div></div></div>
|
||||
<div class="col-md-4"><div class="chart-card"><div class="title">Agent Activity</div><div id="agent-bars"></div></div></div>
|
||||
|
||||
<!-- ROW 4: Live Stream -->
|
||||
<!-- ROW 4: Performance Analytics -->
|
||||
<div class="col-12 mb-2"><div class="d-flex align-items-center gap-2"><span class="fw-bold text-white" style="font-size:14px">📊 Performance Analytics</span>
|
||||
<div class="d-flex gap-1 ms-auto">
|
||||
<button class="btn-sm-period active" onclick="switchPerfWindow('1')">1h</button>
|
||||
<button class="btn-sm-period" onclick="switchPerfWindow('24')">24h</button>
|
||||
</div>
|
||||
</div></div>
|
||||
<div class="col-md-6"><div class="chart-card"><div class="title">Latency — P50 / P95 / P99 (ms)</div><div id="perf-latency"></div></div></div>
|
||||
<div class="col-md-6"><div class="chart-card"><div class="title">Throughput — Tokens / sec</div><div id="perf-throughput"></div></div></div>
|
||||
<div class="col-md-6"><div class="chart-card"><div class="title">Routing Effectiveness — by Reason</div><div id="perf-reasons"></div></div></div>
|
||||
<div class="col-md-6"><div class="chart-card"><div class="title">Agent Performance</div><div id="perf-agents"></div></div></div>
|
||||
|
||||
<!-- ROW 5: Live Stream -->
|
||||
<div class="col-12"><div class="chart-card"><div class="title">Live Stream</div>
|
||||
<div class="table-responsive"><table class="table table-custom mb-0">
|
||||
<thead><tr><th>Time</th><th>Agent</th><th>Model</th><th>Reason</th><th>Tier</th></tr></thead>
|
||||
@@ -188,8 +200,47 @@ var grid='';for(var g=0;g<=4;g++){var y=(g/4)*H;grid+='<line x1=\"0\" y1=\"'+y.t
|
||||
cn.innerHTML='<svg viewBox=\"0 0 100 '+(H+16)+'\" style=\"width:100%;height:'+(H+20)+'px;display:block\" preserveAspectRatio=\"none\">'+grid+paths+'</svg>';
|
||||
lg.innerHTML=mn.map(function(m){return'<span class=\"d-flex align-items-center gap-1\"><svg width=\"14\" height=\"8\"><line x1=\"0\" y1=\"4\" x2=\"14\" y2=\"4\" stroke=\"'+(MC[m]||'#38bdf8')+'\" stroke-width=\"2\"/></svg>'+(ML[m]||m)+'</span>';}).join('');
|
||||
}
|
||||
// Currently selected analytics window, kept as a string because it is compared
// against button text and appended to the request URL. Starts on the 1h view.
var perfWindow='1';

/**
 * Select a Performance Analytics window, update the toggle buttons and reload
 * the panels.
 *
 * @param {string|number} w - window value as passed by the toggle buttons
 *   ('1' or '24'); numbers are accepted and converted to a string.
 */
function switchPerfWindow(w){
  perfWindow=String(w);
  // Pick the toggle buttons by the handler they invoke instead of by their
  // position among all .btn-sm-period buttons, so adding or removing other
  // period buttons elsewhere on the page cannot break the highlight.
  document.querySelectorAll('.btn-sm-period[onclick^="switchPerfWindow"]').forEach(function(b){
    // Button captions are '1h' / '24h'; drop the unit before comparing.
    b.classList.toggle('active',b.textContent.trim().replace('h','')===perfWindow);
  });
  loadPerf();
}
|
||||
/**
 * Fetch performance metrics for the currently selected window and hand them
 * to renderPerf.
 *
 * Stays best-effort: a failed refresh is logged and the panels keep their
 * previous content until the next successful call.
 *
 * @returns {Promise<void>} settles once the refresh has been rendered,
 *   skipped or logged; never rejects.
 */
function loadPerf(){
  // Remember which window this request was issued for.
  var requested=perfWindow;
  return fetch('/api/performance?window='+encodeURIComponent(requested))
    .then(function(r){
      // fetch only rejects on network errors; treat HTTP errors as failures too.
      if(!r.ok)throw new Error('HTTP '+r.status);
      return r.json();
    })
    .then(function(d){
      // The user switched windows while this request was in flight: drop the
      // stale answer rather than overwrite the newer view.
      if(requested!==perfWindow)return;
      renderPerf(d);
    })
    .catch(function(err){
      console.warn('performance refresh failed:',err);
    });
}
|
||||
/**
 * Render the four Performance Analytics panels from an /api/performance payload.
 *
 * Reads d.models, d.reasons and d.agents (each optional) and writes HTML into
 * the perf-latency, perf-throughput, perf-reasons and perf-agents containers.
 * Every panel is refreshed on every call, so an empty models list cannot leave
 * stale throughput, reason or agent content on screen.
 *
 * @param {Object} d - parsed JSON payload; a missing payload or missing
 *   sections are treated as empty.
 */
function renderPerf(d){
  var payload=d||{};
  var models=payload.models||[];
  var reasons=payload.reasons||[];
  var agents=payload.agents||[];
  var mlab={'qwen3.6-35B-A3B':'35B MoE','qwen3.6-27B-code':'27B Dense','qwen3.5-9b-vlm':'9B VLM'};
  var mcol={'qwen3.6-35B-A3B':'#a78bfa','qwen3.6-27B-code':'#f59e0b','qwen3.5-9b-vlm':'#22c55e'};
  var DEFAULT_COLOR='#38bdf8';

  // Payload strings end up in innerHTML, so neutralise markup characters first.
  function esc(v){
    return String(v==null?'':v).replace(/[&<>"']/g,function(ch){return'&#'+ch.charCodeAt(0)+';';});
  }
  // Coerce to a finite number; anything else counts as 0.
  function num(v){
    var n=Number(v);
    return Number.isFinite(n)?n:0;
  }
  // Bar width in percent, clamped so an out-of-range value cannot overflow the track.
  function pct(v,max){
    return Math.min(100,Math.max(0,v/max*100));
  }
  // Millisecond cell text; a missing value renders as a dash instead of "undefinedms".
  function ms(v){
    return v==null?'-':esc(v)+'ms';
  }
  // Own-property lookup so a model named like an Object.prototype member falls back cleanly.
  function pick(map,key,fallback){
    return Object.prototype.hasOwnProperty.call(map,key)?map[key]:fallback;
  }
  function label(m){
    // Unknown models show their raw id rather than "undefined".
    return esc(pick(mlab,m.model,m.model));
  }
  function color(m){
    return pick(mcol,m.model,DEFAULT_COLOR);
  }
  function placeholder(text,pad){
    return'<div class="text-secondary small text-center '+pad+'">'+text+'</div>';
  }

  if(!models.length){
    $('perf-latency').innerHTML=placeholder('Accumulating data...','py-4');
    $('perf-throughput').innerHTML=placeholder('Accumulating data...','py-4');
  }else{
    // Latency bars: p50/p95/p99 per model, scaled to the largest p99.
    var maxLat=Math.max(...models.map(function(m){return num((m.latency||{}).p99);}),1);
    $('perf-latency').innerHTML=models.map(function(m){
      var l=m.latency||{},p50=num(l.p50),p95=num(l.p95),p99=num(l.p99),c=color(m);
      function layer(v,opacity){
        return'<div style="position:absolute;left:0;top:0;height:100%;width:'+pct(v,maxLat)+'%;background:'+c+';opacity:'+opacity+';border-radius:4px"></div>';
      }
      return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+label(m)+'</span><span class="text-secondary">'+num(m.count)+' reqs</span></div>'+
        '<div class="d-flex align-items-center gap-2 mb-1"><span class="text-secondary" style="min-width:28px">p50</span><div class="flex-grow-1" style="height:14px;background:#1e293b;border-radius:4px;overflow:hidden;position:relative">'+
        // The layers overlap from the left edge. Paint the widest (p99) first and
        // faintest, the narrowest (p50) last and solid, so all three stay visible;
        // an opaque p99 painted last would cover the other two.
        layer(p99,0.3)+layer(p95,0.5)+layer(p50,1)+
        '</div><span style="color:'+c+';min-width:48px;text-align:right;font-variant-numeric:tabular-nums">'+p99+'ms</span></div>'+
        '<div class="d-flex gap-3" style="font-size:10px;color:#64748b;padding-left:32px"><span>p50: '+p50+'ms</span><span>p95: '+p95+'ms</span><span>p99: '+p99+'ms</span></div></div>';
    }).join('');

    // Throughput comparison. Scale by the larger of avg and p50 so neither bar is clipped.
    var maxTps=Math.max(...models.map(function(m){
      var t=m.throughput||{};
      return Math.max(num(t.avg_tokens_per_sec),num(t.p50));
    }),1);
    $('perf-throughput').innerHTML=models.map(function(m){
      var t=m.throughput||{},avg=num(t.avg_tokens_per_sec),p50=num(t.p50),c=color(m);
      return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+label(m)+'</span><span style="color:'+c+'" class="fw-bold">'+avg+' tok/s</span></div>'+
        '<div class="d-flex align-items-center gap-2"><span class="text-secondary" style="min-width:28px">avg</span><div class="flex-grow-1" style="height:6px;background:#1e293b;border-radius:3px;overflow:hidden"><div style="height:100%;width:'+pct(avg,maxTps)+'%;background:'+c+';border-radius:3px"></div></div></div>'+
        '<div class="d-flex align-items-center gap-2 mt-1"><span class="text-secondary" style="min-width:28px;font-size:10px">p50</span><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+pct(p50,maxTps)+'%;background:'+c+';opacity:0.5;border-radius:2px"></div></div><span style="font-size:10px;color:#64748b">'+p50+' tok/s</span></div></div>';
    }).join('');
  }

  // Routing reasons table
  if(reasons.length){
    $('perf-reasons').innerHTML='<table class="table table-custom mb-0"><thead><tr><th>Reason</th><th>Count</th><th>Avg Lat</th><th>P95 Lat</th></tr></thead><tbody>'+
      reasons.map(function(r){
        return'<tr><td>'+esc(r.reason)+'</td><td>'+num(r.count)+'</td><td>'+ms(r.avg_total_ms)+'</td><td>'+ms(r.p95_total_ms)+'</td></tr>';
      }).join('')+
      '</tbody></table>';
  }else{
    $('perf-reasons').innerHTML=placeholder('-','py-3');
  }

  // Agent performance: bar length is request count relative to the busiest agent.
  if(agents.length){
    var maxAc=Math.max(...agents.map(function(a){return num(a.count);}),1);
    $('perf-agents').innerHTML=agents.map(function(a){
      var count=num(a.count);
      return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+esc(a.agent)+'</span><span class="text-secondary">'+count+' reqs</span></div><div class="d-flex align-items-center gap-2"><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+pct(count,maxAc)+'%;background:#38bdf8;border-radius:2px"></div></div><span class="small" style="color:#38bdf8;min-width:60px;text-align:right">'+ms(a.avg_total_ms)+' avg</span></div></div>';
    }).join('');
  }else{
    $('perf-agents').innerHTML=placeholder('-','py-3');
  }
}
|
||||
/**
 * Fetch the current dashboard state once, render it, and reflect the outcome
 * in the connection-status indicator ('live' on success, 'reconnecting' on
 * any failure in the chain).
 */
function poll(){
  const showLive=(data)=>{
    render(data);
    $('connection-status').textContent='live';
  };
  const showReconnecting=()=>{
    $('connection-status').textContent='reconnecting';
  };
  fetch('/api/state')
    .then((resp)=>resp.json())
    .then(showLive)
    .catch(showReconnecting);
}
|
||||
poll();setInterval(poll,3000);loadTS();
|
||||
poll();setInterval(poll,3000);loadTS();loadPerf();setInterval(loadPerf,15000);
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
@@ -200,6 +251,16 @@ def dashboard(): return render_template_string(DASHBOARD_HTML)
|
||||
@app.route("/api/state")
|
||||
def api_state(): return fetch_state()
|
||||
|
||||
@app.route("/api/performance")
def api_performance():
    """Proxy performance metrics from the router's /metrics/performance endpoint.

    Query parameters (both forwarded to the router):
        window: defaults to "24".
        model: defaults to "all".

    Returns the router's JSON body when it answers HTTP 200. On any other
    status or any error, returns an empty payload with the same top-level keys
    so the dashboard can still render; the failure is logged rather than
    silently dropped.
    """
    window = request.args.get("window", "24")
    model = request.args.get("model", "all")
    try:
        # Hand the values to requests via params= so they are URL-encoded; a
        # value such as "1&model=x" can then no longer inject extra query
        # parameters into the upstream request.
        r = requests.get(
            "http://router:9000/metrics/performance",
            params={"window": window, "model": model},
            timeout=10,
        )
        if r.status_code == 200:
            return r.json()
        app.logger.warning("router /metrics/performance returned HTTP %s", r.status_code)
    except Exception as exc:
        # Deliberately broad: this endpoint is best-effort and must always
        # answer with a renderable payload.
        app.logger.warning("router /metrics/performance request failed: %s", exc)
    return {"models": [], "reasons": [], "agents": [], "summary": {"total_requests": 0}}
|
||||
|
||||
@app.route("/api/timeseries")
|
||||
def api_timeseries():
|
||||
period = request.args.get("period", "day")
|
||||
|
||||
Reference in New Issue
Block a user