b2ec4b0572
- Dashboard: when a model has zero non-streaming records, shows "streaming only" instead of misleading 0 tok/s - Dashboard: minimum bar width enforced (6% avg, 4% p50) so low-tps models are always visible - Router: removed inflated streaming tps estimate (prompt tokens skewed results for long conversations) Fixes Dense model appearing to "register nothing" when Mumuni sends mostly streaming requests.
298 lines
24 KiB
Python
298 lines
24 KiB
Python
"""SyslogAI Harness Dashboard — Modern Design."""
|
|
import os, json, time, queue, threading
|
|
import requests
|
|
from flask import Flask, request, render_template_string, Response, stream_with_context
|
|
|
|
# Router metrics endpoint; override with the ROUTER_METRICS_URL environment variable.
ROUTER_METRICS = os.environ.get("ROUTER_METRICS_URL", "http://router:9000/metrics")

app = Flask(__name__)

# One queue per connected /api/stream client; every access goes through sse_lock.
sse_subscribers = []
sse_lock = threading.Lock()
|
|
|
|
def fetch_state():
    """Return the router's current metrics snapshot.

    Issues a GET to ROUTER_METRICS with a 5 second timeout and returns the
    decoded JSON body on HTTP 200. On any exception or any other status
    code, returns a placeholder snapshot with empty collections and the
    current time, so callers always receive the same top-level keys.
    """
    try:
        response = requests.get(ROUTER_METRICS, timeout=5)
        if response.status_code == 200:
            return response.json()
    except Exception:
        # Deliberately best-effort: the placeholder below is returned instead.
        pass
    return {
        "gpus": [],
        "route_counts": {},
        "agent_counts": {},
        "recent": [],
        "timestamp": time.time(),
    }
|
|
|
|
def broadcast_loop():
    """Push a fresh metrics snapshot to every SSE subscriber every 3 seconds.

    Never returns; it is meant to be the target of a background thread.

    The previous implementation treated the return value of ``Queue.put`` as
    a liveness flag. ``put`` always returns ``None``, so every subscriber was
    classified as dead and removed right after its first message. Subscribers
    are now only dropped when their queue refuses the payload.
    """
    while True:
        time.sleep(3)

        with sse_lock:
            if not sse_subscribers:
                # Nobody is listening: skip the round-trip to the router.
                continue

        # Fetch outside the lock so a slow router never blocks subscribe/unsubscribe.
        payload = json.dumps(fetch_state())

        with sse_lock:
            dead = []
            for q in sse_subscribers:
                try:
                    q.put_nowait(payload)
                except queue.Full:
                    # Only reachable for bounded queues; a full queue means the
                    # client is not draining it, so stop feeding it.
                    dead.append(q)
            for q in dead:
                sse_subscribers.remove(q)
|
|
# Start the broadcaster as soon as the module is imported. It is a daemon
# thread, so it does not keep the interpreter alive at shutdown.
_broadcast_thread = threading.Thread(target=broadcast_loop, daemon=True)
_broadcast_thread.start()
|
|
|
|
DASHBOARD_HTML = r"""<!DOCTYPE html>
|
|
<html lang="en" data-bs-theme="dark">
|
|
<head>
|
|
<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>SyslogAI Harness</title>
|
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
|
|
<style>
|
|
body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif; padding: 20px 24px; }
|
|
.card { background: #111827; border: 1px solid #1e293b; border-radius: 10px; height: 100%; }
|
|
.stat-card { background: #111827; border: 1px solid #1e293b; border-radius: 10px; padding: 18px 20px; text-align: center; }
|
|
.stat-value { font-size: 28px; font-weight: 700; line-height: 1.1; }
|
|
.stat-label { font-size: 11px; text-transform: uppercase; letter-spacing: 0.6px; color: #64748b; margin-top: 4px; }
|
|
.gpu-card { background: #111827; border: 1px solid #1e293b; border-radius: 10px; padding: 16px 18px; height: 100%; }
|
|
.gpu-card .title { font-size: 13px; font-weight: 600; color: #e2e8f0; margin-bottom: 12px; display: flex; align-items: center; gap: 8px; }
|
|
.gpu-card .status-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
|
|
.gpu-card .row-metric { display: flex; justify-content: space-between; font-size: 12px; padding: 2px 0; }
|
|
.gpu-card .row-metric .lbl { color: #64748b; }
|
|
.gpu-card .row-metric .val { color: #e2e8f0; font-variant-numeric: tabular-nums; }
|
|
.gpu-card .slot-bar { display: flex; gap: 3px; margin-top: 8px; }
|
|
.gpu-card .slot-bar .s { flex: 1; height: 5px; border-radius: 2px; background: #1e293b; }
|
|
.gpu-card .slot-bar .s.active { background: #38bdf8; }
|
|
.chart-card { background: #111827; border: 1px solid #1e293b; border-radius: 10px; padding: 16px 18px; height: 100%; display: flex; flex-direction: column; }
|
|
.chart-card .title { font-size: 13px; font-weight: 600; color: #e2e8f0; margin-bottom: 12px; }
|
|
.bar-row { margin-bottom: 8px; }
|
|
.bar-label { display: flex; justify-content: space-between; font-size: 11px; margin-bottom: 3px; color: #64748b; }
|
|
.bar-label .name { color: #cbd5e1; }
|
|
.bar-track { height: 5px; background: #1e293b; border-radius: 3px; overflow: hidden; }
|
|
.bar-fill { height: 100%; border-radius: 3px; transition: width 0.6s ease; }
|
|
.table-custom { font-size: 11px; margin: 0; }
|
|
.table-custom th { color: #64748b; font-weight: 500; font-size: 10px; text-transform: uppercase; border-color: #1e293b; padding: 8px 10px; }
|
|
.table-custom td { color: #94a3b8; border-color: rgba(30,41,59,0.5); padding: 6px 10px; }
|
|
.agent-badge { font-size: 10px; padding: 2px 7px; border-radius: 8px; font-weight: 600; }
|
|
.btn-sm-period { font-size: 10px; padding: 3px 10px; border-radius: 6px; border: 1px solid #1e293b; color: #64748b; background: transparent; cursor: pointer; }
|
|
.btn-sm-period.active { background: #1d4ed8; color: #fff; border-color: #1d4ed8; }
|
|
.ring-label { font-size: 22px; font-weight: 700; }
|
|
.ring-sublabel { font-size: 10px; color: #64748b; }
|
|
</style>
|
|
</head>
|
|
<body>
|
|
|
|
<!-- HEADER -->
|
|
<div class="d-flex justify-content-between align-items-center mb-4">
|
|
<div>
|
|
<h5 class="mb-0 text-white fw-bold">⚡ SyslogAI Harness</h5>
|
|
<div class="small text-secondary" id="live-indicator">
|
|
<span class="status-dot" id="live-dot" style="width:6px;height:6px;border-radius:50%;display:inline-block;background:#22c55e;animation:pulse 2s infinite"></span>
|
|
<span id="connection-status">live</span> · <span id="update-time"></span>
|
|
</div>
|
|
</div>
|
|
<div class="d-flex gap-2">
|
|
<div class="stat-card" style="min-width:100px"><div class="stat-value text-info" id="kpi-total">0</div><div class="stat-label">Requests</div></div>
|
|
<div class="stat-card" style="min-width:100px"><div class="stat-value text-warning" id="kpi-active">0</div><div class="stat-label">Active</div></div>
|
|
<div class="stat-card" style="min-width:100px"><div class="stat-value" style="color:#a78bfa" id="kpi-agents">0</div><div class="stat-label">Agents</div></div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="row g-3 align-items-stretch">
|
|
<!-- ROW 1: Usage Chart (8) + GPU Metrics (4) -->
|
|
<div class="col-md-8"><div class="chart-card"><div class="title d-flex justify-content-between align-items-center">
|
|
<span>Usage Over Time</span>
|
|
<div class="d-flex gap-1">
|
|
<button class="btn-sm-period active" onclick="switchPeriod('day')">24h</button>
|
|
<button class="btn-sm-period" onclick="switchPeriod('week')">7d</button>
|
|
<button class="btn-sm-period" onclick="switchPeriod('month')">30d</button>
|
|
</div>
|
|
</div><div id="timeseries-chart" style="height:150px"></div><div id="timeseries-legend" class="d-flex justify-content-center gap-3 mt-2 flex-wrap small"></div></div></div>
|
|
<div class="col-md-4"><div class="chart-card"><div class="title">GPU Metrics</div><div id="gpu-metrics-card"></div></div></div>
|
|
|
|
<!-- ROW 2: 3 GPU Cards -->
|
|
<div class="col-md-4"><div class="gpu-card" id="gpu-moe"><div class="text-secondary small">Loading...</div></div></div>
|
|
<div class="col-md-4"><div class="gpu-card" id="gpu-dense"><div class="text-secondary small">Loading...</div></div></div>
|
|
<div class="col-md-4"><div class="gpu-card" id="gpu-light"><div class="text-secondary small">Loading...</div></div></div>
|
|
|
|
<!-- ROW 3: Queue + Model + Agent -->
|
|
<div class="col-md-4"><div class="chart-card"><div class="title">Queue Status</div><div class="text-center" id="queue-viz"></div></div></div>
|
|
<div class="col-md-4"><div class="chart-card"><div class="title">Model Distribution</div><div id="route-bars"></div></div></div>
|
|
<div class="col-md-4"><div class="chart-card"><div class="title">Agent Activity</div><div id="agent-bars"></div></div></div>
|
|
|
|
<!-- ROW 4: Performance Analytics -->
|
|
<div class="col-12 mb-2"><div class="d-flex align-items-center gap-2"><span class="fw-bold text-white" style="font-size:14px">📊 Performance Analytics</span>
|
|
<div class="d-flex gap-1 ms-auto">
|
|
<button class="btn-sm-period active" onclick="switchPerfWindow('1')">1h</button>
|
|
<button class="btn-sm-period" onclick="switchPerfWindow('24')">24h</button>
|
|
</div>
|
|
</div></div>
|
|
<div class="col-md-6"><div class="chart-card"><div class="title">Latency — P50 / P95 / P99 (ms)</div><div id="perf-latency"></div></div></div>
|
|
<div class="col-md-6"><div class="chart-card"><div class="title">Throughput — Tokens / sec</div><div id="perf-throughput"></div></div></div>
|
|
<div class="col-md-6"><div class="chart-card"><div class="title">Routing Effectiveness — by Reason</div><div id="perf-reasons"></div></div></div>
|
|
<div class="col-md-6"><div class="chart-card"><div class="title">Agent Performance</div><div id="perf-agents"></div></div></div>
|
|
|
|
<!-- ROW 5: Live Stream -->
|
|
<div class="col-12"><div class="chart-card"><div class="title">Live Stream</div>
|
|
<div class="table-responsive"><table class="table table-custom mb-0">
|
|
<thead><tr><th>Time</th><th>Agent</th><th>Model</th><th>Reason</th><th>Tier</th></tr></thead>
|
|
<tbody id="route-tbody"></tbody>
|
|
</table></div>
|
|
</div></div>
|
|
</div>
|
|
|
|
<script>
|
|
// Accent color per router model id.
var MC = {
  'qwen3.5-9b-vlm': '#22c55e',
  'qwen3.6-27B-code': '#f59e0b',
  'qwen3.6-35B-A3B': '#a78bfa'
};
// Display name per router model id.
var ML = {
  'qwen3.5-9b-vlm': 'Qwen3.5 9B VLM',
  'qwen3.6-27B-code': 'Qwen Code',
  'qwen3.6-35B-A3B': 'Qwen MoE'
};
// GPU card title per router model id.
var GL = {
  'qwen3.6-35B-A3B': 'MoE - Strix Halo',
  'qwen3.6-27B-code': 'Dense - RTX 3090',
  'qwen3.5-9b-vlm': 'VLM - RTX 5070'
};
// Shorthand for document.getElementById.
function $(id) {
  return document.getElementById(id);
}
|
|
|
|
function render(data){
  // Repaint the KPI tiles, the three GPU cards, the queue and GPU-metrics cards,
  // the model/agent distribution bars and the live-stream table from one
  // /api/state snapshot. Does nothing when the snapshot has no gpus list.
  if(!data||!data.gpus)return;

  // Agent, model, reason and tier strings arrive in the payload and are written
  // through innerHTML, so they are escaped to keep any markup in them inert.
  function esc(v){
    return String(v==null?'':v).replace(/[&<>"']/g,function(ch){
      return '&#'+ch.charCodeAt(0)+';';
    });
  }
  // Show a dash instead of the literal "undefined" when a GPU metric is missing.
  function shown(v){return v==null?'-':esc(v);}

  var routeCounts=data.route_counts||{},agentCounts=data.agent_counts||{};
  var total=Object.values(routeCounts).reduce(function(a,b){return a+b;},0);
  var active=0,capacity=0;
  data.gpus.forEach(function(g){
    active+=(g.active_requests||0);
    capacity+=(g.max_concurrent||1);
  });
  $('kpi-total').textContent=total;
  $('kpi-active').textContent=active+'/'+capacity;
  $('kpi-agents').textContent=Object.keys(agentCounts).length;
  $('update-time').textContent=new Date().toLocaleTimeString();

  // GPU cards: only models with a dedicated card element are drawn.
  var cardIds={'qwen3.6-35B-A3B':'gpu-moe','qwen3.6-27B-code':'gpu-dense','qwen3.5-9b-vlm':'gpu-light'};
  data.gpus.forEach(function(g){
    var el=cardIds[g.id]?$(cardIds[g.id]):null;
    if(!el)return;
    var used=g.active_requests||0,max=g.max_concurrent||1;
    var statusColor=g.status==='healthy'?'#22c55e':g.status==='saturated'?'#f59e0b':'#ef4444';
    var statusText=g.status==='healthy'?'Online':g.status==='saturated'?'Busy':'Offline';
    var tempColor=g.temp_c>85?'#ef4444':g.temp_c>70?'#f59e0b':'#22c55e';
    var slots='';
    for(var i=0;i<max;i++)slots+='<span class="s'+(i<used?' active':'')+'"></span>';
    var html='<div class="title"><span class="status-dot" style="background:'+statusColor+'"></span>'+esc(GL[g.id])+'<span class="ms-auto small" style="color:'+statusColor+'">'+statusText+'</span></div>';
    html+='<div class="row-metric"><span class="lbl">VRAM</span><span class="val">'+shown(g.vram_used_mb)+' / '+shown(g.vram_total_mb)+' MB</span></div>';
    html+='<div class="row-metric"><span class="lbl">Utilization</span><span class="val">'+shown(g.gpu_util_pct)+'%</span></div>';
    html+='<div class="row-metric"><span class="lbl">Temperature</span><span class="val" style="color:'+tempColor+'">'+shown(g.temp_c)+'C</span></div>';
    if(g.power_w)html+='<div class="row-metric"><span class="lbl">Power</span><span class="val">'+esc(g.power_w)+'W'+(g.power_limit_w?'/'+esc(g.power_limit_w)+'W':'')+'</span></div>';
    html+='<div class="row-metric"><span class="lbl">Slots</span><span class="val" style="color:'+(used>=max?'#ef4444':'#e2e8f0')+'">'+used+' / '+max+'</span></div>';
    html+='<div class="slot-bar">'+slots+'</div>';
    el.innerHTML=html;
  });

  renderQueue(data);
  renderGPUMetrics(data);

  var placeholder='<div class="text-secondary small">-</div>';

  // Model distribution: bars are scaled against the busiest model.
  var routeRows=Object.entries(routeCounts).sort(function(a,b){return b[1]-a[1];});
  var maxRoute=Math.max(1,...Object.values(routeCounts));
  $('route-bars').innerHTML=routeRows.length?routeRows.map(function(e){
    var model=e[0],count=e[1];
    return '<div class="bar-row"><div class="bar-label"><span class="name">'+esc(ML[model]||model)+'</span><span>'+count+' ('+(total?Math.round(count/total*100):0)+'%)</span></div><div class="bar-track"><div class="bar-fill" style="width:'+(count/maxRoute*100)+'%;background:'+(MC[model]||'#38bdf8')+'"></div></div></div>';
  }).join(''):placeholder;

  // Agent activity: bars are scaled against the busiest agent.
  var agentRows=Object.entries(agentCounts).sort(function(a,b){return b[1]-a[1];});
  var maxAgent=Math.max(1,...Object.values(agentCounts));
  $('agent-bars').innerHTML=agentRows.length?agentRows.map(function(e){
    return '<div class="bar-row"><div class="bar-label"><span class="name">'+esc(e[0])+'</span><span>'+e[1]+'</span></div><div class="bar-track"><div class="bar-fill" style="width:'+(e[1]/maxAgent*100)+'%;background:#38bdf8"></div></div></div>';
  }).join(''):placeholder;

  // Live stream: the first 20 entries of data.recent.
  var recent=data.recent||[];
  $('route-tbody').innerHTML=recent.length?recent.slice(0,20).map(function(r){
    var when=new Date(r.ts*1000);
    return '<tr><td class="text-secondary">'+when.toLocaleTimeString()+'</td><td><span class="agent-badge" style="background:rgba(56,189,248,0.12);color:#38bdf8">'+esc(r.agent||'?')+'</span></td><td>'+esc(ML[r.model]||r.model)+'</td><td class="text-secondary">'+esc(r.reason||'')+'</td><td class="text-uppercase" style="font-size:10px;color:'+(r.tier==='enterprise'?'#a78bfa':'#64748b')+'">'+esc(r.tier||'')+'</td></tr>';
  }).join(''):'<tr><td colspan="5" class="text-secondary">Waiting...</td></tr>';
}
|
|
|
|
function renderQueue(data){
  // Draw the "Queue Status" card: a ring showing overall slot usage, a status
  // word, and one small usage bar per GPU.
  var target=$('queue-viz');
  if(!target)return;

  var busy=0,capacity=0;
  data.gpus.forEach(function(g){
    busy+=(g.active_requests||0);
    capacity+=(g.max_concurrent||1);
  });
  var pct=capacity>0?Math.round(busy/capacity*100):0;
  var statusText,statusColor;
  if(pct>=100){statusText='SATURATED';statusColor='#ef4444';}
  else if(pct>=50){statusText='BUSY';statusColor='#f59e0b';}
  else{statusText='IDLE';statusColor='#22c55e';}

  // 188.5 is the circumference used for the r=30 ring; the dash covers pct of it.
  var circumference=188.5,dash=(pct/100)*circumference;
  var ring='<div class="d-inline-block position-relative mb-2"><svg width="72" height="72"><circle cx="36" cy="36" r="30" fill="none" stroke="#1e293b" stroke-width="6"/><circle cx="36" cy="36" r="30" fill="none" stroke="'+statusColor+'" stroke-width="6" stroke-dasharray="'+dash+' '+(circumference-dash)+'" stroke-linecap="round" transform="rotate(-90 36 36)"/></svg><div style="position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);text-align:center"><div class="ring-label" style="color:'+statusColor+'">'+busy+'</div><div class="ring-sublabel">/ '+capacity+' slots</div></div></div>';
  var badge='<div class="fw-bold mb-2 small" style="color:'+statusColor+'">'+statusText+'</div>';

  var shortNames={'qwen3.6-35B-A3B':'MoE','qwen3.6-27B-code':'Dense','qwen3.5-9b-vlm':'VLM'};
  var rows=data.gpus.map(function(g){
    var used=g.active_requests||0,max=g.max_concurrent||1;
    var share=max>0?Math.round(used/max*100):0;
    // The per-GPU bars reuse the overall status color.
    return '<div class="d-flex align-items-center gap-2 mb-1 justify-content-center"><span class="small" style="min-width:32px;text-align:right;font-size:10px">'+(shortNames[g.id]||g.id)+'</span><div style="flex:1;max-width:70px;height:3px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+share+'%;background:'+statusColor+';border-radius:2px"></div></div><span class="small" style="min-width:22px;font-size:10px">'+used+'/'+max+'</span></div>';
  });

  target.innerHTML=ring+badge+rows.join('');
}
|
|
|
|
function renderGPUMetrics(data){
  // Fill the "GPU Metrics" card with a temperature (T) and utilization (U)
  // meter per GPU, plus a power (P) meter when the GPU reports power_w > 0.
  var target=$('gpu-metrics-card');
  if(!target)return;

  var shortNames={'qwen3.6-35B-A3B':'MoE','qwen3.6-27B-code':'Dense','qwen3.5-9b-vlm':'VLM'};

  // Green / amber / red depending on which threshold the value exceeds.
  function level(value,warn,crit){
    return value>crit?'#ef4444':value>warn?'#f59e0b':'#22c55e';
  }
  // One labelled meter row; width is the fill percentage, text the right-hand value.
  function meter(rowClass,tag,width,color,text){
    return '<div class="'+rowClass+'"><span class="small text-secondary" style="min-width:30px">'+tag+'</span><div class="flex-grow-1" style="height:3px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+width+'%;background:'+color+';border-radius:2px"></div></div><span class="small" style="color:'+color+';min-width:30px;text-align:right">'+text+'</span></div>';
  }

  target.innerHTML=data.gpus.map(function(g){
    var temp=g.temp_c||0,util=g.gpu_util_pct||0,watts=g.power_w||0,limit=g.power_limit_w||0;
    var block='<div class="mb-3"><div class="fw-bold small text-white-50 mb-1">'+(shortNames[g.id]||g.id)+'</div>';
    // The temperature bar is capped at 100% width; the label shows the raw value.
    block+=meter('d-flex align-items-center gap-2 mb-1','T',Math.min(temp,100),level(temp,70,85),temp+'C');
    block+=meter('d-flex align-items-center gap-2 mb-1','U',util,level(util,70,90),util+'%');
    if(watts>0){
      // Power is drawn as a share of the limit; with no limit the bar stays empty.
      var share=limit>0?Math.round(watts/limit*100):0;
      block+=meter('d-flex align-items-center gap-2','P',share,level(share,70,90),watts+'W');
    }
    return block+'</div>';
  }).join('');
}
|
|
|
|
// Currently selected usage-chart period: 'day', 'week' or 'month'.
var cp='day';

// Select a usage-chart period, highlight its button and reload the chart.
function switchPeriod(p){
  cp=p;
  // Touch only the usage-chart buttons. The perf-window buttons share the
  // .btn-sm-period class and previously lost their highlight here. Matching on
  // the inline handler also removes the dependency on the implicit global `event`.
  document.querySelectorAll('.btn-sm-period[onclick^="switchPeriod("]').forEach(function(b){
    b.classList.toggle('active',b.getAttribute('onclick')==="switchPeriod('"+p+"')");
  });
  loadTS();
}

// Fetch the time series for the current period and hand it to renderTS.
// Failures are ignored, so the previous chart stays on screen.
function loadTS(){
  fetch('/api/timeseries?period='+encodeURIComponent(cp))
    .then(function(r){return r.json();})
    .then(renderTS)
    .catch(function(){});
}
|
|
function renderTS(d){
  // Draw one line per model into #timeseries-chart and rebuild the legend.
  // Leaves the chart untouched when the payload has no labels.
  var series=d.models||{},labels=d.labels||[];
  if(!labels.length)return;

  var chart=$('timeseries-chart'),legend=$('timeseries-legend'),names=Object.keys(series);
  if(!names.length){
    chart.innerHTML='<div class="text-secondary small text-center py-4">-</div>';
    return;
  }

  // Largest sample across all series (never below 1), padded by 15%.
  var peak=1;
  names.forEach(function(name){
    for(var i=0;i<series[name].length;i++){
      if(series[name][i]>peak)peak=series[name][i];
    }
  });
  peak=Math.ceil(peak*1.15)||1;

  // The x axis spans 0..100 viewBox units; H is the plot height.
  var step=labels.length>1?100/(labels.length-1):100,H=130;

  var paths=names.map(function(name){
    var vals=series[name]||[],cmds='';
    for(var i=0;i<vals.length;i++){
      var x=i*step,y=H-(vals[i]/peak)*H;
      cmds+=(i===0?'M':'L')+x.toFixed(1)+','+y.toFixed(1)+' ';
    }
    return '<path d="'+cmds+'" fill="none" stroke="'+(MC[name]||'#38bdf8')+'" stroke-width="2" stroke-linecap="round" opacity="0.8"/>';
  }).join('');

  // Five evenly spaced horizontal grid lines.
  var grid='';
  for(var g=0;g<=4;g++){
    var gy=((g/4)*H).toFixed(1);
    grid+='<line x1="0" y1="'+gy+'" x2="100" y2="'+gy+'" stroke="#1e293b" stroke-width="1"/>';
  }

  chart.innerHTML='<svg viewBox="0 0 100 '+(H+16)+'" style="width:100%;height:'+(H+20)+'px;display:block" preserveAspectRatio="none">'+grid+paths+'</svg>';
  legend.innerHTML=names.map(function(name){
    return '<span class="d-flex align-items-center gap-1"><svg width="14" height="8"><line x1="0" y1="4" x2="14" y2="4" stroke="'+(MC[name]||'#38bdf8')+'" stroke-width="2"/></svg>'+(ML[name]||name)+'</span>';
  }).join('');
}
|
|
// Currently selected performance window, as sent to /api/performance ('1' or '24').
var perfWindow='1';

// Select a performance window, highlight its button and reload the panels.
function switchPerfWindow(w){
  perfWindow=w;
  // Select the perf-window buttons by their inline handler instead of by
  // position. The old positional test (i>=4) skipped the "1h" button, which is
  // the fourth .btn-sm-period on the page (index 3), so it never changed state.
  document.querySelectorAll('.btn-sm-period[onclick^="switchPerfWindow("]').forEach(function(b){
    b.classList.toggle('active',b.getAttribute('onclick')==="switchPerfWindow('"+w+"')");
  });
  loadPerf();
}

// Fetch the performance report for the current window and hand it to renderPerf.
// Failures are ignored, so the previous panels stay on screen.
function loadPerf(){
  fetch('/api/performance?window='+encodeURIComponent(perfWindow))
    .then(function(r){return r.json();})
    .then(renderPerf)
    .catch(function(){});
}
|
|
function renderPerf(d){
  // Render the four Performance Analytics panels (latency, throughput, routing
  // reasons, agent performance) from one /api/performance payload.
  // Every panel is rendered on every call, so an empty section can no longer
  // leave another panel showing data from the previously selected window.
  d=d||{};
  var models=d.models||[],reasons=d.reasons||[],agents=d.agents||[];
  var mlab={'qwen3.6-35B-A3B':'35B MoE','qwen3.6-27B-code':'27B Dense','qwen3.5-9b-vlm':'9B VLM'};
  var mcol={'qwen3.6-35B-A3B':'#a78bfa','qwen3.6-27B-code':'#f59e0b','qwen3.5-9b-vlm':'#22c55e'};

  // Reason, agent and model strings are written through innerHTML; escape them.
  function esc(v){
    return String(v==null?'':v).replace(/[&<>"']/g,function(ch){
      return '&#'+ch.charCodeAt(0)+';';
    });
  }
  // Fall back to the raw model id for models without a short label.
  function modelName(m){return esc(mlab[m.model]||m.model);}

  var waiting='<div class="text-secondary small text-center py-4">Accumulating data...</div>';

  if(models.length){
    // Latency bars: p50/p95/p99 per model, scaled against the largest p99.
    var maxLat=Math.max(...models.map(function(m){return (m.latency||{}).p99||0;}),1);
    $('perf-latency').innerHTML=models.map(function(m){
      var l=m.latency||{},p50=l.p50||0,p95=l.p95||0,p99=l.p99||0,c=mcol[m.model]||'#38bdf8';
      function layer(value,opacity){
        return '<div style="position:absolute;left:0;top:0;height:100%;width:'+(value/maxLat*100)+'%;background:'+c+';opacity:'+opacity+';border-radius:4px"></div>';
      }
      // Paint the widest layer first and faintest, the narrowest last and solid.
      // The old order put the opaque p99 layer on top, hiding p50 and p95.
      return '<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+modelName(m)+'</span><span class="text-secondary">'+m.count+' reqs</span></div>'+
        '<div class="d-flex align-items-center gap-2 mb-1"><span class="text-secondary" style="min-width:28px">p50</span><div class="flex-grow-1" style="height:14px;background:#1e293b;border-radius:4px;overflow:hidden;position:relative">'+
        layer(p99,0.3)+layer(p95,0.5)+layer(p50,1)+
        '</div><span style="color:'+c+';min-width:48px;text-align:right;font-variant-numeric:tabular-nums">'+p99+'ms</span></div>'+
        '<div class="d-flex gap-3" style="font-size:10px;color:#64748b;padding-left:32px"><span>p50: '+p50+'ms</span><span>p95: '+p95+'ms</span><span>p99: '+p99+'ms</span></div></div>';
    }).join('');

    // Throughput comparison, scaled against the largest average.
    var maxTps=Math.max(...models.map(function(m){return (m.throughput||{}).avg_tokens_per_sec||0;}),1);
    $('perf-throughput').innerHTML=models.map(function(m){
      var t=m.throughput||{},avg=t.avg_tokens_per_sec||0,p50=t.p50||0,c=mcol[m.model]||'#38bdf8';
      // No throughput samples at all: say so instead of showing 0 tok/s.
      if(avg===0&&p50===0){
        return '<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+modelName(m)+'</span><span style="color:#64748b;font-style:italic">streaming only</span></div><div class="text-secondary" style="font-size:10px">t/s available for non-streaming requests only</div></div>';
      }
      // Minimum widths (6% avg, 4% p50) keep slow models visible; cap at 100%.
      var avgWidth=Math.min(100,Math.max(avg/maxTps*100,6));
      var p50Width=Math.min(100,Math.max(p50/maxTps*100,4));
      return '<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+modelName(m)+'</span><span style="color:'+c+'" class="fw-bold">'+avg+' tok/s</span></div>'+
        '<div class="d-flex align-items-center gap-2"><span class="text-secondary" style="min-width:28px">avg</span><div class="flex-grow-1" style="height:6px;background:#1e293b;border-radius:3px;overflow:hidden"><div style="height:100%;width:'+avgWidth+'%;background:'+c+';border-radius:3px"></div></div><span class="small" style="color:'+c+';min-width:54px;text-align:right">'+avg+' tok/s</span></div>'+
        '<div class="d-flex align-items-center gap-2 mt-1"><span class="text-secondary" style="min-width:28px;font-size:10px">p50</span><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+p50Width+'%;background:'+c+';opacity:0.5;border-radius:2px"></div></div><span style="font-size:10px;color:#64748b">'+p50+' tok/s</span></div></div>';
    }).join('');
  }else{
    $('perf-latency').innerHTML=waiting;
    $('perf-throughput').innerHTML=waiting;
  }

  var none='<div class="text-secondary small text-center py-3">-</div>';

  // Routing reasons table.
  if(reasons.length){
    var rHTML='<table class="table table-custom mb-0"><thead><tr><th>Reason</th><th>Count</th><th>Avg Lat</th><th>P95 Lat</th></tr></thead><tbody>';
    reasons.forEach(function(r){
      rHTML+='<tr><td>'+esc(r.reason)+'</td><td>'+r.count+'</td><td>'+r.avg_total_ms+'ms</td><td>'+r.p95_total_ms+'ms</td></tr>';
    });
    rHTML+='</tbody></table>';
    $('perf-reasons').innerHTML=rHTML;
  }else{
    $('perf-reasons').innerHTML=none;
  }

  // Agent performance: bars are scaled against the busiest agent.
  if(agents.length){
    var maxAc=Math.max(...agents.map(function(a){return a.count||0;}),1);
    $('perf-agents').innerHTML=agents.map(function(a){
      return '<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+esc(a.agent)+'</span><span class="text-secondary">'+a.count+' reqs</span></div><div class="d-flex align-items-center gap-2"><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+(a.count/maxAc*100)+'%;background:#38bdf8;border-radius:2px"></div></div><span class="small" style="color:#38bdf8;min-width:60px;text-align:right">'+a.avg_total_ms+'ms avg</span></div></div>';
    }).join('');
  }else{
    $('perf-agents').innerHTML=none;
  }
}
|
|
// Fetch /api/state once, repaint the dashboard and update the connection label.
function poll(){
  fetch('/api/state')
    .then(function(resp){return resp.json();})
    .then(function(state){
      render(state);
      $('connection-status').textContent='live';
    })
    .catch(function(){
      $('connection-status').textContent='reconnecting';
    });
}

// Initial paint, then refresh state every 3 s and performance every 15 s.
poll();
setInterval(poll,3000);
loadTS();
loadPerf();
setInterval(loadPerf,15000);
|
|
</script>
|
|
</body>
|
|
</html>"""
|
|
|
|
@app.route("/")
def dashboard():
    """Serve the dashboard page rendered from DASHBOARD_HTML."""
    page = render_template_string(DASHBOARD_HTML)
    return page
|
|
|
|
@app.route("/api/state")
def api_state():
    """Return the snapshot produced by fetch_state() as the response body."""
    snapshot = fetch_state()
    return snapshot
|
|
|
|
@app.route("/api/performance")
def api_performance():
    """Proxy the router's performance report.

    Query parameters (both forwarded to the router unchanged):
        window: look-back window, default "24" (presumably hours, matching
            the dashboard's 1h/24h buttons; confirm against the router).
        model: model filter, default "all".

    Returns the router's JSON on HTTP 200, otherwise an empty report with
    the same top-level keys.
    """
    query = {
        "window": request.args.get("window", "24"),
        "model": request.args.get("model", "all"),
    }
    # Derive the endpoint from ROUTER_METRICS so ROUTER_METRICS_URL is honoured
    # here too; with the default value this is the URL that was hard-coded before.
    url = ROUTER_METRICS.rstrip("/") + "/performance"
    try:
        # params= URL-encodes the client-supplied values, so they cannot add
        # extra query parameters to the upstream request.
        r = requests.get(url, params=query, timeout=10)
        if r.status_code == 200:
            return r.json()
    except Exception:
        # Deliberately best-effort: fall through to the empty report.
        pass
    return {"models": [], "reasons": [], "agents": [], "summary": {"total_requests": 0}}
|
|
|
|
@app.route("/api/timeseries")
def api_timeseries():
    """Proxy the router's usage time series.

    Query parameters:
        period: forwarded to the router unchanged, default "day" (the
            dashboard sends "day", "week" or "month").

    Returns the router's JSON on HTTP 200, otherwise an empty series with
    the same top-level keys.
    """
    period = request.args.get("period", "day")
    # Derive the endpoint from ROUTER_METRICS so ROUTER_METRICS_URL is honoured
    # here too; with the default value this is the URL that was hard-coded before.
    url = ROUTER_METRICS.rstrip("/") + "/timeseries"
    try:
        # params= URL-encodes the client-supplied value, so it cannot add
        # extra query parameters to the upstream request.
        r = requests.get(url, params={"period": period}, timeout=5)
        if r.status_code == 200:
            return r.json()
    except Exception:
        # Deliberately best-effort: fall through to the empty series.
        pass
    return {"models": {}, "labels": []}
|
|
|
|
@app.route("/api/stream")
def api_stream():
    """Stream metrics snapshots to the client as server-sent events.

    Each connection registers its own queue in sse_subscribers, sends one
    snapshot immediately, then forwards whatever arrives on the queue. When
    nothing arrives within 3 seconds it sends a freshly fetched snapshot
    instead. The queue is unregistered when the generator finishes.
    """

    def ev():
        inbox = queue.Queue()
        with sse_lock:
            sse_subscribers.append(inbox)
        try:
            yield "data: " + json.dumps(fetch_state()) + "\n\n"
            while True:
                try:
                    message = inbox.get(timeout=3)
                except queue.Empty:
                    # Nothing was broadcast in time: fetch a snapshot directly.
                    message = json.dumps(fetch_state())
                yield "data: " + message + "\n\n"
        except GeneratorExit:
            # Raised when the generator is closed; end the stream quietly.
            pass
        finally:
            with sse_lock:
                try:
                    sse_subscribers.remove(inbox)
                except ValueError:
                    # Already removed elsewhere.
                    pass

    sse_headers = {
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
    }
    return Response(
        stream_with_context(ev()),
        mimetype="text/event-stream",
        headers=sse_headers,
    )
|
|
|
|
@app.route("/health")
def health():
    """Return a static health payload; no upstream service is contacted."""
    status = {"status": "healthy", "service": "harness-dashboard"}
    return status
|
|
|
|
if __name__ == "__main__":
    # Run Flask's built-in server on all interfaces, port 3000, debug mode off.
    app.run(debug=False, host="0.0.0.0", port=3000)
|