feat: latency vs prompt size scatter plot on dashboard

Router: new /metrics/scatter endpoint returns individual data points
(prompt_tokens, inference_ms, model, agent, reason, stream)
for scatter visualization.

Dashboard: new panel showing latency vs prompt size by model.
- Log-scale X axis (prompt tokens) with model color coding
- Dropdown to filter by individual model or view all
- Hover tooltips with details per point
- Auto-refresh every 30s

Enables direct observation of the relationship between context length
and latency, which helps validate routing tier decisions.
This commit is contained in:
Abiba
2026-05-26 12:18:31 +00:00
parent cfb05fa501
commit f47c3f3304
2 changed files with 92 additions and 3 deletions
+62 -3
View File
@@ -113,7 +113,20 @@ body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMac
<div class="col-md-6"><div class="chart-card"><div class="title">Routing Effectiveness — by Reason</div><div id="perf-reasons"></div></div></div>
<div class="col-md-6"><div class="chart-card"><div class="title">Agent Performance</div><div id="perf-agents"></div></div></div>
<!-- ROW 5: Live Stream -->
<!-- ROW 5: Latency vs Context Scatter -->
<div class="col-12"><div class="chart-card"><div class="title d-flex justify-content-between align-items-center">
<span>Latency vs Prompt Size — by Model</span>
<div class="d-flex gap-2">
<select id="scatter-model" onchange="loadScatter()" style="font-size:10px;background:#1e293b;color:#94a3b8;border:1px solid #334155;border-radius:4px;padding:2px 6px">
<option value="all">All Models</option>
<option value="qwen3.5-9b-vlm">9B VLM</option>
<option value="qwen3.6-27B-code">27B Dense</option>
<option value="qwen3.6-35B-A3B">35B MoE</option>
</select>
</div>
</div><div id="scatter-plot" style="height:200px;position:relative"></div><div id="scatter-legend" class="d-flex justify-content-center gap-3 mt-2 flex-wrap small"></div></div></div>
<!-- ROW 6: Live Stream -->
<div class="col-12"><div class="chart-card"><div class="title">Live Stream</div>
<div class="table-responsive"><table class="table table-custom mb-0">
<thead><tr><th>Time</th><th>Agent</th><th>Model</th><th>Reason</th><th>Tier</th></tr></thead>
@@ -244,7 +257,43 @@ $('perf-agents').innerHTML=aHTML;
}else{$('perf-agents').innerHTML='<div class="text-secondary small text-center py-3">-</div>';}
}
// Fetch the current /api/state snapshot, pass it to render(), and record the
// outcome in the 'connection-status' element: 'live' after a successful render,
// 'reconnecting' when the fetch, the JSON parse, or render() fails.
function poll(){
  var setStatus=function(text){$('connection-status').textContent=text;};
  var parseBody=function(resp){return resp.json();};
  var showState=function(state){render(state);setStatus('live');};
  var showFailure=function(){setStatus('reconnecting');};
  fetch('/api/state').then(parseBody).then(showState).catch(showFailure);
}
// Start-up: poll state now and every 3s, load the time series once, load performance now and every 15s.
poll();setInterval(poll,3000);loadTS();loadPerf();setInterval(loadPerf,15000);
// Request scatter points for the model selected in the #scatter-model dropdown
// and hand the parsed JSON to renderScatter(). The request always uses
// window=24 (the unit is defined by the backend).
// Failures leave the current chart untouched and are logged to the console.
function loadScatter(){
  var m=$('scatter-model').value;
  // Encode the model id so characters such as '&', '#', '=' or spaces cannot
  // truncate the value or smuggle extra query parameters into the request.
  var url='/api/scatter?window=24&model='+encodeURIComponent(m);
  fetch(url)
    .then(function(r){
      // Treat non-2xx responses as failures instead of trying to parse an error page.
      if(!r.ok){throw new Error('scatter request failed: HTTP '+r.status);}
      return r.json();
    })
    .then(renderScatter)
    .catch(function(err){console.warn('loadScatter failed',err);});
}
// Draw the latency-vs-prompt-size scatter chart as an inline SVG in
// #scatter-plot and a per-model colour legend in #scatter-legend.
//   d: response object; d.points is a list of records from which this function
//      reads prompt_tokens, inference_ms, model, agent and stream.
// X is log10(prompt_tokens) and Y is linear inference_ms, both scaled onto a
// 0..100 plot area. Streamed points are drawn smaller and fainter.
// NOTE(review): this script appears to be embedded in a Python template string
// (DASHBOARD_HTML, rendered with render_template_string), so keep it free of
// backslashes and doubled curly braces. Confirm against the full file.
function renderScatter(d){
  var pts=(d&&d.points)||[],el=$('scatter-plot'),lg=$('scatter-legend');
  if(!pts.length){
    el.innerHTML='<div class="text-secondary small text-center py-5">No data yet</div>';
    // Clear the legend too, otherwise the previous filter's legend stays visible.
    lg.innerHTML='';
    return;
  }
  var mcol={'qwen3.6-35B-A3B':'#a78bfa','qwen3.6-27B-code':'#f59e0b','qwen3.5-9b-vlm':'#22c55e','unknown':'#38bdf8'};
  var mlab={'qwen3.6-35B-A3B':'35B MoE','qwen3.6-27B-code':'27B Dense','qwen3.5-9b-vlm':'9B VLM'};
  // Own-property lookups only, so a model named e.g. 'constructor' cannot pick up inherited members.
  var own=function(map,key){return Object.prototype.hasOwnProperty.call(map,key);};
  // Escape server-supplied text before it is concatenated into innerHTML.
  var esc=function(v){return String(v==null?'':v).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');};
  // Missing or non-numeric measurements count as 0 instead of producing NaN coordinates.
  var num=function(v){var n=Number(v);return isFinite(n)?n:0;};
  var colorOf=function(m){return own(mcol,m)?mcol[m]:'#38bdf8';};
  // Models without a friendly label fall back to their raw id rather than 'undefined'.
  var labelOf=function(m){return esc(own(mlab,m)?mlab[m]:((m==null||m==='')?'unknown':m));};
  // reduce() instead of Math.max.apply(): apply() spreads every point into one
  // argument list, which can exceed the engine's argument limit on large result sets.
  var maxX=pts.reduce(function(acc,p){return Math.max(acc,num(p.prompt_tokens));},0)||1000;
  var maxY=pts.reduce(function(acc,p){return Math.max(acc,num(p.inference_ms));},0)||5000;
  // Log scale for X axis (prompt tokens vary widely)
  var toX=function(t){return Math.log10(Math.max(t,1))/Math.log10(Math.max(maxX,10))*100;};
  var toY=function(t){return (t/maxY)*100;};
  var dots='';
  pts.forEach(function(p){
    var tok=num(p.prompt_tokens),ms=num(p.inference_ms);
    var x=toX(tok),y=toY(ms),c=colorOf(p.model);
    var r=p.stream?1.5:2.5,o=p.stream?0.4:0.8;
    // SVG y grows downwards, so flip the value to put low latency at the bottom.
    dots+='<circle cx="'+x+'" cy="'+(100-y)+'" r="'+r+'" fill="'+c+'" opacity="'+o+'"><title>'+labelOf(p.model)+' | '+tok+' tok | '+ms+'ms | '+esc(p.agent)+'</title></circle>';
  });
  // Grid lines: four horizontal, then four vertical, at 20-unit steps
  var grid='',i;
  for(i=1;i<=4;i++){grid+='<line x1="0" y1="'+(i*20)+'" x2="100" y2="'+(i*20)+'" stroke="#1e293b" stroke-width="0.5"/>';}
  for(i=1;i<=4;i++){grid+='<line x1="'+(i*20)+'" y1="0" x2="'+(i*20)+'" y2="100" stroke="#1e293b" stroke-width="0.5"/>';}
  // Axis labels: only ticks that fall inside the observed range are drawn
  var xTicks='';
  var xVals=[10,100,1000,10000,100000];
  xVals.forEach(function(v){if(v<=maxX)xTicks+='<text x="'+toX(v)+'" y="103" text-anchor="middle" font-size="8" fill="#64748b">'+(v>=1000?(v/1000)+'k':v)+'</text>';});
  var yTicks='';
  var yVals=[500,1000,5000,10000,50000,100000];
  yVals.forEach(function(v){if(v<=maxY)yTicks+='<text x="-2" y="'+(97-toY(v))+'" text-anchor="end" font-size="8" fill="#64748b">'+(v>=1000?(v/1000)+'s':v+'ms')+'</text>';});
  el.innerHTML='<svg viewBox="-35 0 140 115" style="width:100%;height:200px">'+grid+dots+xTicks+yTicks+'<text x="50" y="112" text-anchor="middle" font-size="9" fill="#475569">Prompt Tokens (log scale)</text><text x="-38" y="50" text-anchor="middle" font-size="9" fill="#475569" transform="rotate(-90,-38,50)">Inference Time</text></svg>';
  // Legend: one entry per distinct model, in order of first appearance
  var models=[];
  pts.forEach(function(p){if(models.indexOf(p.model)===-1)models.push(p.model);});
  lg.innerHTML=models.map(function(m){return'<span class="d-flex align-items-center gap-1 small"><svg width="10" height="10"><circle cx="5" cy="5" r="3.5" fill="'+colorOf(m)+'"/></svg>'+labelOf(m)+'</span>';}).join('');
}
// Start-up: run every loader once, then refresh on timers (state every 3s,
// performance every 15s, scatter every 30s). loadTS() is only called once here.
poll();setInterval(poll,3000);loadTS();loadPerf();setInterval(loadPerf,15000);loadScatter();setInterval(loadScatter,30000);
</script>
</body>
</html>"""
@@ -255,7 +304,17 @@ def dashboard(): return render_template_string(DASHBOARD_HTML)
@app.route("/api/state")
def api_state():
    """Serve whatever fetch_state() returns at /api/state (the URL the dashboard's poll() fetches)."""
    state = fetch_state()
    return state
@app.route("/api/performance")
@app.route("/api/scatter")
def api_scatter():
    """Proxy the router's /metrics/scatter endpoint for the dashboard scatter plot.

    Query parameters (both forwarded to the router as-is):
        window: defaults to "24".
        model: defaults to "all".

    Returns:
        The router's JSON body when it answers with HTTP 200. On any other
        status, or when the request or JSON decoding fails, an empty result
        ``{"points": [], "count": 0}`` so the dashboard keeps working.
    """
    window = request.args.get("window", "24")
    model = request.args.get("model", "all")
    try:
        # Hand the values to ``params`` so they are URL-encoded. Pasting them
        # into the URL let a value such as "x&window=9999" inject extra query
        # parameters into the upstream request.
        r = requests.get(
            "http://router:9000/metrics/scatter",
            params={"window": window, "model": model},
            timeout=10,
        )
        if r.status_code == 200:
            return r.json()
        app.logger.warning("scatter upstream returned HTTP %s", r.status_code)
    except Exception:
        # Deliberately best-effort: still fall through to the empty result,
        # but leave a trace instead of hiding the failure completely.
        app.logger.warning("scatter upstream request failed", exc_info=True)
    return {"points": [], "count": 0}
@app.route("/api/timeseries")
def api_performance():
window = request.args.get("window", "24")
model = request.args.get("model", "all")