feat: latency vs prompt size scatter plot on dashboard
Router: new /metrics/scatter endpoint returns individual data points (prompt_tokens, inference_ms, model, agent, reason, stream) for scatter visualization.

Dashboard: new panel showing latency vs. prompt size by model.
- Log-scale X axis (prompt tokens) with model color coding
- Dropdown to filter by individual model or view all
- Hover tooltips with details per point
- Auto-refresh every 30s

Enables direct observation of the context-length vs. latency relationship, which validates routing tier decisions.
This commit is contained in:
+62
-3
@@ -113,7 +113,20 @@ body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMac
|
||||
<div class="col-md-6"><div class="chart-card"><div class="title">Routing Effectiveness — by Reason</div><div id="perf-reasons"></div></div></div>
|
||||
<div class="col-md-6"><div class="chart-card"><div class="title">Agent Performance</div><div id="perf-agents"></div></div></div>
|
||||
|
||||
<!-- ROW 5: Live Stream -->
|
||||
<!-- ROW 5: Latency vs Context Scatter -->
|
||||
<div class="col-12"><div class="chart-card"><div class="title d-flex justify-content-between align-items-center">
|
||||
<span>Latency vs Prompt Size — by Model</span>
|
||||
<div class="d-flex gap-2">
|
||||
<select id="scatter-model" onchange="loadScatter()" style="font-size:10px;background:#1e293b;color:#94a3b8;border:1px solid #334155;border-radius:4px;padding:2px 6px">
|
||||
<option value="all">All Models</option>
|
||||
<option value="qwen3.5-9b-vlm">9B VLM</option>
|
||||
<option value="qwen3.6-27B-code">27B Dense</option>
|
||||
<option value="qwen3.6-35B-A3B">35B MoE</option>
|
||||
</select>
|
||||
</div>
|
||||
</div><div id="scatter-plot" style="height:200px;position:relative"></div><div id="scatter-legend" class="d-flex justify-content-center gap-3 mt-2 flex-wrap small"></div></div></div>
|
||||
|
||||
<!-- ROW 6: Live Stream -->
|
||||
<div class="col-12"><div class="chart-card"><div class="title">Live Stream</div>
|
||||
<div class="table-responsive"><table class="table table-custom mb-0">
|
||||
<thead><tr><th>Time</th><th>Agent</th><th>Model</th><th>Reason</th><th>Tier</th></tr></thead>
|
||||
@@ -244,7 +257,43 @@ $('perf-agents').innerHTML=aHTML;
|
||||
}else{$('perf-agents').innerHTML='<div class="text-secondary small text-center py-3">-</div>';}
|
||||
}
|
||||
// Pull the latest dashboard state, redraw it, and mirror link health in the status badge.
function poll(){
  // Redraw first; only report "live" once rendering has succeeded.
  function onState(state){
    render(state);
    $('connection-status').textContent='live';
  }
  // Any network, parse, or render failure is surfaced as "reconnecting".
  function onFailure(){
    $('connection-status').textContent='reconnecting';
  }
  fetch('/api/state')
    .then(function(resp){return resp.json();})
    .then(onState)
    .catch(onFailure);
}
|
||||
poll();setInterval(poll,3000);loadTS();loadPerf();setInterval(loadPerf,15000);
|
||||
/**
 * Fetch scatter points for the model currently selected in the
 * #scatter-model dropdown (24-hour window) and hand them to renderScatter.
 *
 * Failures are logged and otherwise ignored so the periodic refresh keeps
 * running and the previously drawn chart stays on screen.
 */
function loadScatter(){
  var m=$('scatter-model').value;
  // Encode the value so characters such as '&', '#', or spaces cannot
  // truncate or inject extra query parameters.
  fetch('/api/scatter?window=24&model='+encodeURIComponent(m))
    .then(function(r){
      // Do not try to parse an error page as chart data.
      if(!r.ok){throw new Error('scatter request failed: HTTP '+r.status);}
      return r.json();
    })
    .then(renderScatter)
    .catch(function(err){console.warn('loadScatter failed',err);});
}
|
||||
/**
 * Draw the latency-vs-prompt-size scatter chart and its legend.
 *
 * Writes an inline SVG into #scatter-plot and one legend entry per model
 * into #scatter-legend. The X axis (prompt tokens) is log10-scaled, the
 * Y axis (inference time) is linear; both are normalised to 0..100 inside
 * the SVG viewBox. Streamed requests are drawn smaller and fainter.
 *
 * @param {Object} d Response payload; `d.points` is the list of points.
 *   Each point may carry prompt_tokens, inference_ms, model, agent, stream.
 *   A missing payload or an empty list shows the "No data yet" placeholder.
 */
function renderScatter(d){
  var FALLBACK_COLOR='#38bdf8';
  var el=$('scatter-plot'),lg=$('scatter-legend');
  var raw=d&&d.points;
  // Tolerate a null payload or malformed entries instead of throwing.
  var pts=(Array.isArray(raw)?raw:[]).filter(function(p){return p!==null&&typeof p==='object';});
  if(!pts.length){
    el.innerHTML='<div class="text-secondary small text-center py-5">No data yet</div>';
    // Clear the legend too, otherwise entries from the previous filter linger.
    lg.innerHTML='';
    return;
  }
  var mcol={'qwen3.6-35B-A3B':'#a78bfa','qwen3.6-27B-code':'#f59e0b','qwen3.5-9b-vlm':'#22c55e','unknown':FALLBACK_COLOR};
  var mlab={'qwen3.6-35B-A3B':'35B MoE','qwen3.6-27B-code':'27B Dense','qwen3.5-9b-vlm':'9B VLM'};

  // Own-property lookup so a model named e.g. "constructor" cannot hit Object.prototype.
  var own=function(map,key){return Object.prototype.hasOwnProperty.call(map,key)?map[key]:undefined;};
  var colorOf=function(model){return own(mcol,model)||FALLBACK_COLOR;};
  // Models without a friendly label fall back to their raw name, never "undefined".
  var labelOf=function(model){return own(mlab,model)||((model==null||model==='')?'unknown':String(model));};
  // Point fields come from the API and end up in innerHTML, so escape them.
  var escMap={'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
  var esc=function(v){return String(v).replace(/[&<>"']/g,function(ch){return escMap[ch];});};
  // Missing, non-numeric, or negative measurements are plotted at 0 rather than NaN.
  var num=function(v){var n=Number(v);return (isFinite(n)&&n>0)?n:0;};

  // Plain loop instead of Math.max.apply: apply() spreads every point as an
  // argument and can exceed the engine's argument limit on large result sets.
  var maxX=0,maxY=0;
  pts.forEach(function(p){
    maxX=Math.max(maxX,num(p.prompt_tokens));
    maxY=Math.max(maxY,num(p.inference_ms));
  });
  maxX=maxX||1000;
  maxY=maxY||5000;

  // Log scale for X axis (prompt tokens vary widely)
  var toX=function(t){return Math.log10(Math.max(t,1))/Math.log10(Math.max(maxX,10))*100;};
  var toY=function(t){return (t/maxY)*100;};

  var dots='';
  pts.forEach(function(p){
    var tok=num(p.prompt_tokens),ms=num(p.inference_ms);
    var r=p.stream?1.5:2.5,o=p.stream?0.4:0.8;
    var agent=(p.agent==null||p.agent==='')?'-':p.agent;
    // SVG Y grows downward, hence 100-y.
    dots+='<circle cx="'+toX(tok)+'" cy="'+(100-toY(ms))+'" r="'+r+'" fill="'+colorOf(p.model)+'" opacity="'+o+'"><title>'+esc(labelOf(p.model))+' | '+tok+' tok | '+ms+'ms | '+esc(agent)+'</title></circle>';
  });

  // Grid lines
  var grid='';
  var i;
  for(i=1;i<=4;i++){grid+='<line x1="0" y1="'+(i*20)+'" x2="100" y2="'+(i*20)+'" stroke="#1e293b" stroke-width="0.5"/>';}
  for(i=1;i<=4;i++){grid+='<line x1="'+(i*20)+'" y1="0" x2="'+(i*20)+'" y2="100" stroke="#1e293b" stroke-width="0.5"/>';}

  // Axis labels: only ticks that fall inside the observed range are drawn.
  var xTicks='';
  [10,100,1000,10000,100000].forEach(function(v){
    if(v<=maxX)xTicks+='<text x="'+toX(v)+'" y="103" text-anchor="middle" font-size="8" fill="#64748b">'+(v>=1000?(v/1000)+'k':v)+'</text>';
  });
  var yTicks='';
  [500,1000,5000,10000,50000,100000].forEach(function(v){
    if(v<=maxY)yTicks+='<text x="-2" y="'+(97-toY(v))+'" text-anchor="end" font-size="8" fill="#64748b">'+(v>=1000?(v/1000)+'s':v+'ms')+'</text>';
  });

  el.innerHTML='<svg viewBox="-35 0 140 115" style="width:100%;height:200px">'+grid+dots+xTicks+yTicks+'<text x="50" y="112" text-anchor="middle" font-size="9" fill="#475569">Prompt Tokens (log scale)</text><text x="-38" y="50" text-anchor="middle" font-size="9" fill="#475569" transform="rotate(-90,-38,50)">Inference Time</text></svg>';

  // Legend: one entry per distinct label, in first-seen order.
  var labels=[],entries=[];
  pts.forEach(function(p){
    var label=labelOf(p.model);
    if(labels.indexOf(label)===-1){
      labels.push(label);
      entries.push({label:label,color:colorOf(p.model)});
    }
  });
  lg.innerHTML=entries.map(function(e){
    return'<span class="d-flex align-items-center gap-1 small"><svg width="10" height="10"><circle cx="5" cy="5" r="3.5" fill="'+e.color+'"/></svg>'+esc(e.label)+'</span>';
  }).join('');
}
|
||||
// Start every dashboard feed: run each loader once now, then repeat it on its
// own cadence in milliseconds (null means load once, no refresh).
[[poll,3000],[loadTS,null],[loadPerf,15000],[loadScatter,30000]].forEach(function(feed){
  var load=feed[0],everyMs=feed[1];
  load();
  if(everyMs){setInterval(load,everyMs);}
});
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
@@ -255,7 +304,17 @@ def dashboard(): return render_template_string(DASHBOARD_HTML)
|
||||
@app.route("/api/state")
def api_state():
    """Serve the dashboard state snapshot produced by fetch_state()."""
    state = fetch_state()
    return state
|
||||
|
||||
@app.route("/api/performance")
|
||||
@app.route("/api/scatter")
def api_scatter():
    """Proxy the router's /metrics/scatter endpoint for the dashboard.

    Query parameters:
        window: forwarded to the router unchanged (default "24").
        model: forwarded to the router unchanged (default "all").

    Returns the router's JSON payload when it answers HTTP 200. On any other
    status, or if the request or JSON decoding fails, returns an empty result
    ({"points": [], "count": 0}) so the caller always gets a well-formed body.
    """
    window = request.args.get("window", "24")
    model = request.args.get("model", "all")
    try:
        # Pass the values via params= so requests URL-encodes them; building
        # the query string by hand lets a value like "x&window=9999" inject
        # extra parameters into the upstream call.
        r = requests.get(
            "http://router:9000/metrics/scatter",
            params={"window": window, "model": model},
            timeout=10,
        )
        if r.status_code == 200:
            return r.json()
        app.logger.warning("scatter: router returned HTTP %s", r.status_code)
    except Exception:
        # Deliberately best-effort: the dashboard should keep rendering when
        # the router is down, but the failure is logged instead of hidden.
        app.logger.warning("scatter: router request failed", exc_info=True)
    return {"points": [], "count": 0}
|
||||
|
||||
@app.route("/api/timeseries")
|
||||
def api_performance():
|
||||
window = request.args.get("window", "24")
|
||||
model = request.args.get("model", "all")
|
||||
|
||||
Reference in New Issue
Block a user