fix: throughput panel handles streaming-only models gracefully

- Dashboard: when a model's avg and p50 throughput are both 0 (i.e. it
  has no non-streaming records), show "streaming only" instead of a
  misleading 0 tok/s
- Dashboard: enforce a minimum bar width (6% for avg, 4% for p50) so
  low-tps models are always visible
- Router: removed inflated streaming tps estimate (prompt tokens
  skewed results for long conversations)

Fixes the Dense model appearing to "register nothing" when Mumuni
sends mostly streaming requests.
This commit is contained in:
Abiba
2026-05-25 19:45:21 +00:00
parent 8c5c922a4e
commit b2ec4b0572
+6 -2
View File
@@ -221,9 +221,13 @@ $('perf-latency').innerHTML=latHTML;
var maxTps=Math.max(...models.map(function(m){return m.throughput.avg_tokens_per_sec||0}),1); var maxTps=Math.max(...models.map(function(m){return m.throughput.avg_tokens_per_sec||0}),1);
var tpsHTML=models.map(function(m){ var tpsHTML=models.map(function(m){
var t=m.throughput||{},avg=t.avg_tokens_per_sec||0,p50=t.p50||0,c=mcol[m.model]||'#38bdf8'; var t=m.throughput||{},avg=t.avg_tokens_per_sec||0,p50=t.p50||0,c=mcol[m.model]||'#38bdf8';
var isAllStreaming = avg===0 && p50===0;
if(isAllStreaming){
return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+mlab[m.model]+'</span><span style="color:#64748b;font-style:italic">streaming only</span></div><div class="text-secondary" style="font-size:10px">t/s available for non-streaming requests only</div></div>';
}
return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+mlab[m.model]+'</span><span style="color:'+c+'" class="fw-bold">'+avg+' tok/s</span></div>'+ return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+mlab[m.model]+'</span><span style="color:'+c+'" class="fw-bold">'+avg+' tok/s</span></div>'+
'<div class="d-flex align-items-center gap-2"><span class="text-secondary" style="min-width:28px">avg</span><div class="flex-grow-1" style="height:6px;background:#1e293b;border-radius:3px;overflow:hidden"><div style="height:100%;width:'+(avg/maxTps*100)+'%;background:'+c+';border-radius:3px"></div></div></div>'+ '<div class="d-flex align-items-center gap-2"><span class="text-secondary" style="min-width:28px">avg</span><div class="flex-grow-1" style="height:6px;background:#1e293b;border-radius:3px;overflow:hidden"><div style="height:100%;width:'+(Math.max(avg/maxTps*100,6))+'%;background:'+c+';border-radius:3px"></div></div><span class="small" style="color:'+c+';min-width:54px;text-align:right">'+avg+' tok/s</span></div>'+
'<div class="d-flex align-items-center gap-2 mt-1"><span class="text-secondary" style="min-width:28px;font-size:10px">p50</span><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+(p50/maxTps*100)+'%;background:'+c+';opacity:0.5;border-radius:2px"></div></div><span style="font-size:10px;color:#64748b">'+p50+' tok/s</span></div></div>'; '<div class="d-flex align-items-center gap-2 mt-1"><span class="text-secondary" style="min-width:28px;font-size:10px">p50</span><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+(Math.max(p50/maxTps*100,4))+'%;background:'+c+';opacity:0.5;border-radius:2px"></div></div><span style="font-size:10px;color:#64748b">'+p50+' tok/s</span></div></div>';
}).join(''); }).join('');
$('perf-throughput').innerHTML=tpsHTML; $('perf-throughput').innerHTML=tpsHTML;
// Routing reasons table // Routing reasons table