fix: throughput panel handles streaming-only models gracefully
- Dashboard: when a model has zero non-streaming records, show "streaming only" instead of a misleading 0 tok/s.
- Dashboard: enforce a minimum bar width (6% avg, 4% p50) so low-tps models are always visible.
- Router: remove the inflated streaming tps estimate (prompt tokens skewed results for long conversations).

Fixes the Dense model appearing to "register nothing" when Mumuni sends mostly streaming requests.
This commit is contained in:
@@ -221,9 +221,13 @@ $('perf-latency').innerHTML=latHTML;
|
||||
// Scale reference for the bar widths: the largest average tok/s across all models,
// floored at 1 so the width calculations never divide by zero. A model without a
// `throughput` object counts as 0 instead of throwing (same fallback the row builder uses).
var maxTps=Math.max(...models.map(function(m){return (m.throughput||{}).avg_tokens_per_sec||0}),1);
|
||||
// Build one throughput row (HTML string) per model and join the rows into a single string.
// NOTE(review): this span is a rendered diff hunk (-221,9 +221,13) whose +/- markers were
// stripped, so removed and added lines appear side by side — confirm against the real file.
var tpsHTML=models.map(function(m){
|
||||
// Missing throughput fields fall back to 0; a model with no entry in mcol gets the default colour.
var t=m.throughput||{},avg=t.avg_tokens_per_sec||0,p50=t.p50||0,c=mcol[m.model]||'#38bdf8';
|
||||
// Both values being zero is treated as "no non-streaming records" (per the commit message).
var isAllStreaming = avg===0 && p50===0;
|
||||
if(isAllStreaming){
|
||||
// Placeholder row: model label plus a "streaming only" note, with no bars.
return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+mlab[m.model]+'</span><span style="color:#64748b;font-style:italic">streaming only</span></div><div class="text-secondary" style="font-size:10px">t/s available for non-streaming requests only</div></div>';
|
||||
}
|
||||
// Header row: model label on the left, average tok/s in the model colour on the right.
return'<div class="mb-2" style="font-size:11px"><div class="d-flex justify-content-between mb-1"><span style="color:#e2e8f0">'+mlab[m.model]+'</span><span style="color:'+c+'" class="fw-bold">'+avg+' tok/s</span></div>'+
|
||||
// NOTE(review): the next two string lines are presumably the pre-change avg/p50 bars
// (width is the raw percentage of maxTps); the return statement ends with ';' on the p50 line.
'<div class="d-flex align-items-center gap-2"><span class="text-secondary" style="min-width:28px">avg</span><div class="flex-grow-1" style="height:6px;background:#1e293b;border-radius:3px;overflow:hidden"><div style="height:100%;width:'+(avg/maxTps*100)+'%;background:'+c+';border-radius:3px"></div></div></div>'+
|
||||
'<div class="d-flex align-items-center gap-2 mt-1"><span class="text-secondary" style="min-width:28px;font-size:10px">p50</span><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+(p50/maxTps*100)+'%;background:'+c+';opacity:0.5;border-radius:2px"></div></div><span style="font-size:10px;color:#64748b">'+p50+' tok/s</span></div></div>';
|
||||
// NOTE(review): the next two string lines are presumably the post-change bars, with widths
// clamped to at least 6% (avg) and 4% (p50). As literally written here they follow the
// completed return statement and would never run — verify which pair the real file keeps.
'<div class="d-flex align-items-center gap-2"><span class="text-secondary" style="min-width:28px">avg</span><div class="flex-grow-1" style="height:6px;background:#1e293b;border-radius:3px;overflow:hidden"><div style="height:100%;width:'+(Math.max(avg/maxTps*100,6))+'%;background:'+c+';border-radius:3px"></div></div><span class="small" style="color:'+c+';min-width:54px;text-align:right">'+avg+' tok/s</span></div>'+
|
||||
'<div class="d-flex align-items-center gap-2 mt-1"><span class="text-secondary" style="min-width:28px;font-size:10px">p50</span><div class="flex-grow-1" style="height:4px;background:#1e293b;border-radius:2px;overflow:hidden"><div style="height:100%;width:'+(Math.max(p50/maxTps*100,4))+'%;background:'+c+';opacity:0.5;border-radius:2px"></div></div><span style="font-size:10px;color:#64748b">'+p50+' tok/s</span></div></div>';
|
||||
}).join('');
|
||||
// Write the generated rows into whatever $('perf-throughput') returns.
$('perf-throughput').innerHTML=tpsHTML;
|
||||
// Routing reasons table
|
||||
|
||||
Reference in New Issue
Block a user