May 19, 2026: Full harness update

- Model migration: gemma-4-E4B → qwen3.5-9b-vlm
- Dashboard reorder: Usage Over Time + GPU Metrics to top
- Router counter leak fix (gpu_decr is now called in the except handler)
- VLM slot upgrade: concurrent slots 1→2
- Redis stale key cleanup
- Automated maintenance cron job
- LiteLLM config update
- GPU router config update
- README update
This commit is contained in:
Abiba
2026-05-19 15:03:34 +00:00
parent 4f032b035c
commit 9c31b5d622
7 changed files with 43 additions and 46 deletions
+17 -17
View File
@@ -80,17 +80,7 @@ body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMac
</div>
<div class="row g-3 align-items-stretch">
<!-- ROW 1: 3 GPU Cards -->
<div class="col-md-4"><div class="gpu-card" id="gpu-moe"><div class="text-secondary small">Loading...</div></div></div>
<div class="col-md-4"><div class="gpu-card" id="gpu-dense"><div class="text-secondary small">Loading...</div></div></div>
<div class="col-md-4"><div class="gpu-card" id="gpu-light"><div class="text-secondary small">Loading...</div></div></div>
<!-- ROW 2: Queue + Model + Agent -->
<div class="col-md-4"><div class="chart-card"><div class="title">Queue Status</div><div class="text-center" id="queue-viz"></div></div></div>
<div class="col-md-4"><div class="chart-card"><div class="title">Model Distribution</div><div id="route-bars"></div></div></div>
<div class="col-md-4"><div class="chart-card"><div class="title">Agent Activity</div><div id="agent-bars"></div></div></div>
<!-- ROW 3: Usage Chart (8) + GPU Metrics (4) -->
<!-- ROW 1: Usage Chart (8) + GPU Metrics (4) -->
<div class="col-md-8"><div class="chart-card"><div class="title d-flex justify-content-between align-items-center">
<span>Usage Over Time</span>
<div class="d-flex gap-1">
@@ -101,6 +91,16 @@ body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMac
</div><div id="timeseries-chart" style="height:150px"></div><div id="timeseries-legend" class="d-flex justify-content-center gap-3 mt-2 flex-wrap small"></div></div></div>
<div class="col-md-4"><div class="chart-card"><div class="title">GPU Metrics</div><div id="gpu-metrics-card"></div></div></div>
<!-- ROW 2: 3 GPU Cards -->
<div class="col-md-4"><div class="gpu-card" id="gpu-moe"><div class="text-secondary small">Loading...</div></div></div>
<div class="col-md-4"><div class="gpu-card" id="gpu-dense"><div class="text-secondary small">Loading...</div></div></div>
<div class="col-md-4"><div class="gpu-card" id="gpu-light"><div class="text-secondary small">Loading...</div></div></div>
<!-- ROW 3: Queue + Model + Agent -->
<div class="col-md-4"><div class="chart-card"><div class="title">Queue Status</div><div class="text-center" id="queue-viz"></div></div></div>
<div class="col-md-4"><div class="chart-card"><div class="title">Model Distribution</div><div id="route-bars"></div></div></div>
<div class="col-md-4"><div class="chart-card"><div class="title">Agent Activity</div><div id="agent-bars"></div></div></div>
<!-- ROW 4: Live Stream -->
<div class="col-12"><div class="chart-card"><div class="title">Live Stream</div>
<div class="table-responsive"><table class="table table-custom mb-0">
@@ -111,9 +111,9 @@ body { background: #0b0f17; color: #bcc3cd; font-family: -apple-system, BlinkMac
</div>
<script>
var MC={'gemma-4-E4B':'#22c55e','qwen3.6-27B-code':'#f59e0b','qwen3.6-35B-A3B':'#a78bfa'};
var ML={'gemma-4-E4B':'Gemma 4B','qwen3.6-27B-code':'Qwen Code','qwen3.6-35B-A3B':'Qwen MoE'};
var GL={'qwen3.6-35B-A3B':'MoE - Strix Halo','qwen3.6-27B-code':'Dense - RTX 3090','gemma-4-E4B':'Light - RTX 5070'};
var MC={'qwen3.5-9b-vlm':'#22c55e','qwen3.6-27B-code':'#f59e0b','qwen3.6-35B-A3B':'#a78bfa'};
var ML={'qwen3.5-9b-vlm':'Qwen3.5 9B VLM','qwen3.6-27B-code':'Qwen Code','qwen3.6-35B-A3B':'Qwen MoE'};
var GL={'qwen3.6-35B-A3B':'MoE - Strix Halo','qwen3.6-27B-code':'Dense - RTX 3090','qwen3.5-9b-vlm':'VLM - RTX 5070'};
function $(id){return document.getElementById(id);}
function render(data){
@@ -122,7 +122,7 @@ var t=Object.values(data.route_counts||{}).reduce((a,b)=>a+b,0);
var ta=0,tm=0;data.gpus.forEach(function(g){ta+=(g.active_requests||0);tm+=(g.max_concurrent||1)});
$('kpi-total').textContent=t;$('kpi-active').textContent=ta+'/'+tm;$('kpi-agents').textContent=Object.keys(data.agent_counts||{}).length;
$('update-time').textContent=new Date().toLocaleTimeString();
var ids={'qwen3.6-35B-A3B':'gpu-moe','qwen3.6-27B-code':'gpu-dense','gemma-4-E4B':'gpu-light'};
var ids={'qwen3.6-35B-A3B':'gpu-moe','qwen3.6-27B-code':'gpu-dense','qwen3.5-9b-vlm':'gpu-light'};
data.gpus.forEach(function(g){
var el=$(ids[g.id]);if(!el)return;
var a=g.active_requests||0,mx=g.max_concurrent||1;
@@ -154,14 +154,14 @@ var sc=pct>=100?'#ef4444':pct>=50?'#f59e0b':'#22c55e';
var circ=188.5,dash=(pct/100)*circ;
var h='<div class=\"d-inline-block position-relative mb-2\"><svg width=\"72\" height=\"72\"><circle cx=\"36\" cy=\"36\" r=\"30\" fill=\"none\" stroke=\"#1e293b\" stroke-width=\"6\"/><circle cx=\"36\" cy=\"36\" r=\"30\" fill=\"none\" stroke=\"'+sc+'\" stroke-width=\"6\" stroke-dasharray=\"'+dash+' '+(circ-dash)+'\" stroke-linecap=\"round\" transform=\"rotate(-90 36 36)\"/></svg><div style=\"position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);text-align:center\"><div class=\"ring-label\" style=\"color:'+sc+'\">'+ta+'</div><div class=\"ring-sublabel\">/ '+tm+' slots</div></div></div>';
h+='<div class=\"fw-bold mb-2 small\" style=\"color:'+sc+'\">'+st+'</div>';
var lb={'qwen3.6-35B-A3B':'MoE','qwen3.6-27B-code':'Dense','gemma-4-E4B':'Gemma'};
var lb={'qwen3.6-35B-A3B':'MoE','qwen3.6-27B-code':'Dense','qwen3.5-9b-vlm':'VLM'};
data.gpus.forEach(function(g){var a=g.active_requests||0,mx=g.max_concurrent||1,gp=mx>0?Math.round(a/mx*100):0;h+='<div class=\"d-flex align-items-center gap-2 mb-1 justify-content-center\"><span class=\"small\" style=\"min-width:32px;text-align:right;font-size:10px\">'+(lb[g.id]||g.id)+'</span><div style=\"flex:1;max-width:70px;height:3px;background:#1e293b;border-radius:2px;overflow:hidden\"><div style=\"height:100%;width:'+gp+'%;background:'+sc+';border-radius:2px\"></div></div><span class=\"small\" style=\"min-width:22px;font-size:10px\">'+a+'/'+mx+'</span></div>'});
el.innerHTML=h;
}
function renderGPUMetrics(data){
var el=$('gpu-metrics-card');if(!el)return;
var lb={'qwen3.6-35B-A3B':'MoE','qwen3.6-27B-code':'Dense','gemma-4-E4B':'Gemma'};
var lb={'qwen3.6-35B-A3B':'MoE','qwen3.6-27B-code':'Dense','qwen3.5-9b-vlm':'VLM'};
var h='';data.gpus.forEach(function(g){
var nm=lb[g.id]||g.id,tp=g.temp_c||0,ut=g.gpu_util_pct||0,pw=g.power_w||0,pl=g.power_limit_w||0;
var tc=tp>85?'#ef4444':tp>70?'#f59e0b':'#22c55e',uc=ut>90?'#ef4444':ut>70?'#f59e0b':'#22c55e';