Initial commit: CT 116 inference harness — nginx, LiteLLM, router, dashboard, Redis

- Complexity-based routing (MoE default, Dense heavy, Gemma light)
- Per-agent API keys with metrics tracking
- Time-series usage graphs (24h/7d/30d)
- Streaming support (SSE passthrough)
- Unicode cleanup (ASCII-only output)
- Vision support (gemma-4-E4B)
- Tier enforcement (starter/professional/enterprise)
- GPU health monitoring via sidecar polling
- Unified dashboard with line graph
This commit is contained in:
Abiba (pi)
2026-05-16 18:51:50 +00:00
commit 7b6c6aabe1
11 changed files with 749 additions and 0 deletions
+290
View File
@@ -0,0 +1,290 @@
"""Harness Dashboard."""
import os, json, time, queue, threading
import requests
from flask import Flask, request, render_template_string, Response, stream_with_context
# Router metrics endpoint polled by fetch_state(); override with the
# ROUTER_METRICS_URL environment variable.
ROUTER_METRICS = os.getenv("ROUTER_METRICS_URL", "http://router:9000/metrics")

app = Flask(__name__)

# One queue per connected /api/stream client. Every read or write of the list
# in this module happens while holding sse_lock.
sse_lock = threading.Lock()
sse_subscribers = list()
def fetch_state():
    """Return the router's current metrics snapshot, or an empty placeholder.

    Issues a GET to ROUTER_METRICS with a 5 second timeout. On HTTP 200 the
    decoded JSON body is returned unchanged. On any other status, or if the
    request or JSON decoding raises, a placeholder with empty collections and
    the current time is returned instead, so callers always get a value.
    """
    try:
        reply = requests.get(ROUTER_METRICS, timeout=5)
        router_ok = reply.status_code == 200
        if router_ok:
            return reply.json()
    except Exception:
        # Deliberately best-effort: fall through to the placeholder below.
        pass
    return dict(
        gpus=[],
        route_counts={},
        agent_counts={},
        recent=[],
        timestamp=time.time(),
    )
def broadcast_loop():
    """Push a fresh metrics snapshot to every SSE subscriber every 3 seconds.

    Runs forever and never returns; it is meant to be the target of a daemon
    thread. Each cycle sleeps 3 seconds, fetches the router state once,
    serialises it to JSON and enqueues the same payload on every queue in
    sse_subscribers. Queues whose enqueue raises are dropped from the list.
    """
    while True:
        time.sleep(3)

        # Skip the router round-trip when nobody is connected to /api/stream;
        # previously the router was polled every cycle regardless.
        with sse_lock:
            if not sse_subscribers:
                continue

        # Fetch outside the lock: the HTTP call can take up to its 5 s timeout
        # and must not stall clients that are subscribing or unsubscribing.
        payload = json.dumps(fetch_state())

        with sse_lock:
            dead = []
            for q in sse_subscribers:
                try:
                    # Never block while holding sse_lock, even if a queue is
                    # later given a maxsize.
                    q.put_nowait(payload)
                except Exception:
                    dead.append(q)
            for q in dead:
                sse_subscribers.remove(q)
# Launch the SSE fan-out loop at import time. It is a daemon thread, so it
# does not keep the interpreter alive at shutdown.
threading.Thread(daemon=True, target=broadcast_loop).start()
# Single-page dashboard UI (HTML + CSS + JS) served by the "/" route.
# The script polls /api/state every 3 seconds and /api/timeseries on load and
# on period change. Keep this text free of Jinja delimiters ("{" followed by
# "{", "%" or "#") in case it is ever passed through a template renderer.
DASHBOARD_HTML = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Inference Harness - Syslog Solution LLC</title>
<style>
:root {
--bg: #0a0e14; --card: #131820; --border: #1e2a3a; --text: #c9d1d9;
--dim: #5c6670; --accent: #39bae6; --green: #7fd962; --yellow: #ffb454;
--red: #f26d78; --blue: #59c2ff; --purple: #d2a6ff;
}
* { margin:0; padding:0; box-sizing:border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Segoe UI', system-ui, sans-serif;
background: var(--bg); color: var(--text); min-height: 100vh;
padding: clamp(12px, 3vw, 32px);
}
.header {
display: flex; align-items: center; justify-content: space-between;
flex-wrap: wrap; gap: 12px; margin-bottom: 24px;
}
.header h1 { font-size: clamp(18px, 4vw, 26px); font-weight: 700; color: #fff; }
.header h1 span { color: var(--accent); }
.status-bar { display: flex; gap: 16px; align-items: center; flex-wrap: wrap; font-size: 13px; color: var(--dim); }
.status-dot { width: 8px; height: 8px; border-radius: 50%; display: inline-block; }
.status-dot.live { background: var(--green); animation: pulse 2s infinite; }
@keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.3} }
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(min(100%, 340px), 1fr)); gap: 16px; }
.card {
background: var(--card); border: 1px solid var(--border);
border-radius: 12px; padding: clamp(12px, 3vw, 20px);
}
.card-title {
font-size: 13px; font-weight: 600; text-transform: uppercase;
letter-spacing: 0.5px; color: var(--dim); margin-bottom: 14px;
}
.gpu-row {
display: flex; align-items: center; gap: 14px; padding: 10px 0;
border-bottom: 1px solid rgba(255,255,255,0.04);
}
.gpu-row:last-child { border-bottom: none; }
.gpu-icon {
width: 40px; height: 40px; border-radius: 10px; display: flex;
align-items: center; justify-content: center; font-size: 18px; flex-shrink: 0;
}
.gpu-icon.green { background: rgba(127,217,98,0.12); color: var(--green); }
.gpu-icon.yellow { background: rgba(255,180,84,0.12); color: var(--yellow); }
.gpu-icon.red { background: rgba(242,109,120,0.12); color: var(--red); }
.gpu-info { flex:1; min-width: 0; }
.gpu-name { font-size: 14px; font-weight: 600; color: #e6edf3; }
.gpu-metrics { display: flex; gap: 20px; flex-wrap: wrap; margin-top: 6px; }
.gpu-metric { font-size: 12px; }
.gpu-metric .label { color: var(--dim); }
.gpu-metric .value { color: #e6edf3; font-weight: 500; font-variant-numeric: tabular-nums; }
.vram-bar { width: 100%; height: 4px; background: rgba(255,255,255,0.06); border-radius: 2px; margin-top: 6px; overflow: hidden; }
.vram-fill { height: 100%; border-radius: 2px; transition: width 0.6s ease; }
.vram-fill.green { background: var(--green); }
.vram-fill.yellow { background: var(--yellow); }
.vram-fill.red { background: var(--red); }
.bar-row { margin-bottom: 10px; }
.bar-label { display: flex; justify-content: space-between; font-size: 12px; margin-bottom: 4px; }
.bar-label .name { color: #e6edf3; }
.bar-label .count { color: var(--dim); font-variant-numeric: tabular-nums; }
.bar-track { height: 6px; background: rgba(255,255,255,0.06); border-radius: 3px; overflow: hidden; }
.bar-fill { height: 100%; border-radius: 3px; transition: width 0.6s ease; }
.route-table { width: 100%; font-size: 12px; border-collapse: collapse; }
.route-table th, .route-table td { text-align: left; padding: 6px 10px; }
.route-table th { color: var(--dim); font-weight: 500; font-size: 11px; text-transform: uppercase; letter-spacing: 0.3px; border-bottom: 1px solid var(--border); }
.route-table td { border-bottom: 1px solid rgba(255,255,255,0.03); color: #b0b8c4; }
.agent-tag { display: inline-block; padding: 1px 7px; border-radius: 10px; font-size: 11px; font-weight: 600; }
.agent-abiba { background: rgba(57,186,230,0.15); color: var(--accent); }
.agent-mumuni { background: rgba(210,166,255,0.15); color: var(--purple); }
.agent-tanko { background: rgba(255,180,84,0.15); color: var(--yellow); }
.agent-koby { background: rgba(89,194,255,0.15); color: var(--blue); }
.agent-kagenz0 { background: rgba(127,217,98,0.15); color: var(--green); }
.agent-unknown { background: rgba(255,255,255,0.06); color: var(--dim); }
.agent-admin { background: rgba(255,255,255,0.08); color: #e6edf3; }
.full { grid-column: 1 / -1; }
.period-btn {
background: var(--card); border: 1px solid var(--border); color: var(--dim);
padding: 4px 12px; border-radius: 6px; font-size: 12px; cursor: pointer;
font-family: inherit; transition: all 0.2s;
}
.period-btn.active { background: var(--accent); color: #000; border-color: var(--accent); }
.period-btn:hover { border-color: var(--accent); color: #e6edf3; }
@media (max-width: 600px) {
.gpu-metrics { gap: 10px; }
.route-table { font-size: 11px; }
.route-table th, .route-table td { padding: 4px 6px; }
}
</style>
</head>
<body>
<div class="header">
<h1><span>&#x26A1;</span> Inference Harness</h1>
<div class="status-bar">
<span class="status-dot" id="live-dot"></span>
<span id="connection-status">connecting...</span>
<span id="update-time"></span>
<span id="total-requests">0 requests</span>
</div>
</div>
<div class="grid">
<div class="card full">
<div class="card-title">GPU Health</div>
<div id="gpu-container">Loading...</div>
</div>
<div class="card">
<div class="card-title">Model Distribution</div>
<div id="route-bars">-</div>
</div>
<div class="card" style="grid-column: span 2">
<div class="card-title" style="display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:4px">
<span>Usage Over Time</span>
<div style="display:flex;gap:4px">
<button class="period-btn active" onclick="switchPeriod('day')">24h</button>
<button class="period-btn" onclick="switchPeriod('week')">7d</button>
<button class="period-btn" onclick="switchPeriod('month')">30d</button>
</div>
</div>
<div id="timeseries-chart" style="height:140px;position:relative;overflow:hidden">
<div style="color:var(--dim);font-size:13px;padding:50px 0;text-align:center">Loading...</div>
</div>
<div id="timeseries-legend" style="display:flex;gap:16px;justify-content:center;margin-top:8px;flex-wrap:wrap"></div>
</div>
<div class="card">
<div class="card-title">Agent Activity</div>
<div id="agent-bars">-</div>
</div>
<div class="card full">
<div class="card-title">Live Request Stream</div>
<div style="overflow-x:auto">
<table class="route-table">
<thead><tr><th>Time</th><th>Agent</th><th>Model</th><th>Reason</th><th>Tier</th></tr></thead>
<tbody id="route-tbody"><tr><td colspan="5">Waiting for data...</td></tr></tbody>
</table>
</div>
</div>
</div>
<script>
const MODEL_COLORS = {'gemma-4-E4B':'#7fd962','qwen3.6-27B-code':'#ffb454','qwen3.6-35B-A3B':'#d2a6ff'};
const MODEL_LABELS = {'gemma-4-E4B':'Gemma 4B','qwen3.6-27B-code':'Qwen Code 27B','qwen3.6-35B-A3B':'Qwen MoE 35B'};
const MODEL_SHORT = {'gemma-4-E4B':'Gemma','qwen3.6-27B-code':'Qwen Code','qwen3.6-35B-A3B':'Qwen MoE'};
const GPU_LABELS = {'NVIDIA GeForce RTX 5070':'RTX 5070 - Gemma 4B','NVIDIA GeForce RTX 3090':'RTX 3090 - Qwen Code 27B','AMD Radeon (Strix Halo)':'Strix Halo - Qwen MoE 35B'};
const DEFAULT_COLOR = '#39bae6';
// Every value in the router payload ends up inside innerHTML, so text is
// HTML-escaped and anything used as a number is coerced before it is inserted.
const ESC_MAP = {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'};
function esc(v) { return String(v).replace(/[&<>"']/g, ch => ESC_MAP[ch]); }
function num(v) { const n = Number(v); return Number.isFinite(n) ? n : 0; }
// Only a missing reading becomes '?'; a legitimate 0 is shown as 0.
function orUnknown(v) { return v == null ? '?' : esc(v); }
// One class-name rule for both the bar chart and the table. Digits are kept so
// an agent such as 'kagenz0' matches the .agent-kagenz0 style.
function agentClass(a) { return 'agent-' + String(a).toLowerCase().replace(/[^a-z0-9]/g,''); }
function statusIcon(status) {
  if (status === 'healthy') return '<span class="gpu-icon green">&#x25CF;</span>';
  if (status === 'saturated') return '<span class="gpu-icon yellow">&#x25C9;</span>';
  return '<span class="gpu-icon red">&#x25CB;</span>';
}
function vramClass(pct) { if(pct>90)return'red';if(pct>75)return'yellow';return'green'; }
function gpuRow(g) {
  const pct = num(g.vram_pct), level = vramClass(pct);
  const name = GPU_LABELS[g.gpu_name] || g.gpu_name || g.id || '?';
  const power = g.power_w != null ? '<div class="gpu-metric"><span class="label">Power</span> <span class="value">'+esc(g.power_w)+'W</span></div>' : '';
  return '<div class="gpu-row">'+statusIcon(g.status)
    +'<div class="gpu-info"><div class="gpu-name">'+esc(name)+'</div>'
    +'<div class="gpu-metrics">'
    +'<div class="gpu-metric"><span class="label">VRAM</span> <span class="value">'+orUnknown(g.vram_used_mb)+'/'+orUnknown(g.vram_total_mb)+' MB</span></div>'
    +'<div class="gpu-metric"><span class="label">Temp</span> <span class="value">'+orUnknown(g.temp_c)+'C</span></div>'
    +'<div class="gpu-metric"><span class="label">Util</span> <span class="value">'+num(g.gpu_util_pct)+'%</span></div>'
    +power+'</div>'
    +'<div class="vram-bar"><div class="vram-fill '+level+'" style="width:'+pct+'%"></div></div></div>'
    +'<div style="font-size:24px;font-weight:700;color:var(--'+level+');min-width:50px;text-align:right">'+pct+'%</div></div>';
}
// nameHtml must already be escaped by the caller; widthPct and color are computed locally.
function barRow(nameHtml, countText, widthPct, color) {
  return '<div class="bar-row"><div class="bar-label">'+nameHtml+'<span class="count">'+countText+'</span></div>'
    +'<div class="bar-track"><div class="bar-fill" style="width:'+widthPct+'%;background:'+color+'"></div></div></div>';
}
function requestRow(r) {
  const when = new Date(r.ts*1000).toLocaleTimeString();
  const agent = r.agent || '?';
  const tierColor = r.tier==='enterprise' ? 'var(--purple)' : r.tier==='professional' ? 'var(--blue)' : 'var(--dim)';
  return '<tr><td style="color:var(--dim);font-size:11px">'+esc(when)+'</td>'
    +'<td><span class="agent-tag '+agentClass(agent)+'">'+esc(agent)+'</span></td>'
    +'<td>'+esc(MODEL_LABELS[r.model]||r.model||'?')+'</td>'
    +'<td style="color:var(--dim);font-size:11px">'+esc(r.reason||'')+'</td>'
    +'<td style="font-size:11px;text-transform:uppercase;color:'+tierColor+'">'+esc(r.tier||'')+'</td></tr>';
}
function render(data) {
  if(!data||!data.gpus)return;
  const byCountDesc = (a,b)=>b[1]-a[1];
  const routes = Object.entries(data.route_counts||{}).map(([m,c])=>[m,num(c)]).sort(byCountDesc);
  const agents = Object.entries(data.agent_counts||{}).map(([a,c])=>[a,num(c)]).sort(byCountDesc);
  const total = routes.reduce((sum,e)=>sum+e[1],0);
  const maxR = Math.max(1,...routes.map(e=>e[1]));
  const maxA = Math.max(1,...agents.map(e=>e[1]));
  const recent = data.recent||[];
  document.getElementById('total-requests').textContent = total + ' requests';
  document.getElementById('update-time').textContent = new Date().toLocaleTimeString();
  document.getElementById('gpu-container').innerHTML = (data.gpus||[]).map(g=>gpuRow(g)).join('');
  document.getElementById('route-bars').innerHTML = routes.length
    ? routes.map(([m,c])=>barRow('<span class="name">'+esc(MODEL_LABELS[m]||m)+'</span>', c+' ('+(total?Math.round(c/total*100):0)+'%)', c/maxR*100, MODEL_COLORS[m]||DEFAULT_COLOR)).join('')
    : '<div style="color:var(--dim);font-size:13px">No data yet</div>';
  document.getElementById('agent-bars').innerHTML = agents.length
    ? agents.map(([a,c])=>barRow('<span class="name '+agentClass(a)+'">'+esc(a)+'</span>', c+' reqs', c/maxA*100, 'var(--accent)')).join('')
    : '<div style="color:var(--dim);font-size:13px">No agent activity yet</div>';
  document.getElementById('route-tbody').innerHTML = recent.length
    ? recent.slice(0,25).map(r=>requestRow(r)).join('')
    : '<tr><td colspan="5" style="color:var(--dim)">Waiting for requests...</td></tr>';
}
let currentPeriod = 'day';
async function switchPeriod(p) {
  currentPeriod = p;
  document.querySelectorAll('.period-btn').forEach(b => b.classList.remove('active'));
  document.querySelectorAll('.period-btn').forEach(b => { if(b.textContent.trim().startsWith(p==='day'?'24h':p==='week'?'7d':'30d')) b.classList.add('active'); });
  await loadTimeseries();
}
async function loadTimeseries() {
  try { const r = await fetch('/api/timeseries?period='+currentPeriod); renderTimeseries(await r.json()); } catch(e) {}
}
function renderTimeseries(d) {
  const models = (d&&d.models)||{}, labels = (d&&d.labels)||[];
  const container = document.getElementById('timeseries-chart');
  const legend = document.getElementById('timeseries-legend');
  const modelNames = Object.keys(models);
  // An empty label list is treated as 'no data' too; it used to return early
  // and leave the loading placeholder (or the previous period's chart) on screen.
  if(!labels.length||!modelNames.length){
    container.innerHTML='<div style="color:var(--dim);font-size:13px;padding:50px 0;text-align:center">No data yet</div>';
    legend.innerHTML='';
    return;
  }
  let maxVal = 1;
  for(const m of modelNames) for(const v of (models[m]||[])) if(num(v)>maxVal) maxVal=num(v);
  maxVal = Math.ceil(maxVal*1.15)||1;
  const W = labels.length>1?100/(labels.length-1):100, H=130;
  let paths='';
  for(const m of modelNames){
    const vals=models[m]||[];
    let pts='';
    for(let i=0;i<vals.length;i++){
      const x=i*W, y=H-(num(vals[i])/maxVal)*H;
      pts+=(i===0?'M':'L')+x.toFixed(1)+','+y.toFixed(1)+' ';
    }
    paths+='<path d="'+pts+'" fill="none" stroke="'+(MODEL_COLORS[m]||DEFAULT_COLOR)+'" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" opacity="0.85"/>';
  }
  let grid='';
  for(let g=0;g<=4;g++){
    const y=(g/4)*H;
    grid+='<line x1="0" y1="'+y.toFixed(1)+'" x2="100" y2="'+y.toFixed(1)+'" stroke="rgba(255,255,255,0.05)" stroke-width="1"/>';
  }
  const svg='<svg viewBox="0 0 100 '+(H+16)+'" style="width:100%;height:'+(H+20)+'px;display:block" preserveAspectRatio="none">'+grid+paths+'</svg>';
  const step=Math.max(1,Math.floor(labels.length/8));
  let lh='<div style="display:flex;margin-top:2px;font-size:10px;color:var(--dim);overflow:hidden">';
  for(let i=0;i<labels.length;i+=step) lh+='<div style="flex:1;text-align:center">'+esc(labels[i])+'</div>';
  lh+='</div>';
  container.innerHTML=svg+lh;
  // Fall back to the raw model id so unknown models are not labelled 'undefined'.
  legend.innerHTML=modelNames.map(m=>'<span style="display:flex;align-items:center;gap:6px;font-size:11px;color:var(--dim)"><svg width="18" height="10"><line x1="0" y1="5" x2="18" y2="5" stroke="'+(MODEL_COLORS[m]||DEFAULT_COLOR)+'" stroke-width="2.5"/></svg>'+esc(MODEL_SHORT[m]||m)+'</span>').join('');
}
function poll(){fetch('/api/state').then(r=>r.json()).then(data=>{render(data);document.getElementById('connection-status').textContent='live';document.getElementById('live-dot').className='status-dot live';}).catch(()=>{document.getElementById('connection-status').textContent='reconnecting...';document.getElementById('live-dot').className='status-dot';});}
poll();setInterval(poll,3000);loadTimeseries();
</script>
</body>
</html>"""
@app.route("/")
def dashboard():
    """Serve the single-page dashboard UI.

    DASHBOARD_HTML is a fixed page with no template variables, so it is
    returned as-is instead of being passed through the Jinja renderer. Its
    inline JavaScript and CSS are full of braces, and rendering it as a
    template would turn any "{" followed by "{", "%" or "#" in the script
    into template syntax. Flask sends a returned string as an HTML response.
    """
    return DASHBOARD_HTML
@app.route("/api/state")
def api_state():
    """Return the current metrics snapshot for the dashboard's poller.

    The value comes straight from fetch_state(): the router's JSON body when
    it can be fetched, otherwise the empty placeholder. It is handed to Flask
    unchanged for serialisation.
    """
    snapshot = fetch_state()
    return snapshot
@app.route("/api/timeseries")
def api_timeseries():
    """Proxy the router's usage time series for the dashboard chart.

    Query parameters:
        period: forwarded to the router unchanged; defaults to "day". The
            dashboard UI sends "day", "week" or "month".

    Returns:
        The router's decoded JSON body on HTTP 200. On any other status, or
        if the request or decoding raises, an empty series
        ``{"models": {}, "labels": []}`` is returned.
    """
    period = request.args.get("period", "day")
    try:
        # Hand the caller-supplied value to requests via `params` so it is
        # URL-encoded. Concatenating it into the URL let a value such as
        # "day&x=1" or "day#frag" alter the upstream request.
        # NOTE(review): the host is hard-coded here while fetch_state()
        # honours ROUTER_METRICS_URL -- confirm whether this should follow it.
        r = requests.get(
            "http://router:9000/metrics/timeseries",
            params={"period": period},
            timeout=5,
        )
        if r.status_code == 200:
            return r.json()
    except Exception:
        # Best-effort proxy: an unreachable router or a bad body yields the
        # empty series below rather than a 500.
        pass
    return {"models": {}, "labels": []}
@app.route("/api/stream")
def api_stream():
    """Stream metrics snapshots to the client as Server-Sent Events.

    Each connection registers its own queue in sse_subscribers, immediately
    sends one snapshot, then relays whatever broadcast_loop() enqueues. If
    nothing arrives within 3 seconds the stream fetches and sends a snapshot
    itself. The queue is unregistered when the generator is closed.
    """

    def sse_frame(body):
        # One SSE message: a single "data:" field terminated by a blank line.
        return "data: " + body + "\n\n"

    def event_stream():
        inbox = queue.Queue()
        with sse_lock:
            sse_subscribers.append(inbox)
        try:
            yield sse_frame(json.dumps(fetch_state()))
            while True:
                try:
                    pushed = inbox.get(timeout=3)
                except queue.Empty:
                    # No broadcast in time: fetch a snapshot for this client.
                    yield sse_frame(json.dumps(fetch_state()))
                else:
                    yield sse_frame(pushed)
        except GeneratorExit:
            # Generator closed (e.g. client went away); clean up below.
            pass
        finally:
            with sse_lock:
                if inbox in sse_subscribers:
                    sse_subscribers.remove(inbox)

    response_headers = {
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
        "Access-Control-Allow-Origin": "*",
    }
    return Response(
        stream_with_context(event_stream()),
        mimetype="text/event-stream",
        headers=response_headers,
    )
@app.route("/health")
def health():
    """Report a fixed "healthy" status payload for this dashboard service."""
    return dict(status="healthy", service="harness-dashboard")
if __name__ == "__main__":
    # Direct-run entry point: listen on all interfaces, port 3000, with the
    # Flask debugger disabled.
    app.run(debug=False, port=3000, host="0.0.0.0")