feat: latency vs prompt size scatter plot on dashboard
Router: new /metrics/scatter endpoint returns individual data points (prompt_tokens, completion_tokens, inference_ms, tokens_per_sec, model, agent, reason, stream) for scatter visualization.
Dashboard: new panel showing latency vs prompt size by model.
- Log-scale X axis (prompt tokens) with model color coding
- Dropdown to filter by individual model or view all
- Hover tooltips with details per point
- Auto-refresh every 30s
Enables direct observation of the context-length vs latency relationship — validates routing tier decisions.
This commit is contained in:
@@ -537,6 +537,36 @@ def performance():
|
||||
except Exception as e:
|
||||
return jsonify({"error": str(e)}), 500
|
||||
|
||||
@app.route("/metrics/scatter")
def scatter():
    """Return individual data points for scatter plots (prompt_tokens vs latency).

    Query parameters:
        window: look-back window in hours (default "24"); parsed with int().
        model: exact model name to keep, or "all" (default) to keep every model.

    Returns:
        JSON ``{"points": [...], "count": N}`` on success; a 503 JSON error
        when the Redis client ``r`` is falsy; a 500 JSON error carrying
        ``str(e)`` when anything outside the per-record loop raises (this
        includes a non-integer ``window`` value).
    """
    if not r:
        return jsonify({"error": "Redis unavailable"}), 503
    try:
        window_hours = int(request.args.get("window", "24"))
        model_filter = request.args.get("model", "all")
        # rec["ts"] is compared against time.time(), so it is presumably an
        # epoch timestamp in seconds -- confirm against the writer of perf:recent.
        cutoff = time.time() - (window_hours * 3600)
        raw = r.lrange("perf:recent", 0, -1)
        points = []
        for x in raw:
            try:
                rec = json.loads(x)
                in_window = rec["ts"] >= cutoff
                if in_window and (model_filter == "all" or rec["model"] == model_filter):
                    points.append({
                        "model": rec["model"],
                        "agent": rec["agent"],
                        "reason": rec["reason"],
                        "prompt_tokens": int(rec.get("prompt_tokens", 0)),
                        "completion_tokens": rec.get("completion_tokens", 0),
                        "inference_ms": round(rec["inference_ms"], 1),
                        "tokens_per_sec": rec.get("tokens_per_sec", 0),
                        "stream": rec.get("stream", False),
                    })
            except (ValueError, KeyError, TypeError, OverflowError):
                # Best-effort: skip a malformed record (bad JSON, missing key,
                # wrong value type) instead of failing the whole response.
                # Deliberately not a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                continue
        return jsonify({"points": points, "count": len(points)})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
@app.route("/v1/models")
|
||||
def models():
|
||||
def _h(m): return check_gpu_health(m, sidecar_timeout=1.5, gpu_timeout=1)
|
||||
|
||||
Reference in New Issue
Block a user