From ddde6646de11853530d0abefd19fc3d7ca2bd226 Mon Sep 17 00:00:00 2001
From: Abiba
Date: Sat, 23 May 2026 06:00:37 +0000
Subject: [PATCH] fix: decouple VRAM usage from saturation status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VRAM percentage no longer marks GPU as saturated. Saturation is about
slot availability (handled by is_gpu_busy()), not memory usage. Added
vram_warning boolean flag (≥95% threshold) for informational monitoring
without affecting routing decisions. 27B Dense now correctly shows
healthy at 91% VRAM.
---
 router/router.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/router/router.py b/router/router.py
index 12bd57a..6e9cfb8 100644
--- a/router/router.py
+++ b/router/router.py
@@ -102,7 +102,8 @@ def check_gpu_health(model, sidecar_timeout=5, gpu_timeout=3):
         if resp.status_code == 200:
             d = resp.json()
             pct = (d.get("vram_used_mb",0) / max(d.get("vram_total_mb",1), 1)) * 100
-            status = "healthy" if pct < 90 else "saturated"
+            status = "healthy" # VRAM usage != saturation; busy slots handled by is_gpu_busy()
+            vram_warning = pct >= 95
             # Also check if llama.cpp endpoint is actually responding
             gpu_url = GPU_URLS.get(model, "")
             try:
@@ -111,7 +112,7 @@ def check_gpu_health(model, sidecar_timeout=5, gpu_timeout=3):
                     status = "down"
             except Exception:
                 status = "down"
-            return {"status": status, "vram_used_mb": d.get("vram_used_mb"), "vram_total_mb": d.get("vram_total_mb"), "vram_pct": round(pct,1), "temp_c": d.get("temp_c"), "gpu_util_pct": d.get("gpu_util_pct"), "gpu_name": d.get("gpu_name"), "power_w": d.get("power_w"), "power_limit_w": d.get("power_limit_w")}
+            return {"status": status, "vram_warning": vram_warning, "vram_used_mb": d.get("vram_used_mb"), "vram_total_mb": d.get("vram_total_mb"), "vram_pct": round(pct,1), "temp_c": d.get("temp_c"), "gpu_util_pct": d.get("gpu_util_pct"), "gpu_name": d.get("gpu_name"), "power_w": d.get("power_w"), "power_limit_w": d.get("power_limit_w")}
     except Exception:
         pass
     return {"status": "down"}