From b7882b2434f2d558acf3fc3906772920089fa3aa Mon Sep 17 00:00:00 2001
From: Abiba
Date: Mon, 25 May 2026 00:31:40 +0000
Subject: [PATCH] fix: reduce 27B Dense context to 192K to free VRAM

RTX 3090 was at 94.9% VRAM at 262K context. Reduced to 192K (196608),
freeing ~2.4GB. VRAM now at 85% with room for active inference.
---
 router/router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/router.py b/router/router.py
index 6e9cfb8..c1a239e 100644
--- a/router/router.py
+++ b/router/router.py
@@ -27,7 +27,7 @@ GPU_MAX_CONCURRENT = {
 # Context window sizes (tokens) — used for compaction signals
 GPU_CONTEXT = {
     "qwen3.6-35B-A3B": 262144,
-    "qwen3.6-27B-code": 262144,
+    "qwen3.6-27B-code": 196608,
     "qwen3.5-9b-vlm": 262144,
 }
 