fix: reduce 27B Dense context to 192K to free VRAM

The RTX 3090 was at 94.9% VRAM usage with a 256K (262144-token) context. Reduced the
"qwen3.6-27B-code" entry in GPU_CONTEXT to 192K (196608 tokens), freeing ~2.4 GB.
VRAM usage is now at 85%, leaving room for active inference.
This commit is contained in:
Abiba
2026-05-25 00:31:40 +00:00
parent ddde6646de
commit b7882b2434
+1 -1
View File
@@ -27,7 +27,7 @@ GPU_MAX_CONCURRENT = {
# Context window sizes (tokens) — used for compaction signals # Context window sizes (tokens) — used for compaction signals
GPU_CONTEXT = { GPU_CONTEXT = {
"qwen3.6-35B-A3B": 262144, "qwen3.6-35B-A3B": 262144,
"qwen3.6-27B-code": 262144, "qwen3.6-27B-code": 196608,
"qwen3.5-9b-vlm": 262144, "qwen3.5-9b-vlm": 262144,
} }