From 060a47fce93f639e5fdca47ee9a22be8b5b94012 Mon Sep 17 00:00:00 2001
From: Abiba <abiba@sysloggh.com>
Date: Sat, 30 May 2026 13:15:19 +0000
Subject: [PATCH] revert: MoE back to 2 slots (cross-agent spread now prevents
 hotspot)

Cross-agent GPU awareness ensures that Tanko and Mumuni never
hit MoE simultaneously: the second agent always overflows to
Dense/VLM. Because distinct agents never pile on, MoE can
safely use its extra VRAM to run 2 slots.
---
 router/router.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/router.py b/router/router.py
index b480405..242a74f 100644
--- a/router/router.py
+++ b/router/router.py
@@ -19,7 +19,7 @@ GPU_URLS = {
 }
 # Max concurrent requests per GPU (based on llama.cpp --parallel)
 GPU_MAX_CONCURRENT = {
-    "qwen3.6-35B-A3B": 1,   # 1 slot (95C thermal emergency)
+    "qwen3.6-35B-A3B": 2,   # 2 slots (cross-agent spread prevents overheating)
     "qwen3.6-27B-code": 2,  # 2 slots (128K context frees VRAM)
     "qwen3.5-9b-vlm": 2,       # 2 slots (12GB VRAM, 4GB headroom)
 }