From f519a3fa604cb0e085071311b5effa4377d6c5ba Mon Sep 17 00:00:00 2001
From: Abiba <abiba@sysloggh.com>
Date: Tue, 19 May 2026 17:19:29 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20routing=20=E2=80=94=20system=20prompts?=
 =?UTF-8?q?=20no=20longer=20force=20heavy=20tier?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

System messages are common in agent conversations but do not by
themselves indicate a heavy workload. Heavy routing is now triggered
only by token count (>4000) or turn count (>8), so simple conversations
(<=1000 tokens, <=4 turns) that carry a system prompt can route to VLM.
---
 router/router.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/router/router.py b/router/router.py
index 238d7ff..e7f9aa7 100644
--- a/router/router.py
+++ b/router/router.py
@@ -172,16 +172,16 @@ def route(rd, tier):
         result = select_best_gpu(fallback, "lightweight_fallback")
         if result: return result
     
-    # TIER 2: Simple conversations — low token, short context → VLM preferred
-    if not sys and t <= 1000 and turns <= 4 and "qwen3.5-9b-vlm" in avail:
+    # TIER 2: Simple conversations — short context, any prompt → VLM preferred
+    if t <= 1000 and turns <= 4 and "qwen3.5-9b-vlm" in avail:
         if not is_gpu_busy("qwen3.5-9b-vlm"):
             return {"model":"qwen3.5-9b-vlm","reason":"simple_conv"}
         # VLM busy — try Dense
         if "qwen3.6-27B-code" in avail and not is_gpu_busy("qwen3.6-27B-code"):
             return {"model":"qwen3.6-27B-code","reason":"simple_conv_fallback"}
     
-    # TIER 3: Heavy reasoning — large context, system prompts, long conversations
-    if t > 4000 or sys or turns > 8:
+    # TIER 3: Heavy reasoning — large context or very long conversations
+    if t > 4000 or turns > 8:
         candidates = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B","qwen3.5-9b-vlm"] if m in avail]
         result = select_best_gpu(candidates, "heavy_reasoning")
         if result: return result