diff --git a/router/router.py b/router/router.py
index c3674fb..238d7ff 100644
--- a/router/router.py
+++ b/router/router.py
@@ -160,27 +160,45 @@ def route(rd, tier):
         if hints.get("priority")=="quality" and "qwen3.6-27B-code" in avail:
             return select_best_gpu(["qwen3.6-27B-code"], "hint_quality") or {"model":"qwen3.6-27B-code","reason":"hint_quality"}
     
-    # Heavy -> dense (but fall back to MoE if dense is busy)
-    if t > 4000 or sys or turns > 6:
-        candidates = ["qwen3.6-27B-code","qwen3.6-35B-A3B","qwen3.5-9b-vlm"]
-        candidates = [m for m in candidates if m in avail]
+    first_msg = msgs[0].get("content","") if msgs else ""
+    words = len(first_msg.split()) if isinstance(first_msg, str) else 99
+    
+    # TIER 1: Lightweight — single-turn short queries → VLM first
+    if not sys and turns <= 1 and words <= 100 and "qwen3.5-9b-vlm" in avail:
+        if not is_gpu_busy("qwen3.5-9b-vlm"):
+            return {"model":"qwen3.5-9b-vlm","reason":"lightweight"}
+        # VLM busy — fall back to Dense, then MoE
+        fallback = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B"] if m in avail]
+        result = select_best_gpu(fallback, "lightweight_fallback")
+        if result: return result
+    
+    # TIER 2: Simple conversations — low token, short context → VLM preferred
+    if not sys and t <= 1000 and turns <= 4 and "qwen3.5-9b-vlm" in avail:
+        if not is_gpu_busy("qwen3.5-9b-vlm"):
+            return {"model":"qwen3.5-9b-vlm","reason":"simple_conv"}
+        # VLM busy — try Dense
+        if "qwen3.6-27B-code" in avail and not is_gpu_busy("qwen3.6-27B-code"):
+            return {"model":"qwen3.6-27B-code","reason":"simple_conv_fallback"}
+    
+    # TIER 3: Heavy reasoning — large context, system prompts, long conversations
+    if t > 4000 or sys or turns > 8:
+        candidates = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B","qwen3.5-9b-vlm"] if m in avail]
         result = select_best_gpu(candidates, "heavy_reasoning")
         if result: return result
     
-    # Ultra-light -> VLM
-    first_msg = msgs[0].get("content","") if msgs else ""
-    words = len(first_msg.split()) if isinstance(first_msg, str) else 99
-    if words <= 3 and turns <= 1 and not sys and "qwen3.5-9b-vlm" in avail:
-        if not is_gpu_busy("qwen3.5-9b-vlm"):
-            return {"model":"qwen3.5-9b-vlm","reason":"ultra_light"}
+    # TIER 4: Default — Dense preferred for medium tasks, MoE as workhorse, VLM as overflow
+    if turns <= 6 and t <= 4000:
+        # Medium complexity — try Dense first, then MoE, then VLM
+        candidates = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B","qwen3.5-9b-vlm"] if m in avail]
+        result = select_best_gpu(candidates, "medium_task")
+        if result: return result
     
-    # Default: MoE, fall back to dense if MoE is busy
-    if "qwen3.6-35B-A3B" in avail:
-        if is_gpu_busy("qwen3.6-35B-A3B") and "qwen3.6-27B-code" in avail:
-            return {"model": "qwen3.6-27B-code", "reason": "load_balanced_default"}
+    # Fallback — best available
+    if "qwen3.6-35B-A3B" in avail and not is_gpu_busy("qwen3.6-35B-A3B"):
         return {"model":"qwen3.6-35B-A3B","reason":"default_moe"}
-    
-    return {"model":avail[0],"reason":"fallback"}
+    result = select_best_gpu([m for m in avail], "fallback")
+    if result: return result
+    return {"model":avail[0],"reason":"last_resort"}
 
 def clean_unicode(text):
     if not isinstance(text, str): return text