fix: routing priority — MoE first, VLM second, Dense last
This commit is contained in:
+5
-6
@@ -168,7 +168,7 @@ def route(rd, tier):
|
|||||||
if not is_gpu_busy("qwen3.5-9b-vlm"):
|
if not is_gpu_busy("qwen3.5-9b-vlm"):
|
||||||
return {"model":"qwen3.5-9b-vlm","reason":"lightweight"}
|
return {"model":"qwen3.5-9b-vlm","reason":"lightweight"}
|
||||||
# VLM busy — fall back to Dense, then MoE
|
# VLM busy — fall back to MoE, then Dense
|
||||||
fallback = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B"] if m in avail]
|
fallback = [m for m in ["qwen3.6-35B-A3B","qwen3.6-27B-code"] if m in avail]
|
||||||
result = select_best_gpu(fallback, "lightweight_fallback")
|
result = select_best_gpu(fallback, "lightweight_fallback")
|
||||||
if result: return result
|
if result: return result
|
||||||
|
|
||||||
@@ -182,15 +182,14 @@ def route(rd, tier):
|
|||||||
|
|
||||||
# TIER 3: Heavy reasoning — large context or very long conversations
|
# TIER 3: Heavy reasoning — large context or very long conversations
|
||||||
if t > 4000 or turns > 8:
|
if t > 4000 or turns > 8:
|
||||||
candidates = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B","qwen3.5-9b-vlm"] if m in avail]
|
candidates = [m for m in ["qwen3.6-35B-A3B","qwen3.5-9b-vlm","qwen3.6-27B-code"] if m in avail]
|
||||||
result = select_best_gpu(candidates, "heavy_reasoning")
|
result = select_best_gpu(candidates, "heavy_reasoning")
|
||||||
if result: return result
|
if result: return result
|
||||||
|
|
||||||
# TIER 4: Default — Dense preferred for medium tasks, MoE as workhorse, VLM overflow
|
# TIER 4: Default — MoE first, VLM helps, Dense last (slow)
|
||||||
if t <= 4000:
|
if t <= 4000:
|
||||||
# Catch everything that isn't heavy (≤4000 tokens, any turn count)
|
candidates = [m for m in ["qwen3.6-35B-A3B","qwen3.5-9b-vlm","qwen3.6-27B-code"] if m in avail]
|
||||||
candidates = [m for m in ["qwen3.6-27B-code","qwen3.6-35B-A3B","qwen3.5-9b-vlm"] if m in avail]
|
result = select_best_gpu(candidates, "default")
|
||||||
result = select_best_gpu(candidates, "medium_task")
|
|
||||||
if result: return result
|
if result: return result
|
||||||
|
|
||||||
# Fallback — best available
|
# Fallback — best available
|
||||||
|
|||||||
Reference in New Issue
Block a user