diff --git a/router/router.py b/router/router.py
index 53d7b64..b480405 100644
--- a/router/router.py
+++ b/router/router.py
@@ -161,18 +161,26 @@ def is_gpu_busy(model):
     return active >= max_c
 
 def select_best_gpu(candidates, reason, agent=""):
-    """Pick best GPU, spreading different agents across GPUs when possible."""
-    # Track which GPUs this agent is already using
-    agent_gpus = set()
-    if agent and r:
+    """Pick best GPU, spreading agents across GPUs to prevent hotspots."""
+    # Count, per GPU, the API_KEYS agents that have an agent_gpu marker set in r
+    gpu_agent_counts = {}
+    if r:
         for m in GPU_URLS:
-            if r.get("agent_gpu:" + agent + ":" + m):
-                agent_gpus.add(m)
-    # First pass: prefer GPUs NOT used by this agent
+            count = 0
+            for ak in API_KEYS.values():
+                if r.get("agent_gpu:" + ak["agent"] + ":" + m):
+                    count += 1
+            gpu_agent_counts[m] = count
+    # First pass: prefer non-busy GPUs with no recorded agents at all (fresh GPU)
     for m in candidates:
-        if not is_gpu_busy(m) and m not in agent_gpus:
+        if not is_gpu_busy(m) and gpu_agent_counts.get(m, 0) == 0:
             return {"model": m, "reason": reason}
-    # Second pass: any non-busy GPU (agent reuse is ok)
+    # Second pass: prefer GPU this agent is NOT already on (skipped when r is unset)
+    if agent and r:
+        for m in candidates:
+            if not is_gpu_busy(m) and not r.get("agent_gpu:" + agent + ":" + m):
+                return {"model": m, "reason": reason}
+    # Third pass: any non-busy GPU
     for m in candidates:
         if not is_gpu_busy(m):
             return {"model": m, "reason": reason}