fix: cross-agent GPU spreading prevents hotspot hammering
OLD: checked only whether the CURRENT agent was on a GPU, so Tanko→MoE and Mumuni also→MoE (Mumuni didn't see Tanko). NEW: checks whether ANY agent is on a GPU (cross-agent awareness). Pass 1: prefer GPUs with 0 agents. Pass 2: prefer a GPU this agent is not already on. Pass 3: any non-busy GPU. Prevents Tanko+Mumuni piling onto the same GPU simultaneously, even when both slots are free. Combined with MoE=1 slot, this guarantees overflow goes to idle Dense.
This commit is contained in:
+17
-9
@@ -161,18 +161,26 @@ def is_gpu_busy(model):
|
|||||||
return active >= max_c
|
return active >= max_c
|
||||||
|
|
||||||
def select_best_gpu(candidates, reason, agent=""):
|
def select_best_gpu(candidates, reason, agent=""):
|
||||||
"""Pick best GPU, spreading different agents across GPUs when possible."""
|
"""Pick best GPU, spreading agents across GPUs to prevent hotspots."""
|
||||||
# Track which GPUs this agent is already using
|
# Count how many distinct agents are on each GPU
|
||||||
agent_gpus = set()
|
gpu_agent_counts = {}
|
||||||
if agent and r:
|
if r:
|
||||||
for m in GPU_URLS:
|
for m in GPU_URLS:
|
||||||
if r.get("agent_gpu:" + agent + ":" + m):
|
count = 0
|
||||||
agent_gpus.add(m)
|
for ak in API_KEYS.values():
|
||||||
# First pass: prefer GPUs NOT used by this agent
|
if r.get("agent_gpu:" + ak["agent"] + ":" + m):
|
||||||
|
count += 1
|
||||||
|
gpu_agent_counts[m] = count
|
||||||
|
# First pass: prefer GPUs with 0 other agents (fresh GPU for this agent)
|
||||||
for m in candidates:
|
for m in candidates:
|
||||||
if not is_gpu_busy(m) and m not in agent_gpus:
|
if not is_gpu_busy(m) and gpu_agent_counts.get(m, 0) == 0:
|
||||||
return {"model": m, "reason": reason}
|
return {"model": m, "reason": reason}
|
||||||
# Second pass: any non-busy GPU (agent reuse is ok)
|
# Second pass: prefer GPU this agent is NOT already on (skip own GPU)
|
||||||
|
if agent:
|
||||||
|
for m in candidates:
|
||||||
|
if not is_gpu_busy(m) and not r.get("agent_gpu:" + agent + ":" + m):
|
||||||
|
return {"model": m, "reason": reason}
|
||||||
|
# Third pass: any non-busy GPU
|
||||||
for m in candidates:
|
for m in candidates:
|
||||||
if not is_gpu_busy(m):
|
if not is_gpu_busy(m):
|
||||||
return {"model": m, "reason": reason}
|
return {"model": m, "reason": reason}
|
||||||
|
|||||||
Reference in New Issue
Block a user