Router: raise the chat-completions request timeout from 120s to 300s and fix the gpu_decr call in the timeout handler. Dashboard: modern Bootstrap 5 redesign with KPI stats, equal-height cards, and a queue ring. Nginx: 600s timeout.

This commit is contained in:
Abiba (pi)
2026-05-16 22:12:21 +00:00
parent 9817fe2ef2
commit 808c9d3d13
3 changed files with 156 additions and 213 deletions
+4 -3
View File
@@ -210,7 +210,7 @@ def chat():
except Exception: pass
start = time.time()
resp = requests.post(url+"/chat/completions", json=rd,
headers={"Content-Type":"application/json","Authorization":"Bearer not-needed"}, timeout=120, stream=is_stream)
headers={"Content-Type":"application/json","Authorization":"Bearer not-needed"}, timeout=300, stream=is_stream)
lat = int((time.time()-start)*1000)
gpu_decr(model) # Release slot
@@ -229,8 +229,9 @@ def chat():
data["routing"] = {"model":model,"reason":reason,"gpu":url,"tier":tier,"agent":agent,"latency_ms":lat,"active_gpu":gpu_active_count(model)}
bcast()
return jsonify(data)
except requests.Timeout:
gpu_decr(model if 'model' in dir() else "unknown")
except requests.Timeout:
try: gpu_decr(model)
except: pass
return jsonify({"error":"timeout"}), 504
except Exception as e:
log.error("Error: %s\n%s", e, traceback.format_exc())