Router: raise the upstream request timeout from 120s to 300s and fix the gpu_decr slot release in the timeout handler. Dashboard: modern Bootstrap 5 redesign with KPI stats, equal-height cards, and a queue ring. Nginx: 600s timeout.
This commit is contained in:
+4
-3
@@ -210,7 +210,7 @@ def chat():
|
||||
except Exception: pass
|
||||
start = time.time()
|
||||
resp = requests.post(url+"/chat/completions", json=rd,
|
||||
headers={"Content-Type":"application/json","Authorization":"Bearer not-needed"}, timeout=120, stream=is_stream)
|
||||
headers={"Content-Type":"application/json","Authorization":"Bearer not-needed"}, timeout=300, stream=is_stream)
|
||||
lat = int((time.time()-start)*1000)
|
||||
gpu_decr(model) # Release slot
|
||||
|
||||
@@ -229,8 +229,9 @@ def chat():
|
||||
data["routing"] = {"model":model,"reason":reason,"gpu":url,"tier":tier,"agent":agent,"latency_ms":lat,"active_gpu":gpu_active_count(model)}
|
||||
bcast()
|
||||
return jsonify(data)
|
||||
except requests.Timeout:
|
||||
gpu_decr(model if 'model' in dir() else "unknown")
|
||||
except requests.Timeout:
|
||||
try: gpu_decr(model)
|
||||
except: pass
|
||||
return jsonify({"error":"timeout"}), 504
|
||||
except Exception as e:
|
||||
log.error("Error: %s\n%s", e, traceback.format_exc())
|
||||
|
||||
Reference in New Issue
Block a user