diff --git a/router/router.py b/router/router.py index 3b58c54..e06b8b3 100644 --- a/router/router.py +++ b/router/router.py @@ -308,10 +308,10 @@ def chat(): if raw: yield clean_unicode(raw) bcast() ctx_remaining = GPU_CONTEXT.get(model, 65536) - estimate_tokens(rd.get("messages",[])) - r = Response(stream_with_context(gen()), mimetype="text/event-stream") - r.headers["X-Context-Remaining"] = str(max(0, ctx_remaining)) - r.headers["X-Context-Model"] = model - return r + sse_resp = Response(stream_with_context(gen()), mimetype="text/event-stream") + sse_resp.headers["X-Context-Remaining"] = str(max(0, ctx_remaining)) + sse_resp.headers["X-Context-Model"] = model + return sse_resp data = clean_response(resp.json()) for c in data.get("choices",[]): msg = c.get("message",{})