b849cd3395
router/router.py (+158 lines): - store_perf_record(): captures queue_ms, inference_ms, prompt_tokens, completion_tokens, tokens_per_sec per request in Redis - Per-model, per-reason, per-agent rolling windows (last 200-500) - /metrics/performance?window=N endpoint with percentiles (p50/p95/p99) for latency, throughput, and queue time per model/reason/agent - Queue time now surfaced in routing metadata and routes:recent - Streaming requests tracked with estimated prompt tokens nginx/nginx.conf: - Added /metrics/ proxy pass to router_api Enables model performance comparison and routing tier validation.
96 lines
2.8 KiB
Nginx Configuration File
96 lines
2.8 KiB
Nginx Configuration File
# ---- Main (global) context ----

# File in which the master process stores its PID.
pid /var/run/nginx.pid;

# Global error log; records messages of severity "warn" and above.
error_log /var/log/nginx/error.log warn;

# "auto" lets nginx pick the number of worker processes itself.
worker_processes auto;

events {
    # Maximum number of simultaneous connections a single worker may open.
    worker_connections 1024;
}
|
|
|
|
http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Access-log format: the usual combined fields plus the total request
    # processing time (rt=...).
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" rt=$request_time';

    access_log /var/log/nginx/access.log main;

    # An error_log directive without a level falls back to "error" and
    # overrides the main-context setting for everything handled under http{}.
    # State "warn" explicitly so HTTP processing logs at the same level as
    # the main context's error_log.
    error_log /var/log/nginx/error.log warn;

    sendfile on;
    keepalive_timeout 65;

    # Backend services, addressed by hostname and port.
    upstream router_api { server router:9000; }
    upstream dashboard_ui { server dashboard:3000; }
    upstream litellm_backend { server litellm:4000; }

    server {
        listen 80;

        # Security headers. "always" attaches them regardless of the response
        # status code. They are inherited by every location below because
        # none of those locations declares its own add_header.
        add_header X-Content-Type-Options nosniff always;
        add_header X-Frame-Options SAMEORIGIN always;
        add_header X-XSS-Protection "1; mode=block" always;

        # Disable buffering for SSE streams (server-wide default; locations
        # inherit it unless they override it).
        proxy_buffering off;

        # API — through router
        location /v1/ {
            proxy_pass http://router_api;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header Authorization $http_authorization;
            proxy_connect_timeout 10s;
            # Allow up to 600s between two successive reads from the upstream
            # before nginx gives up on the response.
            proxy_read_timeout 600s;
            proxy_buffering off;
        }

        # SSE streaming endpoint
        location /stream {
            proxy_pass http://router_api;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            # Clear the Connection header sent to the upstream.
            proxy_set_header Connection "";
            # The default proxy_read_timeout is 60s, which closes an event
            # stream that stays silent for a minute. Use the same 600s
            # allowance as /v1/.
            proxy_read_timeout 600s;
            proxy_buffering off;
            chunked_transfer_encoding off;
        }

        # Dashboard API proxy for SSE
        location /api/ {
            proxy_pass http://dashboard_ui;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            # Same reasoning as /stream: avoid the 60s default cutting off
            # an idle event stream.
            proxy_read_timeout 600s;
            proxy_buffering off;
        }

        # LiteLLM debug
        # NOTE(review): this debug endpoint is reachable by anyone who can
        # reach port 80 — confirm that is intended, or restrict it with
        # allow/deny or authentication.
        location /litellm/ {
            # Strip the /litellm/ prefix; "break" stops further rewrite
            # processing so the rewritten URI is what gets proxied.
            rewrite ^/litellm/(.*) /$1 break;
            proxy_pass http://litellm_backend;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header Authorization $http_authorization;
        }

        # Dashboard (catch-all for everything not matched by a longer prefix)
        location / {
            proxy_pass http://dashboard_ui;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_buffering off;
        }

        # Performance analytics
        # NOTE(review): no access restriction is configured here — confirm
        # the metrics are meant to be publicly readable.
        location /metrics/ {
            proxy_pass http://router_api;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
        }

        # Health check, forwarded to the router's /health path.
        location /health {
            proxy_pass http://router_api/health;
            proxy_http_version 1.1;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
        }
    }
}
|