Initial commit: CT 116 inference harness — nginx, LiteLLM, router, dashboard, Redis
- Complexity-based routing (MoE default, Dense heavy, Gemma light)
- Per-agent API keys with metrics tracking
- Time-series usage graphs (24h/7d/30d)
- Streaming support (SSE passthrough)
- Unicode cleanup (ASCII-only output)
- Vision support (gemma-4-E4B)
- Tier enforcement (starter/professional/enterprise)
- GPU health monitoring via sidecar polling
- Unified dashboard with line graph
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
# ---- Process-level settings (main context) ----

# Let nginx pick the worker count automatically.
worker_processes auto;

# File in which the master process stores its PID.
pid /var/run/nginx.pid;

# Global error log; messages below "warn" severity are not written.
error_log /var/log/nginx/error.log warn;

events {
    # Maximum number of simultaneous connections per worker process.
    worker_connections 1024;
}
|
||||
|
||||
http {
|
||||
# ---- HTTP-wide defaults ----

# Extension -> MIME type map; anything not listed is served as default_type.
include /etc/nginx/mime.types;
default_type application/octet-stream;

# Access-log layout: the usual combined-style fields plus the total request
# time (rt=, in seconds). The previous definition was the literal text
# "launching rt=" with no variables, so every request logged the same
# constant string.
# NOTE(review): field list is reconstructed; adjust if a log consumer expects
# a different layout.
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                '$status $body_bytes_sent "$http_referer" '
                '"$http_user_agent" rt=$request_time';
access_log /var/log/nginx/access.log main;

# An error_log directive without a level defaults to "error" and replaces the
# main-context setting for all HTTP processing, so the level is spelled out
# here to keep the "warn" threshold in effect.
error_log /var/log/nginx/error.log warn;

sendfile on;

# Idle client keep-alive connections are closed after 65 seconds.
keepalive_timeout 65;
|
||||
|
||||
# ---- Backend pools referenced by proxy_pass in the server block ----

# Request router (target of /v1/ and /stream).
upstream router_api {
    server router:9000;
}

# Dashboard UI (target of / and /api/).
upstream dashboard_ui {
    server dashboard:3000;
}

# LiteLLM proxy (target of /litellm/).
upstream litellm_backend {
    server litellm:4000;
}
|
||||
|
||||
server {
    listen 80;

    # Relay upstream responses to the client as they arrive instead of
    # buffering them, so SSE / streamed output is not delayed. Every proxied
    # location below inherits this setting, so it is declared only once.
    proxy_buffering off;

    # API: forwarded to the router.
    location /v1/ {
        proxy_pass http://router_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header Authorization $http_authorization;
        proxy_connect_timeout 10s;
        # Tolerate up to 5 minutes of upstream silence between reads.
        proxy_read_timeout 300s;
    }

    # SSE streaming endpoint.
    location /stream {
        proxy_pass http://router_api;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        # Clear the Connection header sent to the upstream.
        proxy_set_header Connection "";
        chunked_transfer_encoding off;
        # Same idle allowance as /v1/; the 60s default would cut a stream
        # that stays quiet for longer than a minute.
        proxy_read_timeout 300s;
    }

    # Dashboard API proxy for SSE.
    location /api/ {
        proxy_pass http://dashboard_ui;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        # Same idle allowance as /v1/ so quiet event streams are not dropped
        # after the 60s default.
        proxy_read_timeout 300s;
    }

    # LiteLLM debug access.
    # NOTE(review): this exposes the LiteLLM backend on the public listener
    # with no allow/deny or auth_basic restriction; confirm that is intended
    # outside of development.
    location /litellm/ {
        # Strip the /litellm/ prefix before handing the request upstream.
        rewrite ^/litellm/(.*) /$1 break;
        proxy_pass http://litellm_backend;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header Authorization $http_authorization;
    }

    # Dashboard: everything not matched by a more specific location.
    location / {
        proxy_pass http://dashboard_ui;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }

    # Static health probe answered by nginx itself (no upstream involved).
    location /health {
        # Set the response type via default_type: "return" derives
        # Content-Type from it, whereas add_header would emit a second
        # Content-Type header alongside application/octet-stream.
        default_type application/json;
        return 200 "{\"status\":\"healthy\"}";
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user