Add GPU dashboard container + Nginx routing
This commit is contained in:
@@ -0,0 +1,122 @@
|
||||
## Syslog GPU Router — Nginx Configuration (Docker-internal)
|
||||
## Routes incoming agent requests to the appropriate GPU backend
|
||||
## based on the X-Syslog-Model header.
|
||||
|
||||
## Default workhorse backend: Strix Halo 395 serving qwen3.6-35B-A3B (MoE).
upstream amdpve_pool {
    server 192.168.68.15:8080;
}
|
||||
|
||||
## Heavy-reasoning backend: RTX 3090 serving qwen3.5-27B (dense).
upstream llmgpu_pool {
    server 192.168.68.8:8080;
}
|
||||
|
||||
## Ultra-light backend: RTX 5070 serving gemma-4 (dense, 4B).
upstream ocu_llm_pool {
    server 192.168.68.110:8080;
}
|
||||
|
||||
## Agent queue with circuit breaker, reached by its Docker container name.
upstream queue_service {
    server queue-service:8091;
}
|
||||
|
||||
## Harness dashboard UI, reached by its Docker container name.
upstream dashboard_service {
    server dashboard:3001;
}
|
||||
|
||||
## GPU dashboard UI running as a Docker container.
## NOTE(review): this host looks like a Compose-generated container name
## (project-service-index) rather than a service name like the other
## Docker upstreams use — confirm it stays stable across project renames
## and scaling.
upstream gpu_dashboard_pool {
    server syslog-harness-gpu-dashboard-1:8092;
}
|
||||
|
||||
## ------------------------------------------------------------------
|
||||
## Mapping: X-Syslog-Model header → upstream backend
|
||||
## ------------------------------------------------------------------
|
||||
map $http_x_syslog_model $gpu_upstream {
    ## A missing or unrecognised X-Syslog-Model value uses the default pool.
    default         amdpve_pool;

    ## amdpve_pool aliases
    standard        amdpve_pool;

    ## llmgpu_pool aliases
    heavy           llmgpu_pool;
    qwen3.5-27B     llmgpu_pool;

    ## ocu_llm_pool aliases
    light           ocu_llm_pool;
    gemma-4         ocu_llm_pool;
}
|
||||
|
||||
## Shared-memory zone "perip" (10 MB) keyed by client address, with a
## sustained rate of 10 requests/second per client. The burst allowance is
## not part of the zone; it is set where limit_req references this zone.
limit_req_zone $binary_remote_addr zone=perip:10m rate=10r/s;
|
||||
|
||||
## Catch-all virtual host on port 80. It serves the two dashboard UIs under
## /dashboard/ and /gpu/, proxies everything else to the GPU pool selected
## by $gpu_upstream, and hands requests to the queue service when the
## selected pool cannot be reached.
server {
    listen 80;
    server_name _;

    ## Emit relative Location headers so the /dashboard and /gpu redirects
    ## below do not bake in this server's own port, which may differ from
    ## the port clients actually connect to.
    absolute_redirect off;

    ## ------------------------------------------------------------------
    ## Dashboard — observability UI
    ## Prefix locations are chosen by longest match, not by order in the
    ## file, so these blocks win over "location /" wherever they appear.
    ## ------------------------------------------------------------------

    ## Canonicalise the bare path so relative links in the UI resolve
    ## under /dashboard/.
    location = /dashboard {
        return 301 /dashboard/$is_args$args;
    }

    ## The trailing slash on BOTH the location and the proxy_pass URI is
    ## required: nginx replaces the matched prefix with the proxy_pass URI,
    ## so "/dashboard/x" becomes "/x". With "location /dashboard" it would
    ## become "//x", and unrelated paths such as "/dashboardfoo" would also
    ## be captured.
    location /dashboard/ {
        proxy_pass http://dashboard_service/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    ## ------------------------------------------------------------------
    ## GPU Dashboard — observability UI
    ## ------------------------------------------------------------------

    ## Canonicalise the bare path, as for /dashboard above.
    location = /gpu {
        return 301 /gpu/$is_args$args;
    }

    ## Trailing slashes matter here for the same reason as /dashboard/;
    ## they also stop this block from swallowing API paths that merely
    ## start with "/gpu".
    location /gpu/ {
        proxy_pass http://gpu_dashboard_pool/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    ## ------------------------------------------------------------------
    ## Main location — proxy to the upstream chosen by the map
    ## ------------------------------------------------------------------
    location / {
        limit_req zone=perip burst=20 nodelay;

        ## Throttled requests must NOT use 503: error_page below catches
        ## 503 and would enqueue them, letting a client bypass the rate
        ## limit by flooding the queue. 429 is returned to the client as is.
        limit_req_status 429;

        ## $gpu_upstream holds an upstream group name; no URI part is given,
        ## so the original request URI is forwarded unchanged.
        proxy_pass http://$gpu_upstream;

        ## Preserve original host and client information
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        ## Forward the model header so backends can log it.
        ## (proxy_pass_header only affects *response* headers, so it cannot
        ## be used for this. An empty value means the header is not sent.)
        proxy_set_header X-Syslog-Model $http_x_syslog_model;

        ## Streaming support (SSE for LLM responses)
        proxy_buffering off;
        proxy_cache off;
        proxy_read_timeout 300s;
        proxy_send_timeout 300s;

        ## Retry policy. Each pool currently has a single server, so there
        ## is no "next" server and these settings only take effect once a
        ## pool gains additional servers.
        proxy_next_upstream error timeout http_502 http_503;
        proxy_next_upstream_tries 2;

        ## Expose the chosen pool for observability
        add_header X-Routed-To $gpu_upstream always;

        ## Hand the request to the queue when nginx itself fails to get a
        ## response from the selected pool (connect error / timeout).
        error_page 502 503 504 = @queue_fallback;
    }

    ## ------------------------------------------------------------------
    ## Queue fallback — enqueue when GPUs are unavailable
    ## ------------------------------------------------------------------
    location @queue_fallback {
        ## Named locations cannot carry a URI on proxy_pass, so the target
        ## path is set by rewriting the request URI before proxying.
        rewrite ^ /enqueue break;
        proxy_pass http://queue_service;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        ## The next two directives restate nginx defaults to make explicit
        ## that the original payload and its content type reach the queue.
        proxy_set_header Content-Type $content_type;
        proxy_pass_request_body on;
    }
}
|
||||
Reference in New Issue
Block a user