Initial commit: CT 116 inference harness — nginx, LiteLLM, router, dashboard, Redis
- Complexity-based routing (MoE default, Dense heavy, Gemma light) - Per-agent API keys with metrics tracking - Time-series usage graphs (24h/7d/30d) - Streaming support (SSE passthrough) - Unicode cleanup (ASCII-only output) - Vision support (gemma-4-E4B) - Tier enforcement (starter/professional/enterprise) - GPU health monitoring via sidecar polling - Unified dashboard with line graph
This commit is contained in:
@@ -0,0 +1,77 @@
|
||||
# Docker Compose stack for the inference harness: Redis, router, LiteLLM
# proxy, nginx, and dashboard.
#
# Values written as ${NAME:-default} can be overridden from the shell
# environment or from an .env file next to this compose file; each default is
# the value that was previously hard-coded here, so a plain
# `docker compose up` behaves as before.
version: '3.8'

services:
  # Shared Redis instance; router and dashboard receive its URL via REDIS_URL.
  redis:
    image: redis:7-alpine
    container_name: harness-redis
    restart: unless-stopped
    ports:
      # Published on the host loopback interface only.
      - "127.0.0.1:6379:6379"
    volumes:
      - redis-data:/data
    # Append-only persistence enabled; memory capped at 256 MB with LRU
    # eviction across all keys once the cap is reached.
    command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
    healthcheck:
      # Other services gate their startup on this check (see depends_on).
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 5

  # Request router, built from ./router.
  router:
    build: ./router
    container_name: harness-router
    restart: unless-stopped
    ports:
      # NOTE(review): published on all host interfaces, unlike redis; confirm
      # direct access (bypassing nginx) is intended.
      - "9000:9000"
    environment:
      - REDIS_URL=redis://redis:6379
      # Backend inference endpoints, overridable per deployment.
      # NOTE(review): the commit description maps these to "MoE default,
      # Dense heavy, Gemma light" - confirm against the router code.
      - GPU_MOE_URL=${GPU_MOE_URL:-http://192.168.68.15:8080/v1}
      - GPU_DENSE_URL=${GPU_DENSE_URL:-http://192.168.68.8:8080/v1}
      - GPU_LIGHT_URL=${GPU_LIGHT_URL:-http://192.168.68.110:8080/v1}
    depends_on:
      redis:
        condition: service_healthy

  # LiteLLM proxy, configured from ./litellm_config.yaml.
  litellm:
    # NOTE(review): floating tag; pin a version or digest for reproducible
    # deployments.
    image: ghcr.io/berriai/litellm:main-stable
    # LiteLLM command override to load config
    # (added to fix config loading issue).
    command: ["--config", "/app/config.yaml", "--port", "4000"]
    container_name: harness-litellm
    restart: unless-stopped
    ports:
      # Host port 8081 maps to the proxy's port 4000 (set via --port above).
      # NOTE(review): published on all host interfaces; confirm this is
      # intended.
      - "8081:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml
    environment:
      # SECURITY: set LITELLM_MASTER_KEY in the environment or an untracked
      # .env file. The fallback is the key that was previously committed in
      # this file; treat it as compromised and rotate it for any
      # non-local deployment.
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-syslog-local-master-key}
    depends_on:
      redis:
        condition: service_healthy

  # nginx front end on host port 80, configured from ./nginx/nginx.conf.
  nginx:
    image: nginx:alpine
    container_name: harness-nginx
    restart: unless-stopped
    ports:
      - "80:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      # Start order only; does not wait for these services to be ready.
      # NOTE(review): router is not listed - confirm nginx.conf has no
      # upstream pointing at it.
      - litellm
      - dashboard

  # Dashboard, built from ./dashboard.
  dashboard:
    build: ./dashboard
    container_name: harness-dashboard
    restart: unless-stopped
    ports:
      # NOTE(review): published on all host interfaces; confirm this is
      # intended.
      - "3000:3000"
    environment:
      - REDIS_URL=redis://redis:6379
      # Comma-separated host:port list of GPU sidecars.
      - GPU_SIDECARS=${GPU_SIDECARS:-192.168.68.15:8090,192.168.68.8:8090,192.168.68.110:8090}
    depends_on:
      # Wait for a healthy Redis, consistent with router and litellm.
      redis:
        condition: service_healthy

volumes:
  # Named volume backing Redis /data.
  redis-data:
|
||||
Reference in New Issue
Block a user