# Revision: 28fc57c5c7
# Commit summary:
#   - Model migration: gemma-4-E4B → qwen3.5-9b-vlm
#   - Dashboard reorder: Usage Over Time + GPU Metrics to top
#   - Router counter leak fix (gpu_decr in except handler)
#   - VLM slot upgrade 1→2
#   - Automated maintenance cron job
#   - LiteLLM config update
# Repository viewer listing for this file: 100 lines, 2.6 KiB, YAML
# Compose stack: Redis, a locally built router, the LiteLLM proxy, an nginx
# front end and a locally built dashboard. Every service except nginx
# publishes its port on 127.0.0.1 only; nginx publishes port 80 on all
# host interfaces.
version: '3.8'

services:
  # Redis with append-only persistence on the named volume `redis-data`.
  # Memory is capped at 256mb with allkeys-lru eviction (see `command`).
  redis:
    image: redis:7-alpine
    container_name: harness-redis
    restart: unless-stopped
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis-data:/data
    command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 5

  # Router image built from ./router. It receives the Redis URL and three
  # backend base URLs through its environment; what each backend serves is
  # not visible from this file.
  router:
    build: ./router
    container_name: harness-router
    restart: unless-stopped
    ports:
      - "127.0.0.1:9000:9000"
    environment:
      - REDIS_URL=redis://redis:6379
      - GPU_MOE_URL=http://192.168.68.15:8080/v1
      - GPU_DENSE_URL=http://192.168.68.8:8080/v1
      - GPU_LIGHT_URL=http://192.168.68.110:8080/v1
    healthcheck:
      # Probes the router's own /health endpoint from inside the container.
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:9000/health')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      # Wait until the Redis health check passes, not merely until it starts.
      redis:
        condition: service_healthy

  # LiteLLM proxy. Container port 4000 is published on host port 8081.
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    # LiteLLM command override to load config
    # (added to fix a config loading issue): points the proxy at the
    # config file mounted below.
    command: ["--config", "/app/config.yaml", "--port", "4000"]
    container_name: harness-litellm
    restart: unless-stopped
    ports:
      - "127.0.0.1:8081:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml
    environment:
      # NOTE(review): the master key is committed inline in this file.
      # Consider supplying it from an untracked .env file instead.
      - LITELLM_MASTER_KEY=sk-sys...-key
    extra_hosts:
      # Makes the Docker host reachable as host.docker.internal.
      - "host.docker.internal:host-gateway"
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      redis:
        condition: service_healthy

  # nginx front end; the only service published on all host interfaces.
  # Its configuration is bind-mounted read-only from ./nginx/nginx.conf.
  nginx:
    image: nginx:alpine
    container_name: harness-nginx
    restart: unless-stopped
    ports:
      - "80:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1/health"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      # Short form: start-order only, does not wait for these to be healthy.
      - litellm
      - dashboard

  # Dashboard image built from ./dashboard. It receives the Redis URL and a
  # comma-separated list of host:port sidecar addresses.
  dashboard:
    build: ./dashboard
    container_name: harness-dashboard
    restart: unless-stopped
    ports:
      - "127.0.0.1:3000:3000"
    environment:
      - REDIS_URL=redis://redis:6379
      - GPU_SIDECARS=192.168.68.15:8090,192.168.68.8:8090,192.168.68.110:8090
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3000/health')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      # Same health gate as router and litellm: wait for Redis to be healthy.
      redis:
        condition: service_healthy

volumes:
  # Named volume backing /data in the redis service.
  redis-data: