May 19, 2026: Full harness update
- Model migration: gemma-4-E4B → qwen3.5-9b-vlm
- Dashboard reorder: Usage Over Time + GPU Metrics to top
- Router counter leak fix (gpu_decr in except handler)
- VLM slot upgrade 1→2
- Automated maintenance cron job
- LiteLLM config update
This commit is contained in:
@@ -0,0 +1,97 @@
|
||||
# Compose stack for the harness: Redis, router, LiteLLM proxy, nginx, dashboard.
#
# NOTE(review): the top-level `version` key is obsolete under the Compose
# Specification (Docker Compose v2 ignores it and prints a warning). It is
# retained unchanged from the original file; confirm before removing.
version: '3.8'

services:
  # Redis with append-only persistence on the `redis-data` volume, capped at
  # 256 MB with allkeys-lru eviction. Published on the host loopback only.
  redis:
    image: redis:7-alpine
    container_name: harness-redis
    restart: unless-stopped
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis-data:/data
    command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 5

  # Router built from ./router, published on port 9000. It receives the Redis
  # URL and three upstream endpoint URLs through its environment and is only
  # started once Redis reports healthy.
  router:
    build: ./router
    container_name: harness-router
    restart: unless-stopped
    ports:
      - "9000:9000"
    environment:
      - REDIS_URL=redis://redis:6379
      - GPU_MOE_URL=http://192.168.68.15:8080/v1
      - GPU_DENSE_URL=http://192.168.68.8:8080/v1
      - GPU_LIGHT_URL=http://192.168.68.110:8080/v1
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:9000/health')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      redis:
        condition: service_healthy

  # LiteLLM proxy: container port 4000 is published on host port 8081.
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    # Command override so LiteLLM loads the config file mounted below
    # (added to fix a config loading issue).
    command: ["--config", "/app/config.yaml", "--port", "4000"]
    container_name: harness-litellm
    restart: unless-stopped
    ports:
      - "8081:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml
    environment:
      # Overridable from the shell or an .env file; falls back to the
      # previous hard-coded value when LITELLM_MASTER_KEY is unset or empty.
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-syslog-local-master-key}
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      redis:
        condition: service_healthy

  # nginx on host port 80 with a read-only config mount; started after the
  # litellm and dashboard containers.
  nginx:
    image: nginx:alpine
    container_name: harness-nginx
    restart: unless-stopped
    ports:
      - "80:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1/health"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      - litellm
      - dashboard

  # Dashboard built from ./dashboard, published on port 3000. It receives the
  # Redis URL and a comma-separated list of sidecar host:port addresses.
  dashboard:
    build: ./dashboard
    container_name: harness-dashboard
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - REDIS_URL=redis://redis:6379
      - GPU_SIDECARS=192.168.68.15:8090,192.168.68.8:8090,192.168.68.110:8090
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3000/health')"]
      interval: 15s
      timeout: 5s
      retries: 3
    # Wait for a healthy Redis, matching the router and litellm services.
    depends_on:
      redis:
        condition: service_healthy

volumes:
  # Named volume backing Redis's /data directory.
  redis-data:
|
||||
Reference in New Issue
Block a user