May 19, 2026: Full harness update

- Model migration: gemma-4-E4B → qwen3.5-9b-vlm
- Dashboard reorder: Usage Over Time + GPU Metrics to top
- Router counter leak fix (gpu_decr in except handler)
- VLM slot upgrade 1→2
- Automated maintenance cron job
- LiteLLM config update
This commit is contained in:
Abiba
2026-05-19 15:03:47 +00:00
commit 28fc57c5c7
15 changed files with 1455 additions and 0 deletions
+99
View File
@@ -0,0 +1,99 @@
# Docker Compose stack for the harness: Redis, the router, a LiteLLM proxy,
# the dashboard, and an nginx front end.
#
# Every service except nginx publishes its port on 127.0.0.1 only; nginx
# publishes port 80 on all host interfaces.

# NOTE: the top-level `version` key is obsolete under the Compose Specification
# (Docker Compose v2 ignores it and prints a warning); kept for older tooling.
version: '3.8'

services:
  # Redis instance referenced by the router and dashboard via REDIS_URL.
  redis:
    image: redis:7-alpine
    container_name: harness-redis
    restart: unless-stopped
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis-data:/data
    # Append-only persistence, capped at 256 MB with LRU eviction on all keys.
    command: redis-server --appendonly yes --maxmemory 256mb --maxmemory-policy allkeys-lru
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 5

  # Router service built from ./router; waits for Redis to be healthy.
  router:
    build: ./router
    container_name: harness-router
    restart: unless-stopped
    ports:
      - "127.0.0.1:9000:9000"
    environment:
      - REDIS_URL=redis://redis:6379
      # Backend endpoints on the LAN (presumably one per GPU host/model tier;
      # confirm against the router code).
      - GPU_MOE_URL=http://192.168.68.15:8080/v1
      - GPU_DENSE_URL=http://192.168.68.8:8080/v1
      - GPU_LIGHT_URL=http://192.168.68.110:8080/v1
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:9000/health')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      redis:
        condition: service_healthy

  # LiteLLM proxy; listens on 4000 in the container, published on host 8081.
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    # LiteLLM command override to load config
    # (added to fix a config loading issue).
    command: ["--config", "/app/config.yaml", "--port", "4000"]
    container_name: harness-litellm
    restart: unless-stopped
    ports:
      - "127.0.0.1:8081:4000"
    volumes:
      - ./litellm_config.yaml:/app/config.yaml
    environment:
      # NOTE(review): the master key is hard-coded in this file; consider
      # supplying it from the environment or an .env file instead.
      - LITELLM_MASTER_KEY=sk-sys...-key
    extra_hosts:
      # Lets the container reach the Docker host as host.docker.internal.
      - "host.docker.internal:host-gateway"
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      redis:
        condition: service_healthy

  # nginx front end; the only service published on all host interfaces.
  nginx:
    image: nginx:alpine
    container_name: harness-nginx
    restart: unless-stopped
    ports:
      - "80:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1/health"]
      interval: 15s
      timeout: 5s
      retries: 3
    depends_on:
      - litellm
      - dashboard

  # Dashboard built from ./dashboard.
  dashboard:
    build: ./dashboard
    container_name: harness-dashboard
    restart: unless-stopped
    ports:
      - "127.0.0.1:3000:3000"
    environment:
      - REDIS_URL=redis://redis:6379
      # Comma-separated host:port list (presumably GPU metrics sidecars;
      # confirm against the dashboard code).
      - GPU_SIDECARS=192.168.68.15:8090,192.168.68.8:8090,192.168.68.110:8090
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3000/health')"]
      interval: 15s
      timeout: 5s
      retries: 3
    # Wait for Redis to pass its healthcheck, matching router and litellm.
    depends_on:
      redis:
        condition: service_healthy

volumes:
  # Named volume backing Redis's /data directory.
  redis-data: