From 43382dac5b69ccf47fe063d15ad2fe7d9aac6b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:03 +0000 Subject: [PATCH 01/10] Initial commit: README --- README.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..82ff334 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +# Syslog Harness + +Operational orchestration layer for Syslog's internal AI agents. + +## Architecture + +``` +┌─────────────┐ ┌──────────────┐ ┌─────────────┐ +│ Agent │────>│ Nginx │────>│ GPU Pool │ +│ (Hermes) │ │ Router │ │ (MoE/Dense)│ +└─────────────┘ └──────────────┘ └─────────────┘ + │ + ├──> :8091 Queue Service (Docker) + │ + └──> :3001 Dashboard (Docker) +``` + +## Components + +| Service | Port | Container | Purpose | +|---|---|---|---| +| Nginx Router | 8080 | Host | Routes requests to GPU backends | +| Queue Service | 8091 | `syslog-queue` | Enqueues requests when GPUs are down | +| Dashboard | 3001 | `syslog-dashboard` | Observability UI + API | + +## GPU Routing + +| Header `X-Syslog-Model` | Backend | Model | +|---|---|---| +| (none) / `standard` | amdpve (.15) | qwen3.6-35B-A3B (MoE) | +| `heavy` / `qwen3.5-27B` | llmgpu (.8) | qwen3.5-27B (Dense) | +| `light` / `gemma-4` | ocu_llm (.110) | gemma-4-E4B (Light) | + +## Quick Start + +```bash +# Build & start +docker compose build +docker compose up -d + +# Verify +curl http://localhost:8091/health +curl http://localhost:3001/api/status +``` + +## Dashboard + +- **UI:** `http://NGINX_HOST:8080/dashboard/harness.html` +- **API:** `http://NGINX_HOST:8080/dashboard/api/status` + +## Circuit Breaker + +- Rate limit: 10 req/s per IP +- Burst: 20 requests +- Excess returns 503 +- Queue fallback on GPU 502/503 + +## Production Migration + +See [MIGRATION_PLAN.md](./MIGRATION_PLAN.md) + +--- +*Built for Syslog Solution LLC — Quality over speed.* From 
c85aaa570b04b3803cef77b620e48547ebc29c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:05 +0000 Subject: [PATCH 02/10] Add docker-compose --- docker-compose.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..2aa5890 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +services: + queue-service: + build: ./queue-service + container_name: syslog-queue + restart: unless-stopped + ports: + - "8091:8091" + environment: + - REDIS_HOST=192.168.68.7 + - REDIS_PORT=6379 + networks: + - harness-net + + dashboard: + build: ./dashboard + container_name: syslog-dashboard + restart: unless-stopped + ports: + - "3001:3001" + depends_on: + - queue-service + networks: + - harness-net + +networks: + harness-net: + driver: bridge From b55b954967c5771d835a7665a9f65c31b5dfe136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:05 +0000 Subject: [PATCH 03/10] Add queue service --- queue-service/queue-service.py | 121 +++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 queue-service/queue-service.py diff --git a/queue-service/queue-service.py b/queue-service/queue-service.py new file mode 100644 index 0000000..3e2bbed --- /dev/null +++ b/queue-service/queue-service.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +"""Syslog Inference Queue Service — Circuit breaker + request queuing. 
+ +Ports: 8091 +Endpoints: + /health — liveness probe (Nginx upstream check) + /enqueue — POST inference request into queue (fallback from Nginx) + /status — GET queue depth + circuit breaker state +""" + +import json +import os +import sys +import time +import urllib.request +from flask import Flask, request, jsonify + +app = Flask(__name__) + +# Configuration +REDIS_HOST = os.getenv("REDIS_HOST", "192.168.68.7") +REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) +QUEUE_KEY = "inference:requests" +CIRCUIT_OPEN_THRESHOLD = 50 +CIRCUIT_WARN_THRESHOLD = 30 + +# GPU endpoints for draining +GPUS = { + "amdpve": "192.168.68.15:8080", + "llmgpu": "192.168.68.8:8080", + "ocu_llm": "192.168.68.110:8080", +} + + +def get_redis(): + try: + import redis + return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True) + except Exception: + return None + + +def get_queue_depth(r): + try: + return r.llen(QUEUE_KEY) + except Exception: + return 0 + + +def check_gpu_health(endpoint): + try: + req = urllib.request.Request(f"http://{endpoint}/v1/models") + req.add_header("User-Agent", "queue-service/1.0") + resp = urllib.request.urlopen(req, timeout=3) + return resp.status == 200 + except Exception: + return False + + +@app.route("/health") +def health(): + """Nginx upstream health probe. 
Returns 200 if service is alive.""" + return jsonify({"status": "ok", "service": "queue-service"}), 200 + + +@app.route("/enqueue", methods=["POST"]) +def enqueue(): + """Fallback endpoint — Nginx calls this when all GPU upstreams are down.""" + r = get_redis() + if not r: + return jsonify({"error": "Redis unavailable"}), 503 + + depth = get_queue_depth(r) + if depth >= CIRCUIT_OPEN_THRESHOLD: + return jsonify({ + "error": "Circuit breaker OPEN", + "queue_depth": depth, + "threshold": CIRCUIT_OPEN_THRESHOLD + }), 503 + + # Store the request in queue + payload = request.get_data(as_text=True) + headers = {k: v for k, v in request.headers if k.startswith("X-")} + r.rpush(QUEUE_KEY, json.dumps({ + "payload": payload, + "headers": headers, + "queued_at": time.time() + })) + + new_depth = get_queue_depth(r) + return jsonify({ + "status": "queued", + "position": new_depth, + "circuit": "warn" if new_depth >= CIRCUIT_WARN_THRESHOLD else "closed" + }), 202 + + +@app.route("/status") +def status(): + """GET queue depth + circuit breaker state + GPU health.""" + r = get_redis() + depth = get_queue_depth(r) if r else -1 + circuit = "open" if depth >= CIRCUIT_OPEN_THRESHOLD else ("warn" if depth >= CIRCUIT_WARN_THRESHOLD else "closed") + + gpu_health = {} + for name, endpoint in GPUS.items(): + gpu_health[name] = "up" if check_gpu_health(endpoint) else "down" + + return jsonify({ + "queue_depth": depth, + "circuit_breaker": circuit, + "gpu_health": gpu_health, + "thresholds": { + "warn": CIRCUIT_WARN_THRESHOLD, + "open": CIRCUIT_OPEN_THRESHOLD + } + }) + + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=8091) From e1f12c3462035c833322a983bb193ec497ac1221 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:07 +0000 Subject: [PATCH 04/10] Add dashboard --- dashboard/harness.html | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 dashboard/harness.html diff --git 
a/dashboard/harness.html b/dashboard/harness.html new file mode 100644 index 0000000..e69de29 From c42f3a99792702c2e251d62e84be790ab096a2fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:32 +0000 Subject: [PATCH 05/10] Add migration plan --- MIGRATION_PLAN.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 MIGRATION_PLAN.md diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md new file mode 100644 index 0000000..5a005aa --- /dev/null +++ b/MIGRATION_PLAN.md @@ -0,0 +1,71 @@ +# Syslog Harness — Production Migration Plan + +## Current State (Development) +- **Host:** CT 114 (192.168.68.123) +- **Docker containers:** `syslog-queue` (:8091), `syslog-dashboard` (:3001) +- **Nginx:** Local on CT 114, routing to GPUs + Docker services +- **Status:** All components verified and operational + +## Target State (Production) +- **Host:** New CT (e.g., `docker-vm` on 192.168.68.x) +- **Docker containers:** Same queue + dashboard services +- **Nginx:** Containerized on production CT +- **GPU backends:** Same (192.168.68.15, .8, .110) + +## Migration Steps + +### 1. Prepare Production CT +```bash +# Create new CT on Proxmox +# Install Docker +apt update && apt install -y docker.io docker-compose-plugin + +# Clone the harness repo +git clone REPO_URL /root/syslog-harness +cd /root/syslog-harness +``` + +### 2. Update docker-compose.yml for Production +- Change `REDIS_HOST` to production Redis IP +- Update GPU endpoint env vars if IPs change +- Add volume mounts for persistence + +### 3. Build & Deploy +```bash +# Build images +docker compose build + +# Start services +docker compose up -d + +# Verify health +curl http://localhost:8091/health +curl http://localhost:3001/api/status +``` + +### 4. Configure Nginx +- Copy `/etc/nginx/conf.d/gpu-router.conf` to production CT +- Update upstream IPs if needed +- Test and reload + +### 5. 
DNS / Routing Update +- Point agent traffic to new CT IP +- Update Hermes config `inference_api_url` +- Test agent routing + +### 6. Verification Checklist +- [ ] Queue service health check passes +- [ ] Dashboard API returns GPU health +- [ ] Nginx routes to correct GPU based on header +- [ ] Circuit breaker triggers on excess load +- [ ] Queue fallback works when GPUs down +- [ ] Agent requests reach correct model + +## Rollback Plan +- Keep CT 114 running as backup +- Revert DNS/routing to .123 if issues +- Docker containers can be stopped/started instantly + +--- +*Created: May 15, 2026* +*Status: Development verified, ready for production migration* From a28b3a557da45a9804728d67da92bbea674c2d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:33 +0000 Subject: [PATCH 06/10] Add Nginx router config --- gpu-router.conf | 106 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 gpu-router.conf diff --git a/gpu-router.conf b/gpu-router.conf new file mode 100644 index 0000000..cc07223 --- /dev/null +++ b/gpu-router.conf @@ -0,0 +1,106 @@ +## Syslog GPU Router — Nginx Configuration +## Routes incoming agent requests to the appropriate GPU backend +## based on the X-Syslog-Model header. 
+ +upstream amdpve_pool { + ## Strix Halo 395 — qwen3.6-35B-A3B (MoE) — Default workhorse + server 192.168.68.15:8080; +} + +upstream llmgpu_pool { + ## RTX 3090 — qwen3.5-27B (Dense) — Heavy reasoning + server 192.168.68.8:8080; +} + +upstream ocu_llm_pool { + ## RTX 5070 — gemma-4 (Dense 4B) — Ultra-light tasks + server 192.168.68.110:8080; +} + +upstream queue_service { + ## Agent queue with circuit breaker (Docker container) + server 127.0.0.1:8091; +} + +upstream dashboard_service { + ## Harness dashboard (Docker container) + server 127.0.0.1:3001; +} + +## ------------------------------------------------------------------ +## Mapping: X-Syslog-Model header → upstream backend +## ------------------------------------------------------------------ +map $http_x_syslog_model $gpu_upstream { + default amdpve_pool; # missing header → default workhorse + "standard" amdpve_pool; + "heavy" llmgpu_pool; + "qwen3.5-27B" llmgpu_pool; + "light" ocu_llm_pool; + "gemma-4" ocu_llm_pool; +} + +server { + listen 8080; + server_name _; + + # Rate limit zone — 10 req/s per IP, burst of 20 + limit_req_zone $binary_remote_addr zone=perip:10m rate=10r/s; + + ## ------------------------------------------------------------------ + ## Dashboard — observability UI (MUST be before / catch-all) + ## ------------------------------------------------------------------ + location /dashboard { + proxy_pass http://dashboard_service/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + ## ------------------------------------------------------------------ + ## Main location — proxy to selected upstream + ## ------------------------------------------------------------------ + location / { + limit_req zone=perip burst=20 nodelay; + limit_req_status 503; + proxy_pass http://$gpu_upstream; + + ## Preserve original host and headers + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + 
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + ## Pass through the model header so backends can log it + proxy_pass_header X-Syslog-Model; + + ## Streaming support (SSE for LLM responses) + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 300s; + proxy_send_timeout 300s; + + ## Basic failover — retry on error or timeout + proxy_next_upstream error timeout http_502 http_503; + proxy_next_upstream_tries 2; + + ## Add a response header for observability + add_header X-Routed-To $gpu_upstream always; + + ## Fallback to queue when all GPU upstreams are down + error_page 502 503 504 = @queue_fallback; + } + + ## ------------------------------------------------------------------ + ## Queue fallback — enqueue when GPUs are unavailable + ## ------------------------------------------------------------------ + location @queue_fallback { + rewrite ^ /enqueue break; + proxy_pass http://queue_service; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Content-Type $content_type; + proxy_pass_request_body on; + } +} From 37f7c95b05c5db2164f1b44fe822d55fd7bb1ff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:07:34 +0000 Subject: [PATCH 07/10] Add env example --- .env.example | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..528aa95 --- /dev/null +++ b/.env.example @@ -0,0 +1,8 @@ +# Syslog Harness Environment +REDIS_HOST=192.168.68.8 +REDIS_PORT=6379 +AMDPVE_ENDPOINT=http://192.168.68.15:8080 +LLMGPU_ENDPOINT=http://192.168.68.8:8080 +OCU_LLM_ENDPOINT=http://192.168.68.110:8080 +CIRCUIT_BREAKER_THRESHOLD=5 +CIRCUIT_BREAKER_TIMEOUT=30 From 7d00bbec0e3cce31bfd295fcb0936b7af72150f9 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:34:49 +0000 Subject: [PATCH 08/10] Add Dockerfile.queue --- Dockerfile.queue | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Dockerfile.queue diff --git a/Dockerfile.queue b/Dockerfile.queue new file mode 100644 index 0000000..e69de29 From cf7f61650fdaf558098ea83e5bc0f4e545d95278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:34:52 +0000 Subject: [PATCH 09/10] Add Dockerfile.dashboard --- Dockerfile.dashboard | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Dockerfile.dashboard diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard new file mode 100644 index 0000000..e69de29 From b65ea22765fcff6cd96028eedc6c781be1db6125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?= Date: Fri, 15 May 2026 21:35:13 +0000 Subject: [PATCH 10/10] Update Nginx Docker config --- gpu-router-docker.conf | 106 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 gpu-router-docker.conf diff --git a/gpu-router-docker.conf b/gpu-router-docker.conf new file mode 100644 index 0000000..10e930f --- /dev/null +++ b/gpu-router-docker.conf @@ -0,0 +1,106 @@ +## Syslog GPU Router — Nginx Configuration (Docker-internal) +## Routes incoming agent requests to the appropriate GPU backend +## based on the X-Syslog-Model header. 
+ +upstream amdpve_pool { + ## Strix Halo 395 — qwen3.6-35B-A3B (MoE) — Default workhorse + server 192.168.68.15:8080; +} + +upstream llmgpu_pool { + ## RTX 3090 — qwen3.5-27B (Dense) — Heavy reasoning + server 192.168.68.8:8080; +} + +upstream ocu_llm_pool { + ## RTX 5070 — gemma-4 (Dense 4B) — Ultra-light tasks + server 192.168.68.110:8080; +} + +upstream queue_service { + ## Agent queue with circuit breaker (Docker container) + server queue-service:8091; +} + +upstream dashboard_service { + ## Harness dashboard (Docker container) + server dashboard:3001; +} + +## ------------------------------------------------------------------ +## Mapping: X-Syslog-Model header → upstream backend +## ------------------------------------------------------------------ +map $http_x_syslog_model $gpu_upstream { + default amdpve_pool; + "standard" amdpve_pool; + "heavy" llmgpu_pool; + "qwen3.5-27B" llmgpu_pool; + "light" ocu_llm_pool; + "gemma-4" ocu_llm_pool; +} + +## Rate limit zone — 10 req/s per IP, burst of 20 +limit_req_zone $binary_remote_addr zone=perip:10m rate=10r/s; + +server { + listen 80; + server_name _; + + ## ------------------------------------------------------------------ + ## Dashboard — observability UI (MUST be before / catch-all) + ## ------------------------------------------------------------------ + location /dashboard { + proxy_pass http://dashboard_service/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + ## ------------------------------------------------------------------ + ## Main location — proxy to selected upstream + ## ------------------------------------------------------------------ + location / { + limit_req zone=perip burst=20 nodelay; + limit_req_status 503; + proxy_pass http://$gpu_upstream; + + ## Preserve original host and headers + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + ## Pass through the model header so backends can log it + proxy_pass_header X-Syslog-Model; + + ## Streaming support (SSE for LLM responses) + proxy_buffering off; + proxy_cache off; + proxy_read_timeout 300s; + proxy_send_timeout 300s; + + ## Basic failover — retry on error or timeout + proxy_next_upstream error timeout http_502 http_503; + proxy_next_upstream_tries 2; + + ## Add a response header for observability + add_header X-Routed-To $gpu_upstream always; + + ## Fallback to queue when all GPU upstreams are down + error_page 502 503 504 = @queue_fallback; + } + + ## ------------------------------------------------------------------ + ## Queue fallback — enqueue when GPUs are unavailable + ## ------------------------------------------------------------------ + location @queue_fallback { + rewrite ^ /enqueue break; + proxy_pass http://queue_service; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Content-Type $content_type; + proxy_pass_request_body on; + } +}