From 43382dac5b69ccf47fe063d15ad2fe7d9aac6b9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:03 +0000
Subject: [PATCH 01/10] Initial commit: README

---
 README.md | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 README.md
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..82ff334
--- /dev/null
+++ b/README.md
@@ -0,0 +1,63 @@
+# Syslog Harness
+
+Operational orchestration layer for Syslog's internal AI agents.
+
+## Architecture
+
+```
+┌─────────────┐     ┌──────────────┐     ┌─────────────┐
+│  Agent      │────>│  Nginx       │────>│  GPU Pool   │
+│  (Hermes)   │     │  Router      │     │  (MoE/Dense)│
+└─────────────┘     └──────────────┘     └─────────────┘
+                         │
+                         ├──> :8091 Queue Service (Docker)
+                         │
+                         └──> :3001 Dashboard (Docker)
+```
+
+## Components
+
+| Service | Port | Container | Purpose |
+|---|---|---|---|
+| Nginx Router | 8080 | Host | Routes requests to GPU backends |
+| Queue Service | 8091 | `syslog-queue` | Enqueues requests when GPUs are down |
+| Dashboard | 3001 | `syslog-dashboard` | Observability UI + API |
+
+## GPU Routing
+
+| Header `X-Syslog-Model` | Backend | Model |
+|---|---|---|
+| (none) / `standard` | amdpve (.15) | qwen3.6-35B-A3B (MoE) |
+| `heavy` / `qwen3.5-27B` | llmgpu (.8) | qwen3.5-27B (Dense) |
+| `light` / `gemma-4` | ocu_llm (.110) | gemma-4-E4B (Light) |
+
+## Quick Start
+
+```bash
+# Build & start
+docker compose build
+docker compose up -d
+
+# Verify
+curl http://localhost:8091/health
+curl http://localhost:3001/api/status
+```
+
+## Dashboard
+
+- **UI:** `http://<host>:8080/dashboard/harness.html`
+- **API:** `http://<host>:8080/dashboard/api/status`
+
+## Circuit Breaker
+
+- Rate limit: 10 req/s per IP
+- Burst: 20 requests
+- Excess returns 503
+- Queue fallback on GPU 502/503
+
+## Production Migration
+
+See [MIGRATION_PLAN.md](./MIGRATION_PLAN.md)
+
+---
+*Built for Syslog Solution LLC — Quality over speed.*

From c85aaa570b04b3803cef77b620e48547ebc29c6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:05 +0000
Subject: [PATCH 02/10] Add docker-compose

---
 docker-compose.yml | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 docker-compose.yml

diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..2aa5890
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,27 @@
+services:
+  queue-service:
+    build: ./queue-service
+    container_name: syslog-queue
+    restart: unless-stopped
+    ports:
+      - "8091:8091"
+    environment:
+      - REDIS_HOST=192.168.68.7
+      - REDIS_PORT=6379
+    networks:
+      - harness-net
+
+  dashboard:
+    build: ./dashboard
+    container_name: syslog-dashboard
+    restart: unless-stopped
+    ports:
+      - "3001:3001"
+    depends_on:
+      - queue-service
+    networks:
+      - harness-net
+
+networks:
+  harness-net:
+    driver: bridge

From b55b954967c5771d835a7665a9f65c31b5dfe136 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:05 +0000
Subject: [PATCH 03/10] Add queue service

---
 queue-service/queue-service.py | 121 +++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 queue-service/queue-service.py

diff --git a/queue-service/queue-service.py b/queue-service/queue-service.py
new file mode 100644
index 0000000..3e2bbed
--- /dev/null
+++ b/queue-service/queue-service.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+"""Syslog Inference Queue Service — Circuit breaker + request queuing.
+
+Ports: 8091
+Endpoints:
+  /health          — liveness probe (Nginx upstream check)
+  /enqueue         — POST inference request into queue (fallback from Nginx)
+  /status          — GET queue depth + circuit breaker state
+"""
+
+import json
+import os
+import sys
+import time
+import urllib.request
+from flask import Flask, request, jsonify
+
+app = Flask(__name__)
+
+# Configuration — the Redis location can be overridden through the environment.
+REDIS_HOST = os.getenv("REDIS_HOST", "192.168.68.7")
+REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
+QUEUE_KEY = "inference:requests"  # Redis list that holds the queued requests
+CIRCUIT_OPEN_THRESHOLD = 50  # queue depth at which /enqueue starts answering 503
+CIRCUIT_WARN_THRESHOLD = 30  # queue depth at which the circuit is reported as "warn"
+
+# GPU endpoints as host:port (no scheme); probed by /status for health reporting.
+GPUS = {
+    "amdpve": "192.168.68.15:8080",
+    "llmgpu": "192.168.68.8:8080",
+    "ocu_llm": "192.168.68.110:8080",
+}
+
+
+def get_redis():  # returns a redis.Redis client, or None when one cannot be created
+    try:
+        import redis  # imported lazily so a missing package yields None instead of an import-time crash
+        return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, decode_responses=True)
+    except Exception:  # NOTE(review): presumably redis-py connects lazily, so an unreachable server is not detected here — confirm
+        return None
+
+
+def get_queue_depth(r):  # r: Redis client; returns the length of the QUEUE_KEY list
+    try:
+        return r.llen(QUEUE_KEY)
+    except Exception:
+        return 0  # best effort: any Redis failure is reported as an empty queue
+
+
+def check_gpu_health(endpoint):
+    """Return True if GET http://<endpoint>/v1/models answers 200 within 3 s, else False."""
+    try:
+        req = urllib.request.Request(f"http://{endpoint}/v1/models", headers={"User-Agent": "queue-service/1.0"})
+        with urllib.request.urlopen(req, timeout=3) as resp:  # always release the socket
+            return resp.status == 200
+    except Exception:
+        return False
+
+
+@app.route("/health")
+def health():
+    """Liveness probe for the Nginx upstream check: always answers 200 with a static JSON body (Redis and GPUs are not consulted)."""
+    return jsonify({"status": "ok", "service": "queue-service"}), 200
+
+
+@app.route("/enqueue", methods=["POST"])
+def enqueue():
+    """Fallback endpoint for Nginx: queue the POSTed request (202), or answer 503 if Redis is down or the circuit is open."""
+    r = get_redis()
+    if not r:
+        return jsonify({"error": "Redis unavailable"}), 503
+
+    depth = get_queue_depth(r)
+    if depth >= CIRCUIT_OPEN_THRESHOLD:
+        return jsonify({
+            "error": "Circuit breaker OPEN",
+            "queue_depth": depth,
+            "threshold": CIRCUIT_OPEN_THRESHOLD
+        }), 503
+
+    entry = json.dumps({
+        "payload": request.get_data(as_text=True),
+        "headers": {k: v for k, v in request.headers if k.startswith("X-")},
+        "queued_at": time.time()
+    })
+    try:
+        # RPUSH returns the list length after the push: the exact queue position.
+        new_depth = r.rpush(QUEUE_KEY, entry)
+    except Exception:  # having a client object does not guarantee Redis is reachable
+        return jsonify({"error": "Redis unavailable"}), 503
+    return jsonify({
+        "status": "queued",
+        "position": new_depth,
+        "circuit": "warn" if new_depth >= CIRCUIT_WARN_THRESHOLD else "closed"
+    }), 202
+
+
+@app.route("/status")
+def status():
+    """Report queue depth, circuit breaker state, and per-GPU health as JSON."""
+    client = get_redis()
+    depth = get_queue_depth(client) if client else -1  # -1 means no Redis client
+    if depth >= CIRCUIT_OPEN_THRESHOLD:
+        circuit = "open"
+    else:
+        circuit = "warn" if depth >= CIRCUIT_WARN_THRESHOLD else "closed"
+    gpu_health = {
+        name: "up" if check_gpu_health(endpoint) else "down"
+        for name, endpoint in GPUS.items()
+    }
+    limits = {"warn": CIRCUIT_WARN_THRESHOLD, "open": CIRCUIT_OPEN_THRESHOLD}
+    return jsonify({
+        "queue_depth": depth,
+        "circuit_breaker": circuit,
+        "gpu_health": gpu_health,
+        "thresholds": limits
+    })
+
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=8091)  # bind all interfaces on the documented service port

From e1f12c3462035c833322a983bb193ec497ac1221 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:07 +0000
Subject: [PATCH 04/10] Add dashboard

---
 dashboard/harness.html | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 dashboard/harness.html

diff --git a/dashboard/harness.html b/dashboard/harness.html
new file mode 100644
index 0000000..e69de29

From c42f3a99792702c2e251d62e84be790ab096a2fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:32 +0000
Subject: [PATCH 05/10] Add migration plan

---
 MIGRATION_PLAN.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 MIGRATION_PLAN.md

diff --git a/MIGRATION_PLAN.md b/MIGRATION_PLAN.md
new file mode 100644
index 0000000..5a005aa
--- /dev/null
+++ b/MIGRATION_PLAN.md
@@ -0,0 +1,71 @@
+# Syslog Harness — Production Migration Plan
+
+## Current State (Development)
+- **Host:** CT 114 (192.168.68.123)
+- **Docker containers:** `syslog-queue` (:8091), `syslog-dashboard` (:3001)
+- **Nginx:** Local on CT 114, routing to GPUs + Docker services
+- **Status:** All components verified and operational
+
+## Target State (Production)
+- **Host:** New CT (e.g., `docker-vm` on 192.168.68.x)
+- **Docker containers:** Same queue + dashboard services
+- **Nginx:** Containerized on production CT
+- **GPU backends:** Same (192.168.68.15, .8, .110)
+
+## Migration Steps
+
+### 1. Prepare Production CT
+```bash
+# Create new CT on Proxmox
+# Install Docker
+apt update && apt install -y docker.io docker-compose-plugin
+
+# Clone the harness repo
+git clone <repo-url> /root/syslog-harness
+cd /root/syslog-harness
+```
+
+### 2. Update docker-compose.yml for Production
+- Change `REDIS_HOST` to production Redis IP
+- Update GPU endpoint env vars if IPs change
+- Add volume mounts for persistence
+
+### 3. Build & Deploy
+```bash
+# Build images
+docker compose build
+
+# Start services
+docker compose up -d
+
+# Verify health
+curl http://localhost:8091/health
+curl http://localhost:3001/api/status
+```
+
+### 4. Configure Nginx
+- Copy `/etc/nginx/conf.d/gpu-router.conf` to production CT
+- Update upstream IPs if needed
+- Test and reload
+
+### 5. DNS / Routing Update
+- Point agent traffic to new CT IP
+- Update Hermes config `inference_api_url`
+- Test agent routing
+
+### 6. Verification Checklist
+- [ ] Queue service health check passes
+- [ ] Dashboard API returns GPU health
+- [ ] Nginx routes to correct GPU based on header
+- [ ] Circuit breaker triggers on excess load
+- [ ] Queue fallback works when GPUs down
+- [ ] Agent requests reach correct model
+
+## Rollback Plan
+- Keep CT 114 running as backup
+- Revert DNS/routing to .123 if issues
+- Docker containers can be stopped/started instantly
+
+---
+*Created: May 15, 2026*
+*Status: Development verified, ready for production migration*

From a28b3a557da45a9804728d67da92bbea674c2d71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:33 +0000
Subject: [PATCH 06/10] Add Nginx router config

---
 gpu-router.conf | 106 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 gpu-router.conf

diff --git a/gpu-router.conf b/gpu-router.conf
new file mode 100644
index 0000000..cc07223
--- /dev/null
+++ b/gpu-router.conf
@@ -0,0 +1,106 @@
+## Syslog GPU Router — Nginx Configuration
+## Routes incoming agent requests to the appropriate GPU backend
+## based on the X-Syslog-Model header.
+
+upstream amdpve_pool {
+    ## Strix Halo 395 — qwen3.6-35B-A3B (MoE) — Default workhorse
+    server 192.168.68.15:8080;
+}
+
+upstream llmgpu_pool {
+    ## RTX 3090 — qwen3.5-27B (Dense) — Heavy reasoning
+    server 192.168.68.8:8080;
+}
+
+upstream ocu_llm_pool {
+    ## RTX 5070 — gemma-4 (Dense 4B) — Ultra-light tasks
+    server 192.168.68.110:8080;
+}
+
+upstream queue_service {
+    ## Agent queue with circuit breaker (Docker container)
+    server 127.0.0.1:8091;
+}
+
+upstream dashboard_service {
+    ## Harness dashboard (Docker container)
+    server 127.0.0.1:3001;
+}
+
+## ------------------------------------------------------------------
+## Mapping: X-Syslog-Model header → upstream backend
+## ------------------------------------------------------------------
+map $http_x_syslog_model $gpu_upstream {
+    default          amdpve_pool;   # missing header → default workhorse
+    "standard"       amdpve_pool;
+    "heavy"          llmgpu_pool;
+    "qwen3.5-27B"    llmgpu_pool;
+    "light"          ocu_llm_pool;
+    "gemma-4"        ocu_llm_pool;
+}
+
+## Rate limit zone — 10 req/s per IP, burst of 20 (http context only, so outside server {})
+limit_req_zone $binary_remote_addr zone=perip:10m rate=10r/s;
+
+server {
+    listen 8080;
+    server_name _;
+
+    ## ------------------------------------------------------------------
+    ## Dashboard — observability UI (longest-prefix match beats the / catch-all)
+    ## ------------------------------------------------------------------
+    location /dashboard/ {
+        proxy_pass http://dashboard_service/;
+        proxy_set_header Host              $host;
+        proxy_set_header X-Real-IP         $remote_addr;
+        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
+    }
+
+    ## ------------------------------------------------------------------
+    ## Main location — proxy to selected upstream
+    ## ------------------------------------------------------------------
+    location / {
+        limit_req zone=perip burst=20 nodelay;
+        limit_req_status 503;
+        proxy_pass http://$gpu_upstream;
+
+        ## Preserve original host and headers
+        proxy_set_header Host              $host;
+        proxy_set_header X-Real-IP         $remote_addr;
+        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        ## Forward the model header explicitly so backends can log it
+        proxy_set_header X-Syslog-Model $http_x_syslog_model;
+
+        ## Streaming support (SSE for LLM responses)
+        proxy_buffering off;
+        proxy_cache     off;
+        proxy_read_timeout  300s;
+        proxy_send_timeout  300s;
+
+        ## Basic failover — retry on error or timeout
+        proxy_next_upstream error timeout http_502 http_503;
+        proxy_next_upstream_tries 2;
+
+        ## Add a response header for observability
+        add_header X-Routed-To $gpu_upstream always;
+
+        ## Fallback to queue on 502/503/504 (NOTE: this also catches limit_req's own 503)
+        error_page 502 503 504 = @queue_fallback;
+    }
+
+    ## ------------------------------------------------------------------
+    ## Queue fallback — enqueue when GPUs are unavailable
+    ## ------------------------------------------------------------------
+    location @queue_fallback {
+        rewrite ^ /enqueue break;
+        proxy_pass http://queue_service;
+        proxy_set_header Host              $host;
+        proxy_set_header X-Real-IP         $remote_addr;
+        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_set_header Content-Type      $content_type;
+        proxy_pass_request_body            on;
+    }
+}

From 37f7c95b05c5db2164f1b44fe822d55fd7bb1ff1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:07:34 +0000
Subject: [PATCH 07/10] Add env example

---
 .env.example | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..528aa95
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,8 @@
+# Syslog Harness Environment
+REDIS_HOST=192.168.68.7
+REDIS_PORT=6379
+AMDPVE_ENDPOINT=http://192.168.68.15:8080
+LLMGPU_ENDPOINT=http://192.168.68.8:8080
+OCU_LLM_ENDPOINT=http://192.168.68.110:8080
+CIRCUIT_BREAKER_THRESHOLD=5
+CIRCUIT_BREAKER_TIMEOUT=30

From 7d00bbec0e3cce31bfd295fcb0936b7af72150f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:34:49 +0000
Subject: [PATCH 08/10] Add Dockerfile.queue

---
 Dockerfile.queue | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 Dockerfile.queue

diff --git a/Dockerfile.queue b/Dockerfile.queue
new file mode 100644
index 0000000..e69de29

From cf7f61650fdaf558098ea83e5bc0f4e545d95278 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:34:52 +0000
Subject: [PATCH 09/10] Add Dockerfile.dashboard

---
 Dockerfile.dashboard | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 Dockerfile.dashboard

diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard
new file mode 100644
index 0000000..e69de29

From b65ea22765fcff6cd96028eedc6c781be1db6125 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mumuni=20=F0=9F=A6=85=20=28Syslog=20Falcon=29?=
 <bot@sysloggh.com>
Date: Fri, 15 May 2026 21:35:13 +0000
Subject: [PATCH 10/10] Update Nginx Docker config

---
 gpu-router-docker.conf | 106 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 gpu-router-docker.conf

diff --git a/gpu-router-docker.conf b/gpu-router-docker.conf
new file mode 100644
index 0000000..10e930f
--- /dev/null
+++ b/gpu-router-docker.conf
@@ -0,0 +1,106 @@
+## Syslog GPU Router — Nginx Configuration (Docker-internal)
+## Routes incoming agent requests to the appropriate GPU backend
+## based on the X-Syslog-Model header.
+
+upstream amdpve_pool {
+    ## Strix Halo 395 — qwen3.6-35B-A3B (MoE) — Default workhorse
+    server 192.168.68.15:8080;
+}
+
+upstream llmgpu_pool {
+    ## RTX 3090 — qwen3.5-27B (Dense) — Heavy reasoning
+    server 192.168.68.8:8080;
+}
+
+upstream ocu_llm_pool {
+    ## RTX 5070 — gemma-4 (Dense 4B) — Ultra-light tasks
+    server 192.168.68.110:8080;
+}
+
+upstream queue_service {
+    ## Agent queue with circuit breaker (Docker container)
+    server queue-service:8091;
+}
+
+upstream dashboard_service {
+    ## Harness dashboard (Docker container)
+    server dashboard:3001;
+}
+
+## ------------------------------------------------------------------
+## Mapping: X-Syslog-Model header → upstream backend
+## ------------------------------------------------------------------
+map $http_x_syslog_model $gpu_upstream {
+    default          amdpve_pool;
+    "standard"       amdpve_pool;
+    "heavy"          llmgpu_pool;
+    "qwen3.5-27B"    llmgpu_pool;
+    "light"          ocu_llm_pool;
+    "gemma-4"        ocu_llm_pool;
+}
+
+## Rate limit zone — 10 req/s per IP, burst of 20
+limit_req_zone $binary_remote_addr zone=perip:10m rate=10r/s;
+
+server {
+    listen 80;
+    server_name _;
+
+    ## ------------------------------------------------------------------
+    ## Dashboard — observability UI (longest-prefix match beats the / catch-all)
+    ## ------------------------------------------------------------------
+    location /dashboard/ {
+        proxy_pass http://dashboard_service/;
+        proxy_set_header Host              $host;
+        proxy_set_header X-Real-IP         $remote_addr;
+        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
+    }
+
+    ## ------------------------------------------------------------------
+    ## Main location — proxy to selected upstream
+    ## ------------------------------------------------------------------
+    location / {
+        limit_req zone=perip burst=20 nodelay;
+        limit_req_status 503;
+        proxy_pass http://$gpu_upstream;
+
+        ## Preserve original host and headers
+        proxy_set_header Host              $host;
+        proxy_set_header X-Real-IP         $remote_addr;
+        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        ## Forward the model header explicitly so backends can log it
+        proxy_set_header X-Syslog-Model $http_x_syslog_model;
+
+        ## Streaming support (SSE for LLM responses)
+        proxy_buffering off;
+        proxy_cache     off;
+        proxy_read_timeout  300s;
+        proxy_send_timeout  300s;
+
+        ## Basic failover — retry on error or timeout
+        proxy_next_upstream error timeout http_502 http_503;
+        proxy_next_upstream_tries 2;
+
+        ## Add a response header for observability
+        add_header X-Routed-To $gpu_upstream always;
+
+        ## Fallback to queue on 502/503/504 (NOTE: this also catches limit_req's own 503)
+        error_page 502 503 504 = @queue_fallback;
+    }
+
+    ## ------------------------------------------------------------------
+    ## Queue fallback — enqueue when GPUs are unavailable
+    ## ------------------------------------------------------------------
+    location @queue_fallback {
+        rewrite ^ /enqueue break;
+        proxy_pass http://queue_service;
+        proxy_set_header Host              $host;
+        proxy_set_header X-Real-IP         $remote_addr;
+        proxy_set_header X-Forwarded-For   $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_set_header Content-Type      $content_type;
+        proxy_pass_request_body            on;
+    }
+}