May 19, 2026: Full harness update

- Model migration: gemma-4-E4B → qwen3.5-9b-vlm
- Dashboard reorder: Usage Over Time + GPU Metrics to top
- Router counter leak fix (gpu_decr in except handler)
- VLM slot upgrade 1→2
- Automated maintenance cron job
- LiteLLM config update
This commit is contained in:
Abiba
2026-05-19 15:03:47 +00:00
commit 28fc57c5c7
15 changed files with 1455 additions and 0 deletions
+80
View File
@@ -0,0 +1,80 @@
# Add time-series tracking and a /metrics/timeseries endpoint to the router.
#
# Run as a script. It rewrites ROUTER_PATH in place, and only when every
# anchor it needs was found, so a failed run never leaves the router
# half-patched. Re-running against an already-patched router is a no-op.
import re
import sys

ROUTER_PATH = '/opt/inference-harness/router/router.py'

# Counter line in the chat handler, and the call expected on the next line.
_INCR_LINE = ("r.incr('routes:'+model); r.incr('routes:tier:'+tier); "
              "r.incr('routes:agent:'+agent)")
_PUSH_CALL = "r.lpush('routes:recent', json.dumps"

# The anchor is matched independently of indentation; the indent of the
# lpush line is captured so the injected statements line up with it.
_TRACK_ANCHOR = re.compile(
    re.escape(_INCR_LINE)
    + r'(?P<eol>[ \t]*\r?\n)(?P<indent>[ \t]*)'
    + re.escape(_PUSH_CALL)
)

# Statements injected between the counter line and the lpush call.
# The bucket uses UTC (time.gmtime) because the endpoint reads UTC buckets.
_TRACKING_LINES = (
    "# Time-series: hourly bucket (UTC, matches /metrics/timeseries)",
    "hour_key = 'ts:'+model+':'+time.strftime('%Y%m%d%H', time.gmtime())",
    "r.incr(hour_key)",
    "r.expire(hour_key, 86400*31)  # keep 31 days",
)
# Present in any router that already carries the tracking statements.
_TRACKING_MARKER = "hour_key = 'ts:'+model+':'+time.strftime("

# Script entry guard of router.py, accepting either quote style.
_MAIN_GUARD = re.compile(r'''^if __name__ == (['"])__main__\1:''', re.MULTILINE)
_ENDPOINT_MARKER = "@app.route('/metrics/timeseries')"

# Source of the endpoint inserted into router.py. It relies on names the
# router is expected to define already: app, request, jsonify, time, r and
# GPU_URLS.
TS_ENDPOINT = '''
@app.route('/metrics/timeseries')
def metrics_timeseries():
    """Return per-model request counts bucketed for the usage chart.

    The ``period`` query parameter selects the window: ``day`` gives 24
    hourly buckets, ``week`` gives 7 daily buckets, and any other value is
    treated as a month of ten 3-day buckets covering the last 30 days.
    All buckets are computed in UTC.
    """
    period = request.args.get('period', 'day')
    # One clock reading, so labels and bucket keys cannot drift apart
    # when a request straddles an hour or day boundary.
    now = time.time()

    def count(key):
        return int(r.get(key) or 0)

    def hourly_total(model, day):
        # Sum the 24 hourly keys that make up one UTC day.
        return sum(count('ts:' + model + ':' + day + '{:02d}'.format(hh))
                   for hh in range(24))

    if period == 'day':
        # Last 24 hours, hourly buckets
        starts = [now - h * 3600 for h in range(23, -1, -1)]
        label_fmt = '%H:00'
    elif period == 'week':
        # Last 7 days, daily buckets
        starts = [now - d * 86400 for d in range(6, -1, -1)]
        label_fmt = '%a'
    else:
        # Month: last 30 days, 3-day buckets labelled by their first day
        starts = [now - d * 86400 for d in range(29, -1, -3)]
        label_fmt = '%m/%d'

    data = {
        'models': {},
        'labels': [time.strftime(label_fmt, time.gmtime(t)) for t in starts],
    }
    if r:
        for model in list(GPU_URLS):
            counts = []
            for start in starts:
                if period == 'day':
                    hour = time.strftime('%Y%m%d%H', time.gmtime(start))
                    total = count('ts:' + model + ':' + hour)
                elif period == 'week':
                    day = time.strftime('%Y%m%d', time.gmtime(start))
                    total = hourly_total(model, day)
                else:
                    # Sum 3 consecutive UTC days per bucket: the daily key
                    # (if one exists) plus that day's hourly keys.
                    total = 0
                    for offset in range(3):
                        day = time.strftime(
                            '%Y%m%d', time.gmtime(start + offset * 86400))
                        total += count('ts:' + model + ':' + day)
                        total += hourly_total(model, day)
                counts.append(total)
            data['models'][model] = counts
    return jsonify(data)
'''


def _inject_tracking(match):
    """Rebuild the matched anchor with the tracking statements in between."""
    indent = match.group('indent')
    eol = match.group('eol')
    tracking = ''.join(indent + line + eol for line in _TRACKING_LINES)
    return _INCR_LINE + eol + tracking + indent + _PUSH_CALL


def add_hourly_tracking(code):
    """Return ``code`` with hourly counters added after the route counters.

    Returns ``code`` unchanged when the tracking statements are already
    present. Raises ValueError when the counter/lpush anchor is missing.
    """
    if _TRACKING_MARKER in code:
        return code
    patched, hits = _TRACK_ANCHOR.subn(_inject_tracking, code)
    if not hits:
        raise ValueError('route counter anchor not found in chat handler')
    return patched


def add_timeseries_endpoint(code):
    """Return ``code`` with the endpoint inserted before the main guard.

    Returns ``code`` unchanged when the endpoint is already present. Raises
    ValueError when there is no ``if __name__ == '__main__':`` line.
    """
    if _ENDPOINT_MARKER in code:
        return code
    # A function replacement keeps backslashes in the endpoint literal;
    # count=1 avoids registering the route twice.
    patched, hits = _MAIN_GUARD.subn(
        lambda m: TS_ENDPOINT + '\n' + m.group(0), code, count=1)
    if not hits:
        raise ValueError("'if __name__ == \"__main__\":' guard not found")
    return patched


def patch_router_source(code):
    """Apply both patches to router source text and return the result.

    Raises ValueError if either anchor is missing, before anything has been
    written, so the caller never persists a partial patch.
    """
    return add_timeseries_endpoint(add_hourly_tracking(code))


def main():
    """Patch ROUTER_PATH in place and report what happened."""
    with open(ROUTER_PATH, encoding='utf-8') as f:
        code = f.read()
    try:
        patched = patch_router_source(code)
    except ValueError as exc:
        sys.exit('router.py NOT patched: {}'.format(exc))
    if patched == code:
        print('Time-series tracking and endpoint already present')
        return
    with open(ROUTER_PATH, 'w', encoding='utf-8') as f:
        f.write(patched)
    print('Time-series tracking and endpoint added')


if __name__ == '__main__':
    main()