May 19, 2026: Full harness update
- Model migration: gemma-4-E4B → qwen3.5-9b-vlm
- Dashboard reorder: Usage Over Time + GPU Metrics moved to top
- Router counter leak fix (gpu_decr in except handler)
- VLM slot upgrade: 1 → 2
- Automated maintenance cron job
- LiteLLM config update
This commit is contained in:
@@ -0,0 +1,80 @@
|
||||
"""Add hourly time-series tracking and a /metrics/timeseries endpoint to the router.

One-shot patch script: it rewrites router.py in place by
  1. inserting an hourly Redis counter right after the existing per-route
     ``r.incr(...)`` line in the chat handler, and
  2. inserting a ``/metrics/timeseries`` Flask endpoint just before the
     ``if __name__ == '__main__':`` guard.

Both steps are idempotent (re-running leaves an already patched file
untouched) and fail loudly when their anchor is missing instead of reporting
success without changing anything.
"""
import re
import sys

ROUTER_PATH = '/opt/inference-harness/router/router.py'

# Existing counter statement in the chat handler, and the statement that is
# expected to follow it on the next line.
TRACK_ANCHOR = "r.incr('routes:'+model); r.incr('routes:tier:'+tier); r.incr('routes:agent:'+agent)"
TRACK_FOLLOWER = "r.lpush('routes:recent', json.dumps"

# Statements inserted between the two lines above. Keys are written in UTC
# because the endpoint below derives its bucket names with time.gmtime().
TRACK_LINES = (
    "# Time-series: hourly bucket (UTC, same clock as /metrics/timeseries)",
    "hour_key = 'ts:'+model+':'+time.strftime('%Y%m%d%H', time.gmtime())",
    "r.incr(hour_key)",
    "r.expire(hour_key, 86400*31)  # keep 31 days",
)

# Substrings whose presence means the corresponding patch was already applied.
TRACK_MARKER = "hour_key = 'ts:'+model+':'"
ENDPOINT_MARKER = "@app.route('/metrics/timeseries')"

# The indentation of the follower line is captured so the inserted statements
# land in the same block as it, whatever indentation router.py uses.
_TRACK_RE = re.compile(
    '(?P<anchor>' + re.escape(TRACK_ANCHOR) + r')[ \t]*\n'
    r'(?P<indent>[ \t]*)(?=' + re.escape(TRACK_FOLLOWER) + ')'
)

# Accept either quote style around __main__.
_MAIN_GUARD_RE = re.compile(r'''^if __name__ == (['"])__main__\1:''', re.MULTILINE)

# Endpoint source injected into router.py. It relies on names that the
# inserted code already assumed to exist there: app, request, jsonify, time,
# r (Redis client, may be falsy) and GPU_URLS.
TS_ENDPOINT = '''
@app.route('/metrics/timeseries')
def metrics_timeseries():
    """Return per-model request counts bucketed by ?period=day|week|month.

    day   -> 24 hourly buckets, week -> 7 daily buckets, anything else ->
    ten 3-day buckets covering the last 30 days. All bucket boundaries are
    UTC. 'models' stays empty when no Redis client is available.
    """
    period = request.args.get('period', 'day')
    now = time.time()  # single reference time so keys and labels agree

    def stamp(fmt, seconds_ago):
        return time.strftime(fmt, time.gmtime(now - seconds_ago))

    def day_hours(days_ago):
        # The 24 hourly key suffixes (YYYYMMDDHH) of one UTC day
        day = stamp('%Y%m%d', days_ago * 86400)
        return [day + '{:02d}'.format(hh) for hh in range(24)]

    if period == 'day':
        # Last 24 hours, one hourly key per bucket
        hours = range(23, -1, -1)
        labels = [stamp('%H:00', h * 3600) for h in hours]
        buckets = [[stamp('%Y%m%d%H', h * 3600)] for h in hours]
    elif period == 'week':
        # Last 7 days, each bucket sums that day's hourly keys
        days = range(6, -1, -1)
        labels = [stamp('%a', d * 86400) for d in days]
        buckets = [day_hours(d) for d in days]
    else:
        # Month: last 30 days in 3-day buckets, labelled by their first day
        starts = range(29, -1, -3)
        labels = [stamp('%m/%d', d * 86400) for d in starts]
        buckets = []
        for d in starts:
            suffixes = []
            for offset in range(3):
                # Daily key first (nothing in this patch writes it; kept in
                # case a rollup job does), then the day's hourly keys.
                suffixes.append(stamp('%Y%m%d', (d - offset) * 86400))
                suffixes.extend(day_hours(d - offset))
            buckets.append(suffixes)

    data = {'models': {}, 'labels': labels}
    if r:
        for model in list(GPU_URLS.keys()):
            prefix = 'ts:' + model + ':'
            data['models'][model] = [
                sum(int(r.get(prefix + suffix) or 0) for suffix in bucket)
                for bucket in buckets
            ]
    return jsonify(data)
'''


def add_hourly_tracking(code):
    """Return *code* with the hourly counter inserted after the route counters.

    Raises ValueError when the counter line followed by the
    ``r.lpush('routes:recent', ...)`` line cannot be found. Returns *code*
    unchanged when the tracking lines are already present.
    """
    if TRACK_MARKER in code:
        return code

    def _insert(match):
        indent = match.group('indent')
        added = ''.join(indent + line + '\n' for line in TRACK_LINES)
        # Re-emit the consumed indent so the follower line keeps its column.
        return match.group('anchor') + '\n' + added + indent

    patched, hits = _TRACK_RE.subn(_insert, code)
    if not hits:
        raise ValueError('route counter line not found in chat handler')
    return patched


def add_timeseries_endpoint(code):
    """Return *code* with the endpoint inserted before the __main__ guard.

    Raises ValueError when no top-level ``if __name__ == '__main__':`` line
    exists. Returns *code* unchanged when the endpoint is already present.
    """
    if ENDPOINT_MARKER in code:
        return code
    guard = _MAIN_GUARD_RE.search(code)
    if guard is None:
        raise ValueError("no top-level `if __name__ == '__main__':` guard found")
    return code[:guard.start()] + TS_ENDPOINT + '\n' + code[guard.start():]


def main():
    """Patch router.py in place and report what happened."""
    with open(ROUTER_PATH, encoding='utf-8') as f:
        code = f.read()

    try:
        patched = add_timeseries_endpoint(add_hourly_tracking(code))
    except ValueError as exc:
        # Leave the file untouched rather than writing a half-applied patch.
        sys.exit('router.py not patched: {}'.format(exc))

    if patched == code:
        print('Time-series tracking and endpoint already present')
        return

    with open(ROUTER_PATH, 'w', encoding='utf-8') as f:
        f.write(patched)
    print('Time-series tracking and endpoint added')


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user