28fc57c5c7
- Model migration: gemma-4-E4B → qwen3.5-9b-vlm - Dashboard reorder: Usage Over Time + GPU Metrics to top - Router counter leak fix (gpu_decr in except handler) - VLM slot upgrade 1→2 - Automated maintenance cron job - LiteLLM config update
81 lines
3.4 KiB
Python
81 lines
3.4 KiB
Python
# Add time-series tracking and a /metrics/timeseries endpoint to the router.
#
# One-off patch script: reads router.py, inserts (a) an hourly per-model
# counter right after the existing 'routes:*' increments in the chat handler
# and (b) a read-only endpoint just before the ``if __name__ == "__main__":``
# guard, then writes the file back.  Each insertion is skipped when already
# present, and the file is left untouched if a required anchor is missing.
import re
import sys

ROUTER_PATH = '/opt/inference-harness/router/router.py'

# Anchor in the chat handler: the three route counters, directly followed by
# the 'routes:recent' push.  Only the text is matched; the indentation of the
# lpush line is captured so the inserted lines line up with it, whatever
# indentation router.py actually uses.
_INCR_LINE = ("r.incr('routes:'+model); r.incr('routes:tier:'+tier); "
              "r.incr('routes:agent:'+agent)")
_LPUSH_HEAD = "r.lpush('routes:recent', json.dumps"
_TRACK_ANCHOR = re.compile(
    re.escape(_INCR_LINE) + r"[ \t]*\n(?P<indent>[ \t]*)" + re.escape(_LPUSH_HEAD)
)

# Lines inserted between the counters and the lpush.  The hour stamp uses
# UTC (time.gmtime) because the endpoint below builds its buckets in UTC.
_TRACKING_LINES = (
    "# Time-series: hourly bucket (UTC, to match /metrics/timeseries)",
    "hour_key = 'ts:'+model+':'+time.strftime('%Y%m%d%H', time.gmtime())",
    "r.incr(hour_key)",
    "r.expire(hour_key, 86400*31)  # keep 31 days",
)

# Substrings whose presence means the corresponding insertion was already made.
_TRACKING_MARKER = "hour_key = 'ts:'+model+':'"
_ENDPOINT_MARKER = "@app.route('/metrics/timeseries')"

# The script-entry guard, with either quote style.
_MAIN_GUARD = re.compile(
    r"^if\s+__name__\s*==\s*(['\"])__main__\1\s*:", re.MULTILINE
)

# Endpoint source injected into router.py.  It relies on names router.py is
# expected to define already (app, request, jsonify, GPU_URLS, r, time).
TS_ENDPOINT = '''
@app.route('/metrics/timeseries')
def metrics_timeseries():
    """Return per-model request counts for ?period=day (default), week or month."""
    period = request.args.get('period', 'day')
    models = list(GPU_URLS.keys())
    now = time.time()  # one reference instant so labels and buckets agree

    def stamp(fmt, seconds_ago):
        return time.strftime(fmt, time.gmtime(now - seconds_ago))

    def hour_keys(day):
        return [day + '{:02d}'.format(h) for h in range(24)]

    # Each bucket is the list of key suffixes whose counters are summed.
    if period == 'day':
        # Last 24 hours, hourly buckets
        ages = range(23, -1, -1)
        labels = [stamp('%H:00', h * 3600) for h in ages]
        buckets = [[stamp('%Y%m%d%H', h * 3600)] for h in ages]
    elif period == 'week':
        # Last 7 days, daily buckets (sum of that day's hourly counters)
        ages = range(6, -1, -1)
        labels = [stamp('%a', d * 86400) for d in ages]
        buckets = [hour_keys(stamp('%Y%m%d', d * 86400)) for d in ages]
    else:
        # Month: last 30 days, 3-day buckets labelled by their first day
        ages = range(29, -1, -3)
        labels = [stamp('%m/%d', d * 86400) for d in ages]
        buckets = []
        for d in ages:
            suffixes = []
            for offset in range(3):
                day = stamp('%Y%m%d', (d - offset) * 86400)
                # Plain daily key (not written by the hourly tracker; read in
                # case something else maintains it) plus the 24 hourly keys.
                suffixes.append(day)
                suffixes.extend(hour_keys(day))
            buckets.append(suffixes)

    data = {'models': {}, 'labels': labels}
    if r:
        for model in models:
            prefix = 'ts:' + model + ':'
            data['models'][model] = [
                sum(int(r.get(prefix + suffix) or 0) for suffix in bucket)
                for bucket in buckets
            ]
    return jsonify(data)
'''


def _with_tracking(match):
    """Rebuild the matched anchor with the tracking lines inserted."""
    indent = match.group('indent')
    inserted = ''.join(indent + line + '\n' for line in _TRACKING_LINES)
    return _INCR_LINE + '\n' + inserted + indent + _LPUSH_HEAD


def patch_router_source(code):
    """Return *code* with hourly tracking and the timeseries endpoint added.

    An insertion that is already present is skipped, so patching twice
    yields the same text as patching once.

    Raises:
        ValueError: an insertion is needed but its anchor text is missing.
    """
    if _TRACKING_MARKER not in code:
        code, hits = _TRACK_ANCHOR.subn(_with_tracking, code)
        if not hits:
            raise ValueError(
                "chat-handler anchor not found (routes:* incr line followed "
                "by the routes:recent lpush)"
            )

    if _ENDPOINT_MARKER not in code:
        # A function replacement keeps the endpoint text literal (no
        # backslash/group expansion); only the first guard is touched.
        code, hits = _MAIN_GUARD.subn(
            lambda m: TS_ENDPOINT + '\n' + m.group(0), code, count=1
        )
        if not hits:
            raise ValueError("'if __name__ == \"__main__\":' anchor not found")

    return code


def main(path=ROUTER_PATH):
    """Patch the router file at *path* in place and report the outcome."""
    with open(path, encoding='utf-8') as f:
        original = f.read()

    try:
        patched = patch_router_source(original)
    except ValueError as exc:
        # Nothing has been written yet, so the router is still intact.
        sys.exit('router.py left unchanged: {}'.format(exc))

    if patched == original:
        print('Time-series tracking and endpoint already present; nothing to do')
        return

    with open(path, 'w', encoding='utf-8') as f:
        f.write(patched)
    print('Time-series tracking and endpoint added')


if __name__ == '__main__':
    main()