fix: add /v1/props and /v1/models/<id> endpoints
This commit is contained in:
@@ -299,6 +299,38 @@ def chat():
|
|||||||
@app.route("/v1/models")
def models():
    """List every configured model together with its current health.

    Responds with a JSON object ``{"object": "list", "data": [...]}`` holding
    one entry per item obtained by iterating ``GPU_URLS``. Each entry has the
    model ``id``, ``object`` set to ``"model"``, ``owned_by`` set to
    ``"syslog"``, and the ``status`` / ``gpu`` values looked up from the
    result of ``check_gpu_health`` (no default is supplied for either).
    """
    data = []
    for m in GPU_URLS:
        # Probe each model exactly once so that "status" and "gpu" are taken
        # from the same health result instead of two separate calls.
        h = check_gpu_health(m)
        data.append({
            "id": m,
            "object": "model",
            "owned_by": "syslog",
            "status": h.get("status"),
            "gpu": h.get("gpu_name"),
        })
    return jsonify({"object": "list", "data": data})
|
||||||
|
|
||||||
|
@app.route("/v1/props")
def props():
    """Ollama-compatible model properties endpoint.

    Responds with ``{"models": {...}}`` where each item obtained by iterating
    ``GPU_URLS`` maps to its status, GPU name, concurrency limit, active
    request count and VRAM figures.
    """

    def _describe(name):
        # One health lookup per model; every health-derived field below is
        # read from this single result.
        health = check_gpu_health(name)
        return {
            # Falls back to "unknown" when no status is reported.
            "status": health.get("status", "unknown"),
            # Falls back to the model id when no GPU name is reported.
            "gpu": health.get("gpu_name", name),
            "max_concurrent": GPU_MAX_CONCURRENT.get(name, 1),
            "active_requests": gpu_active_count(name),
            "vram_used_mb": health.get("vram_used_mb"),
            "vram_total_mb": health.get("vram_total_mb"),
        }

    per_model = {name: _describe(name) for name in GPU_URLS}
    return jsonify({"models": per_model})
|
||||||
|
|
||||||
|
@app.route("/v1/models/<model_id>")
def model_detail(model_id):
    """Single model detail — Ollama-compatible.

    Responds with a JSON description of ``model_id`` when it is present in
    ``GPU_URLS``; otherwise responds with ``{"error": "model not found"}``
    and HTTP status 404.
    """
    # Guard clause: reject ids that are not configured before probing health.
    if model_id not in GPU_URLS:
        return jsonify({"error": "model not found"}), 404

    health = check_gpu_health(model_id)
    detail = {
        "id": model_id,
        "object": "model",
        "owned_by": "syslog",
        "status": health.get("status"),
        "gpu": health.get("gpu_name"),
        "max_concurrent": GPU_MAX_CONCURRENT.get(model_id, 1),
        "active_requests": gpu_active_count(model_id),
    }
    return jsonify(detail)
|
||||||
|
|
||||||
@app.route("/health")
|
@app.route("/health")
|
||||||
def health():
|
def health():
|
||||||
gpus = {}
|
gpus = {}
|
||||||
|
|||||||
Reference in New Issue
Block a user