diff --git a/router/router.py b/router/router.py
index c3674fb..1dcd4e9 100644
--- a/router/router.py
+++ b/router/router.py
@@ -299,6 +299,54 @@ def chat():
 @app.route("/v1/models")
 def models():
     return jsonify({"object":"list","data":[{"id":m,"object":"model","owned_by":"syslog","status":check_gpu_health(m).get("status"),"gpu":check_gpu_health(m).get("gpu_name")} for m in GPU_URLS]})
+@app.route("/v1/props")
+def props():
+    """Return per-model properties for every key in GPU_URLS.
+
+    Labelled Ollama-compatible by the original author; not verified here.
+    The JSON body is {"models": {model: properties}}.
+    """
+    # Named model_props so the local does not shadow this view function.
+    model_props = {}
+    for m in GPU_URLS:
+        h = check_gpu_health(m)
+        model_props[m] = {
+            "status": h.get("status", "unknown"),
+            "gpu": h.get("gpu_name", m),
+            "max_concurrent": GPU_MAX_CONCURRENT.get(m, 1),
+            "active_requests": gpu_active_count(m),
+            "vram_used_mb": h.get("vram_used_mb"),
+            "vram_total_mb": h.get("vram_total_mb"),
+        }
+    return jsonify({"models": model_props})
+
+@app.route("/v1/models/<model_id>")
+def model_detail(model_id):
+    """Return details for one model, or a 404 JSON error.
+
+    Labelled Ollama-compatible by the original author; not verified here.
+
+    Args:
+        model_id: Value captured from the URL by the route converter.
+
+    Returns:
+        The model's JSON description, or a JSON error with status 404
+        when model_id is not a key of GPU_URLS.
+    """
+    # Guard clause: unknown ids are rejected before any health check.
+    if model_id not in GPU_URLS:
+        return jsonify({"error": "model not found"}), 404
+    h = check_gpu_health(model_id)
+    return jsonify({
+        "id": model_id,
+        "object": "model",
+        "owned_by": "syslog",
+        "status": h.get("status"),
+        "gpu": h.get("gpu_name"),
+        "max_concurrent": GPU_MAX_CONCURRENT.get(model_id, 1),
+        "active_requests": gpu_active_count(model_id),
+    })
+
 @app.route("/health")
 def health():
     gpus = {}