From 241de4f38cb370a9a848b3d50434ca64588a30f5 Mon Sep 17 00:00:00 2001
From: Abiba
Date: Tue, 19 May 2026 16:57:04 +0000
Subject: [PATCH] revert: remove Ollama endpoints (llama.cpp uses OpenAI format, not Ollama)

---
 router/router.py | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/router/router.py b/router/router.py
index 1dcd4e9..c3674fb 100644
--- a/router/router.py
+++ b/router/router.py
@@ -299,38 +299,6 @@ def chat():
 @app.route("/v1/models")
 def models():
     return jsonify({"object":"list","data":[{"id":m,"object":"model","owned_by":"syslog","status":check_gpu_health(m).get("status"),"gpu":check_gpu_health(m).get("gpu_name")} for m in GPU_URLS]})
-@app.route("/v1/props")
-def props():
-    """Ollama-compatible model properties endpoint."""
-    props = {}
-    for m in GPU_URLS:
-        h = check_gpu_health(m)
-        props[m] = {
-            "status": h.get("status", "unknown"),
-            "gpu": h.get("gpu_name", m),
-            "max_concurrent": GPU_MAX_CONCURRENT.get(m, 1),
-            "active_requests": gpu_active_count(m),
-            "vram_used_mb": h.get("vram_used_mb"),
-            "vram_total_mb": h.get("vram_total_mb"),
-        }
-    return jsonify({"models": props})
-
-@app.route("/v1/models/<model_id>")
-def model_detail(model_id):
-    """Single model detail — Ollama-compatible."""
-    if model_id in GPU_URLS:
-        h = check_gpu_health(model_id)
-        return jsonify({
-            "id": model_id,
-            "object": "model",
-            "owned_by": "syslog",
-            "status": h.get("status"),
-            "gpu": h.get("gpu_name"),
-            "max_concurrent": GPU_MAX_CONCURRENT.get(model_id, 1),
-            "active_requests": gpu_active_count(model_id),
-        })
-    return jsonify({"error": "model not found"}), 404
-
 @app.route("/health")
 def health():
     gpus = {}