revert: remove Ollama endpoints (llama.cpp uses OpenAI format, not Ollama)

2026-05-19 16:57:04 +00:00
parent beb2d1790a
commit 241de4f38c
1 changed file with 0 additions and 32 deletions
@@ -299,38 +299,6 @@ def chat():
@app.route("/v1/models")
def models():
    """List every configured model together with its GPU health.

    Returns:
        A JSON response of the form ``{"object": "list", "data": [...]}``
        with one entry per key of ``GPU_URLS``. Each entry carries the
        model ``id``, the constant ``object``/``owned_by`` fields, and the
        ``status`` and ``gpu`` values taken from ``check_gpu_health``
        (``None`` when the health result lacks the key).
    """
    entries = []
    for m in GPU_URLS:
        # Probe each model exactly once so that "status" and "gpu" come
        # from the same health snapshot and the check is not paid twice.
        health = check_gpu_health(m)
        entries.append({
            "id": m,
            "object": "model",
            "owned_by": "syslog",
            "status": health.get("status"),
            "gpu": health.get("gpu_name"),
        })
    return jsonify({"object": "list", "data": entries})
@app.route("/v1/props")
def props():
    """Report health and load properties for every configured model.

    The original author described this endpoint as Ollama-compatible;
    that compatibility is not verifiable from this function alone.

    Returns:
        A JSON response ``{"models": {<model>: {...}}}`` with one entry
        per key of ``GPU_URLS``. Each entry holds the health status
        (``"unknown"`` when absent), the GPU name (falling back to the
        model key), the configured concurrency limit (default 1), the
        current active request count, and the used/total VRAM values
        reported by the health check.
    """
    per_model = {}
    for model_name in GPU_URLS:
        health = check_gpu_health(model_name)
        status = health.get("status", "unknown")
        gpu_label = health.get("gpu_name", model_name)
        concurrency_limit = GPU_MAX_CONCURRENT.get(model_name, 1)
        in_flight = gpu_active_count(model_name)
        vram_used = health.get("vram_used_mb")
        vram_total = health.get("vram_total_mb")
        per_model[model_name] = {
            "status": status,
            "gpu": gpu_label,
            "max_concurrent": concurrency_limit,
            "active_requests": in_flight,
            "vram_used_mb": vram_used,
            "vram_total_mb": vram_total,
        }
    payload = {"models": per_model}
    return jsonify(payload)
@app.route("/v1/models/<model_id>")
def model_detail(model_id):
    """Return the detail record for a single configured model.

    Args:
        model_id: Model key taken from the URL path.

    Returns:
        A JSON response describing the model (id, constant
        ``object``/``owned_by`` fields, health status, GPU name,
        concurrency limit and active request count) when ``model_id`` is
        a key of ``GPU_URLS``; otherwise a ``({"error": ...}, 404)`` pair.
    """
    # Guard clause: unknown models are rejected before any health probe.
    if model_id not in GPU_URLS:
        return jsonify({"error": "model not found"}), 404

    health = check_gpu_health(model_id)
    detail = {
        "id": model_id,
        "object": "model",
        "owned_by": "syslog",
        "status": health.get("status"),
        "gpu": health.get("gpu_name"),
        "max_concurrent": GPU_MAX_CONCURRENT.get(model_id, 1),
        "active_requests": gpu_active_count(model_id),
    }
    return jsonify(detail)
@app.route("/health")
 def health():
    gpus = {}