fix: resolve GGUF paths through HF cache, add model_id to GGUF config

The llama-cpp-python backend now uses huggingface_hub to resolve GGUF
file paths within the HF cache structure instead of assuming a flat
/models/ directory.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
author tlg
date   2026-04-05 21:33:36 +02:00
parent 38e1523d7e
commit f24a225baf
2 changed files with 16 additions and 4 deletions


@@ -10,6 +10,7 @@ physical_models:
   qwen3.5-9b-fp8-uncensored:
     type: llm
     backend: llamacpp
+    model_id: "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive"
     model_file: "Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf"
     mmproj_file: "mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf"
     estimated_vram_gb: 9
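
For reference, a minimal sketch of the resolution logic the commit message
describes, assuming only the model_id and model_file fields from the config
entry above. resolve_gguf_path is a hypothetical helper name, not the
repository's actual function; only documented huggingface_hub calls are used.

    from huggingface_hub import hf_hub_download, try_to_load_from_cache

    def resolve_gguf_path(model_id: str, model_file: str) -> str:
        # Prefer a file already present in the local HF cache; on a hit,
        # try_to_load_from_cache returns the cached path as a string.
        cached = try_to_load_from_cache(repo_id=model_id, filename=model_file)
        if isinstance(cached, str):
            return cached
        # Cache miss: download into the HF cache and return the cached path.
        return hf_hub_download(repo_id=model_id, filename=model_file)

The mmproj_file would resolve the same way, e.g.
resolve_gguf_path(model_id, mmproj_file), and the resulting paths can then be
handed to llama-cpp-python (Llama(model_path=...)).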