feat: replace gpt-oss-20b-uncensored with HauhauCS MXFP4 GGUF
The aoxo model shipped unquantized (BF16, ~40GB), which caused OOM. The HauhauCS model uses the MXFP4 GGUF format and loads at 11.9GB via the llama-cpp backend. All three reasoning levels (Low/Medium/High) work.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
@@ -36,8 +36,9 @@ physical_models:
   gpt-oss-20b-uncensored:
     type: llm
-    backend: transformers
-    model_id: "aoxo/gpt-oss-20b-uncensored"
+    backend: llamacpp
+    model_id: "HauhauCS/GPT-OSS-20B-Uncensored-HauhauCS-Aggressive"
+    model_file: "GPT-OSS-20B-Uncensored-HauhauCS-MXFP4-Aggressive.gguf"
     estimated_vram_gb: 13
     supports_vision: false
     supports_tools: true
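The 11.9GB load size and the 13GB `estimated_vram_gb` headroom are consistent with rough MXFP4 arithmetic. A minimal sketch, assuming MXFP4 stores 4-bit values with one shared 8-bit scale per 32-element block (≈4.25 bits/weight) and a ~20B parameter count — the block size and parameter figure are assumptions, not stated in the commit:

```python
# Back-of-envelope VRAM estimate for the quantized weights alone.
# Assumption: MXFP4 = FP4 element + shared 8-bit scale per 32-block.
PARAMS = 20e9                    # approximate gpt-oss-20b parameter count
BITS_PER_WEIGHT = 4 + 8 / 32     # 4.25 effective bits per weight

weight_gb = PARAMS * BITS_PER_WEIGHT / 8 / 1e9
print(f"~{weight_gb:.1f} GB of quantized weights")  # ~10.6 GB
```

The gap between ~10.6GB of weights and the observed 11.9GB would be runtime overhead such as the KV cache and scratch buffers, which is why rounding `estimated_vram_gb` up to 13 leaves sensible headroom.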