diff --git a/kischdle/llmux/llmux/backends/llamacpp.py b/kischdle/llmux/llmux/backends/llamacpp.py index f2da464..aae0991 100644 --- a/kischdle/llmux/llmux/backends/llamacpp.py +++ b/kischdle/llmux/llmux/backends/llamacpp.py @@ -38,13 +38,10 @@ class LlamaCppBackend(BaseBackend): logger.info(f"Loading GGUF model {model_path} with n_gpu_layers={n_gpu_layers}") def _load(): - from llama_cpp import GGML_TYPE_Q8_0 kwargs = { "model_path": model_path, "n_gpu_layers": n_gpu_layers, "n_ctx": 4096, - "type_k": GGML_TYPE_Q8_0, - "type_v": GGML_TYPE_Q8_0, "verbose": False, } if physical.mmproj_file: