From 449e37d318a352e4aeae8b09fa8f3af6897f5bb063361070c7c4d8bded8c92ac Mon Sep 17 00:00:00 2001 From: tlg Date: Sat, 4 Apr 2026 09:29:35 +0200 Subject: [PATCH] feat: abstract base class for model backends Co-Authored-By: Claude Opus 4.6 (1M context) --- kischdle/llmux/llmux/backends/base.py | 48 +++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 kischdle/llmux/llmux/backends/base.py diff --git a/kischdle/llmux/llmux/backends/base.py b/kischdle/llmux/llmux/backends/base.py new file mode 100644 index 0000000..d93ef74 --- /dev/null +++ b/kischdle/llmux/llmux/backends/base.py @@ -0,0 +1,48 @@ +from abc import ABC, abstractmethod +from typing import AsyncIterator + + +class BaseBackend(ABC): + """Abstract base for all model backends.""" + + @abstractmethod + async def load(self, model_id: str, **kwargs) -> None: + """Load model weights into GPU VRAM. + + Backends accept optional kwargs: + - device: "cuda" or "cpu" (transformers backends, chatterbox) + - n_gpu_layers: int (llamacpp backend, -1=all GPU, 0=CPU only) + """ + + @abstractmethod + async def unload(self, model_id: str) -> None: + """Unload model weights from GPU VRAM.""" + + @abstractmethod + async def generate( + self, + model_id: str, + messages: list[dict], + params: dict, + stream: bool = False, + tools: list[dict] | None = None, + ) -> AsyncIterator[str] | dict: + """Run chat inference. Returns full response dict or async iterator of SSE chunks.""" + + async def transcribe( + self, + model_id: str, + audio_data: bytes, + language: str = "en", + ) -> dict: + """Transcribe audio. Only implemented by ASR backends.""" + raise NotImplementedError(f"{self.__class__.__name__} does not support transcription") + + async def synthesize( + self, + model_id: str, + text: str, + voice: str = "default", + ) -> bytes: + """Synthesize speech. Only implemented by TTS backends.""" + raise NotImplementedError(f"{self.__class__.__name__} does not support speech synthesis")