feat: abstract base class for model backends
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
48
kischdle/llmux/llmux/backends/base.py
Normal file
48
kischdle/llmux/llmux/backends/base.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import AsyncIterator
|
||||
|
||||
|
||||
class BaseBackend(ABC):
|
||||
"""Abstract base for all model backends."""
|
||||
|
||||
@abstractmethod
|
||||
async def load(self, model_id: str, **kwargs) -> None:
|
||||
"""Load model weights into GPU VRAM.
|
||||
|
||||
Backends accept optional kwargs:
|
||||
- device: "cuda" or "cpu" (transformers backends, chatterbox)
|
||||
- n_gpu_layers: int (llamacpp backend, -1=all GPU, 0=CPU only)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
async def unload(self, model_id: str) -> None:
|
||||
"""Unload model weights from GPU VRAM."""
|
||||
|
||||
@abstractmethod
|
||||
async def generate(
|
||||
self,
|
||||
model_id: str,
|
||||
messages: list[dict],
|
||||
params: dict,
|
||||
stream: bool = False,
|
||||
tools: list[dict] | None = None,
|
||||
) -> AsyncIterator[str] | dict:
|
||||
"""Run chat inference. Returns full response dict or async iterator of SSE chunks."""
|
||||
|
||||
async def transcribe(
|
||||
self,
|
||||
model_id: str,
|
||||
audio_data: bytes,
|
||||
language: str = "en",
|
||||
) -> dict:
|
||||
"""Transcribe audio. Only implemented by ASR backends."""
|
||||
raise NotImplementedError(f"{self.__class__.__name__} does not support transcription")
|
||||
|
||||
async def synthesize(
|
||||
self,
|
||||
model_id: str,
|
||||
text: str,
|
||||
voice: str = "default",
|
||||
) -> bytes:
|
||||
"""Synthesize speech. Only implemented by TTS backends."""
|
||||
raise NotImplementedError(f"{self.__class__.__name__} does not support speech synthesis")
|
||||
Reference in New Issue
Block a user