feat: model registry with virtual-to-physical resolution
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
36
kischdle/llmux/llmux/model_registry.py
Normal file
36
kischdle/llmux/llmux/model_registry.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from llmux.config import PhysicalModel, VirtualModel, load_models_config
|
||||
|
||||
|
||||
class ModelRegistry:
    """Registry that maps virtual model names onto physical models.

    Two lookup tables are held: physical models keyed by physical id, and
    virtual models keyed by their public name. Each virtual model carries a
    ``physical`` attribute (the id of the physical model backing it) and a
    ``params`` mapping that is handed back to the caller on resolution.
    """

    def __init__(
        self,
        physical: dict[str, PhysicalModel],
        virtual: dict[str, VirtualModel],
    ):
        """Store the two lookup tables.

        Args:
            physical: Physical models keyed by physical id.
            virtual: Virtual models keyed by virtual model name.
        """
        # The dicts are stored by reference, not copied.
        self._physical = physical
        self._virtual = virtual

    @classmethod
    def from_config(cls) -> "ModelRegistry":
        """Build a registry from whatever ``load_models_config()`` returns."""
        physical, virtual = load_models_config()
        return cls(physical, virtual)

    def list_virtual_models(self) -> list[dict]:
        """Return one model-listing entry per virtual model.

        Every entry has the keys ``id`` (the virtual name), ``object``
        (always ``"model"``), ``created`` (always ``0``) and ``owned_by``
        (always ``"llmux"``). Entries follow the iteration order of the
        virtual-model table.
        """
        return [
            {
                "id": name,
                "object": "model",
                "created": 0,
                "owned_by": "llmux",
            }
            for name in self._virtual
        ]

    def resolve(self, virtual_name: str) -> tuple[str, PhysicalModel, dict]:
        """Resolve a virtual model name to (physical_id, PhysicalModel, params).

        Args:
            virtual_name: Name of the virtual model to look up.

        Returns:
            The physical id, the physical model object, and a shallow copy of
            the virtual model's params (so callers may mutate it freely).

        Raises:
            KeyError: If ``virtual_name`` is unknown, or if the virtual model
                refers to a physical id that is not registered. The message
                says which of the two lookups failed.
        """
        try:
            vm = self._virtual[virtual_name]
        except KeyError:
            raise KeyError(f"unknown virtual model: {virtual_name!r}") from None

        try:
            pm = self._physical[vm.physical]
        except KeyError:
            # A dangling reference is a configuration problem, not a bad
            # request; name both sides so it is not mistaken for the latter.
            raise KeyError(
                f"virtual model {virtual_name!r} references unknown "
                f"physical model {vm.physical!r}"
            ) from None

        return vm.physical, pm, dict(vm.params)

    def get_physical(self, physical_id: str) -> PhysicalModel:
        """Return the physical model registered under ``physical_id``.

        Raises:
            KeyError: If ``physical_id`` is not registered.
        """
        try:
            return self._physical[physical_id]
        except KeyError:
            raise KeyError(f"unknown physical model: {physical_id!r}") from None
|
||||
66
kischdle/llmux/tests/test_model_registry.py
Normal file
66
kischdle/llmux/tests/test_model_registry.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import pytest
|
||||
|
||||
from llmux.model_registry import ModelRegistry
|
||||
|
||||
|
||||
@pytest.fixture
def registry():
    """Provide a registry built through ``ModelRegistry.from_config()``."""
    instance = ModelRegistry.from_config()
    return instance
|
||||
|
||||
|
||||
def test_list_virtual_models(registry):
    """The listing has 16 entries and includes the sampled model names."""
    listing = registry.list_virtual_models()
    assert len(listing) == 16

    listed_ids = [entry["id"] for entry in listing]
    for expected in (
        "Qwen3.5-9B-FP8-Thinking",
        "GPT-OSS-20B-High",
        "cohere-transcribe",
        "Chatterbox-Multilingual",
    ):
        assert expected in listed_ids
|
||||
|
||||
|
||||
def test_virtual_model_openai_format(registry):
    """A listed entry carries the expected ``object`` and ``owned_by`` values."""
    target = "Qwen3.5-9B-FP8-Thinking"
    matches = [
        entry for entry in registry.list_virtual_models() if entry["id"] == target
    ]
    # First match, as in a linear search; fails the test if there is none.
    entry = matches[0]
    assert entry["object"] == "model"
    assert entry["owned_by"] == "llmux"
|
||||
|
||||
|
||||
def test_resolve_virtual_to_physical(registry):
    """The Thinking variant resolves to its physical id, backend and params."""
    resolved = registry.resolve("Qwen3.5-9B-FP8-Thinking")
    resolved_id, model, overrides = resolved

    assert resolved_id == "qwen3.5-9b-fp8"
    assert model.backend == "transformers"
    assert overrides == {"enable_thinking": True}
|
||||
|
||||
|
||||
def test_resolve_instruct_variant(registry):
    """The Instruct variant resolves to the same physical id with thinking off."""
    resolved_id, _model, overrides = registry.resolve("Qwen3.5-9B-FP8-Instruct")

    assert resolved_id == "qwen3.5-9b-fp8"
    assert overrides == {"enable_thinking": False}
|
||||
|
||||
|
||||
def test_resolve_gpt_oss_reasoning(registry):
    """The Medium GPT-OSS variant resolves with its reasoning prompt prefix."""
    resolved_id, _model, overrides = registry.resolve("GPT-OSS-20B-Medium")

    expected_overrides = {"system_prompt_prefix": "Reasoning: medium"}
    assert resolved_id == "gpt-oss-20b"
    assert overrides == expected_overrides
|
||||
|
||||
|
||||
def test_resolve_same_physical_for_variants(registry):
    """Thinking and Instruct variants share one physical model id."""
    thinking_id, _pm_a, _params_a = registry.resolve("Qwen3.5-9B-FP8-Thinking")
    instruct_id, _pm_b, _params_b = registry.resolve("Qwen3.5-9B-FP8-Instruct")

    assert thinking_id == instruct_id
|
||||
|
||||
|
||||
def test_resolve_unknown_model_raises(registry):
    """Resolving a name that is not registered raises ``KeyError``."""
    unknown_name = "nonexistent-model"
    with pytest.raises(KeyError):
        registry.resolve(unknown_name)
|
||||
|
||||
|
||||
def test_get_physical(registry):
    """A direct physical lookup returns the model with its type and VRAM estimate."""
    model = registry.get_physical("qwen3.5-9b-fp8")

    assert model.type == "llm"
    assert model.estimated_vram_gb == 9
|
||||
|
||||
|
||||
def test_get_physical_unknown_raises(registry):
    """Looking up a physical id that is not registered raises ``KeyError``."""
    unknown_id = "nonexistent"
    with pytest.raises(KeyError):
        registry.get_physical(unknown_id)
|
||||
Reference in New Issue
Block a user