Files
DesTEngSsv006_swd/kischdle/llmux/tests/test_model_registry.py
tlg 3edc055299 fix: Open WebUI integration — Harmony stripping, VRAM eviction, concurrency lock
- Add harmony.py: strip GPT-OSS-20B analysis/thinking channel from both
  streaming and non-streaming responses (HarmonyStreamFilter + extract_final_text)
- Add per-model asyncio.Lock in llamacpp backend to prevent concurrent C++
  access that caused container segfaults (exit 139)
- Fix chat handler swap for streaming: move inside _stream_generate within
  lock scope (was broken by try/finally running before stream was consumed)
- Filter /v1/models to return only LLM models (hide ASR/TTS from chat dropdown)
- Correct Qwen3.5-4B estimated_vram_gb: 4 → 9 (actual allocation ~8GB)
- Add GPU memory verification after eviction with retry loop in vram_manager
- Add HF_TOKEN_PATH support in main.py for gated model access
- Add /v1/audio/models and /v1/audio/voices discovery endpoints (no auth)
- Add OOM error handling in both backends and chat route
- Add AUDIO_STT_SUPPORTED_CONTENT_TYPES for webm/wav/mp3/ogg
- Add performance test script (scripts/perf_test.py)
- Update tests to match current config (42 tests pass)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 21:50:39 +02:00

67 lines
2.0 KiB
Python

import pytest
from llmux.model_registry import ModelRegistry
@pytest.fixture
def registry():
    """Provide the registry under test, built via ``ModelRegistry.from_config()``."""
    loaded_registry = ModelRegistry.from_config()
    return loaded_registry
def test_list_virtual_models(registry):
    """Check the size of the virtual model listing and which ids it contains.

    The listing must hold exactly 12 entries, include the two LLM variant
    ids checked below, and exclude the two names expected to be hidden.
    """
    listed = registry.list_virtual_models()
    # only LLM models, not ASR/TTS
    assert len(listed) == 12

    listed_ids = [entry["id"] for entry in listed]
    for expected_id in ("Qwen3.5-9B-FP8-Thinking", "GPT-OSS-20B-High"):
        assert expected_id in listed_ids
    # presumably the ASR/TTS model names -- verify against the config
    for hidden_id in ("cohere-transcribe", "Chatterbox-Multilingual"):
        assert hidden_id not in listed_ids
def test_virtual_model_openai_format(registry):
    """Check the ``object`` and ``owned_by`` fields of one listed model entry."""
    wanted_id = "Qwen3.5-9B-FP8-Thinking"
    candidates = (
        item for item in registry.list_virtual_models() if item["id"] == wanted_id
    )
    # next() without a default: a missing entry surfaces as StopIteration.
    entry = next(candidates)

    assert entry["object"] == "model"
    assert entry["owned_by"] == "llmux"
def test_resolve_virtual_to_physical(registry):
    """Resolving the Thinking variant yields its physical id, backend and params."""
    resolved = registry.resolve("Qwen3.5-9B-FP8-Thinking")
    model_id, model, extra_params = resolved

    assert model_id == "qwen3.5-9b-fp8"
    assert model.backend == "llamacpp"
    assert extra_params == {"enable_thinking": True}
def test_resolve_instruct_variant(registry):
    """Resolving the Instruct variant yields the same physical id with thinking off."""
    # The physical model object is not inspected in this test.
    model_id, _, extra_params = registry.resolve("Qwen3.5-9B-FP8-Instruct")

    assert model_id == "qwen3.5-9b-fp8"
    assert extra_params == {"enable_thinking": False}
def test_resolve_gpt_oss_reasoning(registry):
    """Resolving the GPT-OSS Medium variant yields a reasoning system-prompt prefix."""
    # The physical model object is not inspected in this test.
    model_id, _, extra_params = registry.resolve("GPT-OSS-20B-Medium")

    assert model_id == "gpt-oss-20b"
    assert extra_params == {"system_prompt_prefix": "Reasoning: medium"}
def test_resolve_same_physical_for_variants(registry):
    """Both Qwen3.5-9B-FP8 variants resolve to one and the same physical id."""
    thinking_id, _, _ = registry.resolve("Qwen3.5-9B-FP8-Thinking")
    instruct_id, _, _ = registry.resolve("Qwen3.5-9B-FP8-Instruct")

    assert thinking_id == instruct_id
def test_resolve_unknown_model_raises(registry):
    """``resolve`` raises ``KeyError`` for a name the registry does not know."""
    unknown_name = "nonexistent-model"
    with pytest.raises(KeyError):
        registry.resolve(unknown_name)
def test_get_physical(registry):
    """``get_physical`` returns the entry with the expected type and VRAM estimate."""
    model = registry.get_physical("qwen3.5-9b-fp8")

    assert model.type == "llm"
    assert model.estimated_vram_gb == 10
def test_get_physical_unknown_raises(registry):
    """``get_physical`` raises ``KeyError`` for an id the registry does not know."""
    unknown_id = "nonexistent"
    with pytest.raises(KeyError):
        registry.get_physical(unknown_id)