feat: project scaffolding with config files and test fixtures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
tlg
2026-04-04 07:23:14 +02:00
parent cf7c77b3b5
commit a64f32b590
8 changed files with 159 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
# List of API keys, each paired with a human-readable client label.
# NOTE(review): these values look like live credentials checked into
# version control - confirm, and if so rotate them and load the real keys
# from an environment variable or secret store instead.
api_keys:
  - name: 'Open WebUI'
    key: 'sk-llmux-openwebui-hMD6pAka1czM53MtTkmmlFP8tF5zuiiDRgt-PCBnj-c'
  - name: 'Remote Whisper clients'
    key: 'sk-llmux-whisper-ReHko1u-VpVHFbMANyhYLY2Oseswu2gSyKQR32gSyMY'
  - name: 'OpenCode'
    key: 'sk-llmux-opencode-PUqKAAtevYfUsKtjawqb3tKaLvT-DHZZBKJHwGZIvmo'

View File

@@ -0,0 +1,117 @@
# Physical models: one entry per model, keyed by an internal id.
# Every entry declares a `type` (llm / asr / tts), the `backend` that runs
# it, a backend-specific locator (`model_id`, `model_file`, or `variant`)
# and an `estimated_vram_gb` figure.
physical_models:
  # --- LLMs ---
  qwen3.5-9b-fp8:
    type: llm
    backend: transformers
    model_id: 'lovedheart/Qwen3.5-9B-FP8'
    estimated_vram_gb: 9
    supports_vision: true
    supports_tools: true

  # NOTE(review): the id says "fp8" but the file below is a Q8_0 GGUF
  # served through llamacpp - confirm the naming is intentional.
  qwen3.5-9b-fp8-uncensored:
    type: llm
    backend: llamacpp
    model_file: 'Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q8_0.gguf'
    mmproj_file: 'mmproj-Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-BF16.gguf'
    estimated_vram_gb: 9
    supports_vision: true
    supports_tools: true

  qwen3.5-4b:
    type: llm
    backend: transformers
    model_id: 'Qwen/Qwen3.5-4B'
    estimated_vram_gb: 4
    supports_vision: true
    supports_tools: true

  gpt-oss-20b:
    type: llm
    backend: transformers
    model_id: 'openai/gpt-oss-20b'
    estimated_vram_gb: 13
    supports_vision: false
    supports_tools: true

  gpt-oss-20b-uncensored:
    type: llm
    backend: transformers
    model_id: 'aoxo/gpt-oss-20b-uncensored'
    estimated_vram_gb: 13
    supports_vision: false
    supports_tools: true

  # --- Speech recognition (ASR) ---
  cohere-transcribe:
    type: asr
    backend: transformers
    model_id: 'CohereLabs/cohere-transcribe-03-2026'
    estimated_vram_gb: 4
    default_language: 'en'

  # --- Text to speech (TTS) ---
  chatterbox-turbo:
    type: tts
    backend: chatterbox
    variant: 'turbo'
    estimated_vram_gb: 2

  chatterbox-multilingual:
    type: tts
    backend: chatterbox
    variant: 'multilingual'
    estimated_vram_gb: 2

  chatterbox:
    type: tts
    backend: chatterbox
    variant: 'default'
    estimated_vram_gb: 2
# Virtual models: named presets that point at a `physical` model id and
# optionally carry `params` for that preset. Several virtual models may
# share one physical model (e.g. Thinking / Instruct variants).
virtual_models:
  # Qwen3.5 9B - thinking toggled per preset
  Qwen3.5-9B-FP8-Thinking:
    physical: qwen3.5-9b-fp8
    params:
      enable_thinking: true
  Qwen3.5-9B-FP8-Instruct:
    physical: qwen3.5-9b-fp8
    params:
      enable_thinking: false
  Qwen3.5-9B-FP8-Uncensored-Thinking:
    physical: qwen3.5-9b-fp8-uncensored
    params:
      enable_thinking: true
  Qwen3.5-9B-FP8-Uncensored-Instruct:
    physical: qwen3.5-9b-fp8-uncensored
    params:
      enable_thinking: false

  # Qwen3.5 4B
  Qwen3.5-4B-Thinking:
    physical: qwen3.5-4b
    params:
      enable_thinking: true
  Qwen3.5-4B-Instruct:
    physical: qwen3.5-4b
    params:
      enable_thinking: false

  # GPT-OSS 20B - presets differ only in the system prompt prefix
  GPT-OSS-20B-Low:
    physical: gpt-oss-20b
    params:
      system_prompt_prefix: 'Reasoning: low'
  GPT-OSS-20B-Medium:
    physical: gpt-oss-20b
    params:
      system_prompt_prefix: 'Reasoning: medium'
  GPT-OSS-20B-High:
    physical: gpt-oss-20b
    params:
      system_prompt_prefix: 'Reasoning: high'
  GPT-OSS-20B-Uncensored-Low:
    physical: gpt-oss-20b-uncensored
    params:
      system_prompt_prefix: 'Reasoning: low'
  GPT-OSS-20B-Uncensored-Medium:
    physical: gpt-oss-20b-uncensored
    params:
      system_prompt_prefix: 'Reasoning: medium'
  GPT-OSS-20B-Uncensored-High:
    physical: gpt-oss-20b-uncensored
    params:
      system_prompt_prefix: 'Reasoning: high'

  # Speech models - no preset params
  cohere-transcribe:
    physical: cohere-transcribe
  Chatterbox-Turbo:
    physical: chatterbox-turbo
  Chatterbox-Multilingual:
    physical: chatterbox-multilingual
  Chatterbox:
    physical: chatterbox

View File

View File

View File

@@ -0,0 +1,24 @@
# Python dependencies for the project. Every entry is a lower bound only
# (>=); no upper bounds or exact pins are declared here.

# Web framework
fastapi>=0.115.0
uvicorn[standard]>=0.34.0
python-multipart>=0.0.18
# AI runtimes
# NOTE(review): presumably these back the `transformers`, `llamacpp` and
# `chatterbox` backends named in the model config - confirm.
torch>=2.7.0
transformers>=5.4.0
llama-cpp-python>=0.3.0
chatterbox-tts>=0.1.0
# Audio processing
soundfile>=0.12.0
librosa>=0.10.0
# Config & utilities
pyyaml>=6.0
sentencepiece>=0.2.0
protobuf>=5.0.0
# Testing
# NOTE(review): test-only packages are listed alongside runtime ones, so
# they are installed everywhere this file is used.
pytest>=8.0.0
pytest-asyncio>=0.24.0
httpx>=0.28.0

View File

View File

@@ -0,0 +1,11 @@
import os
import pytest
from pathlib import Path
# Point config to the project's config directory for tests
@pytest.fixture(autouse=True)
def set_config_dir(tmp_path, monkeypatch) -> Path:
    """Export ``LLMUX_CONFIG_DIR`` for every test.

    Because the fixture is ``autouse``, it runs for each test without being
    requested. It sets the variable to the ``config`` directory located in
    the parent of this file's directory.

    Args:
        tmp_path: Requested from pytest but not used in the body.
        monkeypatch: pytest fixture used to set the environment variable.

    Returns:
        The ``Path`` of the config directory that was exported.
    """
    this_dir = Path(__file__).parent
    project_config = this_dir.parent / "config"
    # Environment values must be strings, hence the explicit str().
    monkeypatch.setenv("LLMUX_CONFIG_DIR", str(project_config))
    return project_config