# Base image: the "runtime" variant of the PyTorch 2.11.0 image (CUDA 12.8,
# cuDNN 9 per its tag), referenced by its fully qualified docker.io name.
FROM docker.io/pytorch/pytorch:2.11.0-cuda12.8-cudnn9-runtime

# Native audio libraries (ffmpeg, libsndfile) needed for audio processing.
# The apt package index is deleted in the same layer so it never lands in the
# image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Python packages for the service. torch, numpy and pyyaml are intentionally
# not listed: they are expected to be provided by the base image.
# All requirements are lower bounds only (>=), so a rebuild may pick up newer
# releases.
# NOTE(review): confirm that chatterbox-tts's own dependency pins resolve
# together with transformers>=5.4.0 and do not replace the base image's torch.
RUN pip install \
        --no-cache-dir \
        --break-system-packages \
        "fastapi>=0.115.0" \
        "uvicorn[standard]>=0.34.0" \
        "python-multipart>=0.0.18" \
        "transformers>=5.4.0" \
        "chatterbox-tts>=0.1.0" \
        "soundfile>=0.12.0" \
        "sentencepiece>=0.2.0" \
        "protobuf>=5.0.0"

# llama-cpp-python gets its own layer: CMAKE_ARGS is set only for this pip
# invocation and switches on the GGML CUDA backend for the package's build.
# NOTE(review): a CUDA-enabled source build needs nvcc and a C/C++ toolchain;
# the base image is a "-runtime" tag, so confirm these are actually present
# (otherwise a "-devel" base or a separate build stage is required).
RUN CMAKE_ARGS="-DGGML_CUDA=on" \
    pip install \
        --no-cache-dir \
        --break-system-packages \
        "llama-cpp-python>=0.3.0"

# Run the service as an unprivileged account instead of root. The account has
# a fixed numeric UID/GID (10001) and a real home directory (/home/llmux) so
# libraries that cache under $HOME have a writable location. /app is owned by
# the account so writes relative to the working directory keep working.
# NOTE(review): any volume the deployment mounts under /root must be moved to
# /home/llmux, and mounted model files must be readable by UID 10001 — confirm
# against the run/compose configuration.
RUN groupadd --system --gid 10001 llmux \
    && useradd --system --uid 10001 --gid llmux --create-home llmux \
    && install -d -o llmux -g llmux /app

# Copy application code, owned by the service account
WORKDIR /app
COPY --chown=llmux:llmux llmux/ /app/llmux/

# Everything from here on (including CMD) runs without root privileges
USER llmux

# Port the server listens on; must stay in sync with --port in CMD and with
# the port probed by HEALTHCHECK.
EXPOSE 8081

# Liveness probe: succeeds when a TCP connection to the server port can be
# opened. Uses only the Python standard library, so it does not depend on any
# particular HTTP route existing in the app. A failed connection raises and
# the interpreter exits non-zero, which marks the probe as failed.
# The start period is deliberately generous to cover slow startup; tune it to
# the real startup time.
# NOTE(review): assumes `python3` is on PATH in the base image — confirm.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD ["python3", "-c", "import socket; socket.create_connection(('127.0.0.1', 8081), timeout=3).close()"]

# Run the server (exec form, so uvicorn is PID 1 and receives stop signals)
CMD ["uvicorn", "llmux.main:app", "--host", "0.0.0.0", "--port", "8081"]