# syntax=docker/dockerfile:1

# NOTE(review): the -runtime PyTorch image does not ship nvcc, so the CUDA
# source build of llama-cpp-python below will likely fail at `cmake` time.
# If the CUDA backend is required, build it in a matching -devel stage and
# copy the wheel/site-packages into this runtime stage — confirm with a build.
FROM docker.io/pytorch/pytorch:2.11.0-cuda12.8-cudnn9-runtime

# System dependencies for audio processing.
# update+install in one layer (avoids stale apt cache) and clean lists in the
# same layer so the cache never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ffmpeg \
      libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies. Copying only requirements.txt first keeps this
# layer cached when application source changes but dependencies don't.
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt \
    && rm /tmp/requirements.txt

# llama-cpp-python needs a CUDA-enabled build, so force a from-source reinstall
# with the GGML CUDA backend switched on.
# The requirement specifier MUST be quoted: in shell-form RUN an unquoted `>=`
# is parsed as an output redirection (`pip install … llama-cpp-python > =0.3.0`),
# which silently drops the version pin and writes pip's output to a file.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir --force-reinstall \
      "llama-cpp-python>=0.3.0"

# Copy application code. WORKDIR creates /app if missing — no mkdir needed.
WORKDIR /app
COPY llmux/ /app/llmux/

# Run as a dedicated non-root user (stable numeric UID/GID so runtimes that
# enforce runAsNonRoot can verify it). All root-requiring steps are above.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home /app app \
    && chown -R app:app /app
USER app

# Run the server. EXPOSE is documentation only — publish with `-p` at run time.
# Port 8081 (> 1024) is bindable by the non-root user.
EXPOSE 8081
CMD ["uvicorn", "llmux.main:app", "--host", "0.0.0.0", "--port", "8081"]