Pod and python for text embedding with colnomic-embed-multimodal-7b.py

2025-11-21 17:01:49 +01:00
commit ad39e7c5c7
2 changed files with 380 additions and 0 deletions
--- a/bin/create_pod_pytorch.sh
+++ b/bin/create_pod_pytorch.sh
@@ -0,0 +1,214 @@
+#!/bin/bash
+
+# Run as user "llm".
+# Builds the PyTorch + HTTP API image, creates the pod and its container,
+# smoke-tests the API (which serves the ColNomic embedding model) and
+# installs the systemd user services.
+
+# Stop at the first command that fails
+set -e
+
+# Environment variables
+
+# Podman object names
+POD_NAME="pytorch_pod"
+CTR_NAME="pytorch_ctr"
+
+# Images: NVIDIA NGC PyTorch container with CUDA 13.0 (25.08 release) as the
+# base, and the locally built image derived from it
+BASE_IMAGE="nvcr.io/nvidia/pytorch:25.08-py3"
+CUSTOM_IMAGE="localhost/pytorch-api:25.08-cuda13.0"
+
+# Hugging Face model id and the matching model page
+HF_MODEL_ID="nomic-ai/colnomic-embed-multimodal-7b"
+HF_MODEL_URL="https://huggingface.co/${HF_MODEL_ID}"
+
+# Address and ports: host side first, container side last
+HOST_LOCAL_IP="127.0.0.1"
+PYTORCH_HOST_PORT="8086"
+PYTORCH_CONTAINER_PORT="8000"
+
+# Host paths
+USER_SYSTEMD_DIR="${HOME}/.config/systemd/user"
+BIND_DIR="${HOME}/.local/share/${POD_NAME}"
+AI_MODELS_DIR="${BIND_DIR}/ai-models"
+PYTHON_APPS_DIR="${BIND_DIR}/python-apps"
+CONTAINERFILE="${BIND_DIR}/containerfile"
+PY_APP="${PYTHON_APPS_DIR}/colnomic-embed-multimodal-7b.py"
+
+# Prepare directories (parents are created as needed)
+mkdir -p \
+  "$AI_MODELS_DIR" \
+  "$PYTHON_APPS_DIR" \
+  "$USER_SYSTEMD_DIR"
+
+# Generate containerfile
+# The heredoc delimiter is quoted ('EOF'), so the shell performs no expansion
+# inside the body: ${BASE_IMAGE} and ${PYTHONPATH} are written to the file
+# literally and are resolved by the image build, not by this script.
+cat >"$CONTAINERFILE" <<'EOF'
+# Containerfile for PyTorch + FastAPI + ColPali (ColNomic embed model support)
+
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE}
+
+# Hugging Face caches and Python apps directory (bind-mounted at runtime)
+ENV HF_HOME=/models/hf \
+    TRANSFORMERS_CACHE=/models/hf/transformers \
+    PYTHON_APPS_DIR=/python-apps
+
+# Ensure directories exist
+RUN mkdir -p /models/hf/transformers /python-apps
+
+# Install git (for colpali) and clean apt lists
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip and install runtime dependencies:
+# - fastapi, uvicorn for the HTTP API
+# - transformers, accelerate, peft for HF + ColPali ecosystem
+# - flash-attn to provide FlashAttention-2 kernels
+# - colpali pinned to specific commit, installed WITHOUT deps to avoid
+#   overriding the PyTorch provided by the base image.
+RUN python -m pip install --upgrade pip && \
+    python -m pip install --no-cache-dir \
+        fastapi \
+        "uvicorn[standard]" \
+        transformers \
+        accelerate \
+        peft && \
+    python -m pip install --no-cache-dir flash-attn --no-build-isolation && \
+    python -m pip install --no-cache-dir --no-deps \
+        "git+https://github.com/illuin-tech/colpali.git@97e389a" && \
+    python -m pip cache purge
+
+# Make /python-apps importable by default
+ENV PYTHONPATH=/python-apps:${PYTHONPATH}
+
+WORKDIR /workspace
+
+# Default command can be overridden by podman run.
+CMD ["bash"]
+EOF
+
+# Build custom container image
+# --build-arg supplies the value for the ARG BASE_IMAGE declared at the top of
+# the containerfile; the build context is the directory that holds the
+# containerfile.
+podman build \
+  --build-arg BASE_IMAGE="$BASE_IMAGE" \
+  -t "$CUSTOM_IMAGE" \
+  -f "$CONTAINERFILE" \
+  "$(dirname "$CONTAINERFILE")"
+
+# Make sure the pod exists. A new pod publishes the container port on
+# $HOST_LOCAL_IP:$PYTORCH_HOST_PORT; an existing pod is left untouched.
+if podman pod exists "$POD_NAME"; then
+  echo "Pod '$POD_NAME' already exists."
+else
+  port_mapping="$HOST_LOCAL_IP:$PYTORCH_HOST_PORT:$PYTORCH_CONTAINER_PORT"
+  podman pod create --name "$POD_NAME" --publish "$port_mapping"
+  echo "Pod '$POD_NAME' created (rc=$?)"
+fi
+
+# PyTorch + HTTP API container
+
+# The app reaches the container only through the bind mount of
+# $PYTHON_APPS_DIR, so it has to be present on the host before starting.
+if [[ ! -f "$PY_APP" ]]; then
+  echo "ERROR: Python app not found on host: $PY_APP" >&2
+  exit 1
+fi
+
+# Remove old container; --ignore keeps a first run (nothing to remove yet)
+# from aborting the script under 'set -e'
+podman rm -f --ignore "$CTR_NAME"
+
+# New container
+# - all NVIDIA GPUs are passed in through the CDI device name
+# - $AI_MODELS_DIR is mounted at /models and $PYTHON_APPS_DIR at /python-apps
+# - the app is started by its path INSIDE the container: $PY_APP is a host
+#   path that does not exist there, so only its file name is reused under
+#   the /python-apps mount point
+podman run -d --name "$CTR_NAME" --pod "$POD_NAME" \
+  --device nvidia.com/gpu=all \
+  -e HF_MODEL_ID="$HF_MODEL_ID" \
+  -e HF_MODEL_URL="$HF_MODEL_URL" \
+  -e PYTORCH_CONTAINER_PORT="$PYTORCH_CONTAINER_PORT" \
+  -v "$AI_MODELS_DIR":/models \
+  -v "$PYTHON_APPS_DIR":/python-apps \
+  "$CUSTOM_IMAGE" \
+  python "/python-apps/${PY_APP##*/}"
+
+# Wait for API readiness (/health)
+# Polls the endpoint up to HEALTH_MAX_ATTEMPTS times, HEALTH_INTERVAL seconds
+# apart. Both may be preset in the environment (e.g. for a slow first start);
+# the defaults are 30 attempts and 2 seconds.
+HEALTH_URL="http://$HOST_LOCAL_IP:$PYTORCH_HOST_PORT/health"
+HEALTH_MAX_ATTEMPTS="${HEALTH_MAX_ATTEMPTS:-30}"
+HEALTH_INTERVAL="${HEALTH_INTERVAL:-2}"
+echo -n "Waiting for PyTorch API at $HEALTH_URL ..."
+api_ready=false
+for ((attempt = 1; attempt <= HEALTH_MAX_ATTEMPTS; attempt++)); do
+  # -f turns HTTP error statuses into a non-zero exit; output is discarded
+  if curl -fsS "$HEALTH_URL" >/dev/null 2>&1; then
+    api_ready=true
+    echo "ready."
+    break
+  fi
+  # Pause between attempts only; after the last one there is nothing left
+  # to wait for
+  if ((attempt < HEALTH_MAX_ATTEMPTS)); then
+    sleep "$HEALTH_INTERVAL"
+  fi
+done
+if [[ "$api_ready" != true ]]; then
+  echo "timeout error." >&2
+  echo "Container logs:" >&2
+  # Diagnostics go to stderr with the error text; a failure to fetch the logs
+  # must not replace the exit status below
+  podman logs "$CTR_NAME" >&2 || true
+  exit 1
+fi
+
+# Smoke tests
+
+# GPU availability
+# The probe prints a JSON summary on stdout and reports the outcome through
+# its exit status: 0 = CUDA available, 2 = CUDA not available, 1 = importing
+# torch failed; any other status is handled by the generic error branch.
+# The status is collected with '|| GPU_RC=$?': under 'set -e' a plain
+# assignment from a failing command substitution would end the script right
+# there, before any of the diagnostics below could be printed.
+GPU_RC=0
+GPU_JSON="$(
+  podman exec "$CTR_NAME" python -c '
+import json, sys
+try:
+    import torch
+except Exception as e:
+    # Exit code 1 -> internal error (import torch failed, etc.)
+    print(json.dumps({"error": f"import torch failed: {e}"}))
+    sys.exit(1)
+
+data = {
+    "cuda_available": bool(torch.cuda.is_available()),
+    "device_count": int(torch.cuda.device_count()),
+}
+print(json.dumps(data))
+# Exit code 0 -> cuda_available is True
+# Exit code 2 -> cuda_available is False
+sys.exit(0 if data["cuda_available"] else 2)
+'
+)" || GPU_RC=$?
+# echo "podman exec exit code: $GPU_RC"
+# echo "GPU_JSON: $GPU_JSON"
+if [ "$GPU_RC" -eq 0 ]; then
+  echo "GPU is available in container $CTR_NAME (cuda_available == true)."
+elif [ "$GPU_RC" -eq 2 ]; then
+  echo "ERROR: CUDA GPU is NOT available inside the container." >&2
+  echo "Details: $GPU_JSON" >&2
+  echo "This may be due to missing NVIDIA CDI configuration or SELinux labeling." >&2
+  exit 1
+else
+  echo "ERROR: podman exec GPU test failed (exit code $GPU_RC)." >&2
+  echo "Details: $GPU_JSON" >&2
+  echo "Container logs for debugging:" >&2
+  # Best effort: failing to read the logs must not hide the real error
+  podman logs "$CTR_NAME" || true
+  exit 1
+fi
+
+# Python API /health
+# Fetch the response, show it, and require the compact "status":"ok" pair
+HEALTH_JSON="$(curl --fail --silent --show-error "$HEALTH_URL")"
+echo "$HEALTH_JSON"
+grep -q '"status":"ok"' <<<"$HEALTH_JSON" || {
+  echo "ERROR: /health endpoint did not report status \"ok\"." >&2
+  exit 1
+}
+
+# Python API /embed
+# Post one sample text, show the response, and require a "results" key
+EMBED_URL="http://$HOST_LOCAL_IP:$PYTORCH_HOST_PORT/embed"
+EMBED_JSON="$(
+  curl --fail --silent --show-error \
+    --request POST \
+    --header "Content-Type: application/json" \
+    --data '{"texts":["hello world from colnomic"]}' \
+    "$EMBED_URL"
+)"
+echo "$EMBED_JSON"
+grep -q '"results"' <<<"$EMBED_JSON" || {
+  echo "ERROR: /embed endpoint did not return embeddings as expected." >&2
+  exit 1
+}
+
+# Generate systemd service files
+# --files places the unit files in the current working directory, hence the
+# change into the user unit directory first
+cd "$USER_SYSTEMD_DIR"
+podman generate systemd --files --new --name "$POD_NAME"
+echo "Generated systemd service files (rc=$?)"
+
+# Stop & remove live pod and containers, then verify the pod is really gone
+podman pod stop --time 15 --ignore "$POD_NAME"
+podman pod rm --force --ignore "$POD_NAME"
+if ! podman pod exists "$POD_NAME"; then
+  echo "Stopped & removed live pod $POD_NAME and containers"
+else
+  echo "ERROR: Pod $POD_NAME still exists." >&2
+  exit 1
+fi
+
+# Enable systemd services
+# Reload the user manager so it sees the unit files, then enable and start
+# the pod unit followed by the container unit. Each unit is checked with
+# is-enabled / is-active; under 'set -e' a failing check ends the script.
+systemctl --user daemon-reload
+for unit in "pod-${POD_NAME}.service" "container-${CTR_NAME}.service"; do
+  systemctl --user enable --now "$unit"
+  systemctl --user is-enabled "$unit"
+  systemctl --user is-active "$unit"
+  echo "Enabled systemd service $unit (rc=$?)"
+  echo "To view status: systemctl --user status $unit"
+  echo "To view logs: journalctl --user -u $unit -f"
+done
+
+echo "PyTorch API is reachable at http://$HOST_LOCAL_IP:$PYTORCH_HOST_PORT"