# syntax=docker/dockerfile:1.4
#
# Builds vLLM from source on top of the NVIDIA PyTorch container and starts
# the vLLM OpenAI-compatible API server as the container entrypoint.
FROM nvcr.io/nvidia/pytorch:25.02-py3 AS base

WORKDIR /tmp

# Install required packages. The apt lists are removed in the same layer so
# they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ccache \
        git \
        kmod \
        python3-pip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Set environment variable required by vLLM.
ENV VLLM_FLASH_ATTN_VERSION=2

# Clone the vLLM repository into /tmp/vllm.
RUN git clone https://github.com/vllm-project/vllm.git

# Change working directory to the cloned repository.
WORKDIR /tmp/vllm

# Run the repository's preparatory script and install build dependencies.
# NOTE(review): use_existing_torch.py presumably adjusts the requirement files
# so the PyTorch shipped in the base image is reused - confirm against the repo.
RUN python3 use_existing_torch.py && \
    pip install --no-cache-dir -r requirements/build.txt && \
    pip install --no-cache-dir setuptools_scm

# Build vLLM from source in develop mode. The ccache directory is a BuildKit
# cache mount, so compiler output is reused across builds without being stored
# in an image layer. MAX_JOBS and CCACHE_DIR are passed to the build as
# environment variables.
#
# The checkout in /tmp/vllm is intentionally kept: a develop-mode install links
# to the source tree instead of copying it, so deleting the directory would
# break `import vllm` in the check below and at runtime.
RUN --mount=type=cache,target=/root/.cache/ccache \
    MAX_JOBS=10 CCACHE_DIR=/root/.cache/ccache \
    python3 setup.py develop

# Test the installation by printing the vLLM version.
RUN python3 -c "import vllm; print(vllm.__version__)"

# Set the entrypoint to start the vLLM OpenAI-compatible API server.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]