# CUDA release used to pick the base image tags in the FROM lines of this file.
ARG CUDA_VERSION=12.8.1

# Build stage: CUDA "devel" image on UBI 9, used for compilation.
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubi9 AS builder

# Place the build script and lib.sh side by side in /src/.
COPY container-images/scripts/build_llama_and_whisper.sh container-images/scripts/lib.sh /src/

# Run the build script from /src/, passing "cuda" as its only argument.
WORKDIR /src/
RUN ./build_llama_and_whisper.sh cuda

# Runtime stage: CUDA "runtime" image on UBI 9, same CUDA_VERSION as the builder.
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubi9

# Copy the builder stage's /tmp/install tree into /usr.
COPY --from=builder /tmp/install /usr

# Install python3.12 and ramalama to support a non-standard use-case.
# /usr/bin/python3 is repointed at python3.12 so the "python3" calls below use it.
# NOTE(review): other tools in the image that invoke /usr/bin/python3 will now
# get python3.12 as well - confirm that this is acceptable.
RUN dnf -y install python3.12 \
    && dnf -y clean all \
    && ln -sf python3.12 /usr/bin/python3

# Copy the build context into the image and pip-install it under the /usr prefix.
COPY . /src/ramalama
WORKDIR /src/ramalama
RUN python3 -m ensurepip \
    && python3 -m pip --no-cache-dir --disable-pip-version-check \
        install --compile --prefix=/usr .

# Return to the filesystem root; the entrypoint is empty and the default command is bash.
WORKDIR /
ENTRYPOINT []
CMD ["/bin/bash"]
