# syntax=docker/dockerfile:1
# Pin the stable v1 BuildKit frontend (the old "1.0-experimental" tag is obsolete).
# https://docs.docker.com/develop/develop-images/build_enhancements/#overriding-default-frontends


# NOTE(review): stale TODO removed -- the bases below already use `nvidia/cuda`
# (not cudagl).  Build-stage image must be "devel" (has nvcc + CUDA headers);
# the final stage only needs "runtime".
ARG CUDA_DEVEL="12.6.2-devel-ubuntu22.04"
ARG CUDA_RUNTIME="12.6.2-runtime-ubuntu22.04"

# "runtime" starts as a ~2.3GB image
# "devel" is a superset of runtime and is ~4.2GB
FROM nvidia/cuda:$CUDA_DEVEL AS builder


# CUDA compute capabilities we compile device code for:
#   https://en.wikipedia.org/wiki/CUDA#GPUs_supported
# v7.5 == Turing
# v6.1 == Pascal
ARG CUDA_ARCH_75=75
ARG CUDA_ARCH_61=61

# ffmpeg release tarball to build from (also used to derive the unpack dir name below)
ARG FFMPEG_TGZ=https://ffmpeg.org/releases/ffmpeg-7.1.tar.gz


# create an ffmpeg (w/ shared libs) that can utilize nvidia GPU
WORKDIR /tmp

# build-time only (ARG, not ENV) so it doesn't leak into the runtime environment
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get -yqq update  &&  apt-get -yqq install --no-install-recommends \
  # get ffmpeg wanted typicals (duplicate `wget` entry removed):
  autoconf  automake  build-essential  cmake  git-core \
  pkg-config  texinfo  wget  yasm  sudo \
  # with --no-install-recommends, wget no longer pulls this in -- needed for https:
  ca-certificates \
  libass-dev  libfreetype6-dev  libgnutls28-dev  libsdl2-dev  libtool  libva-dev  libvdpau-dev \
  libvorbis-dev  libxcb1-dev  libxcb-shm0-dev  libxcb-xfixes0-dev  meson  ninja-build \
  zlib1g-dev \
  \
  # needed to make archive.org mp4 derivatives:
  libx264-dev libfdk-aac-dev \
  # needed for https:// source urls:
  openssl libssl-dev \
  # these allow us to make any rarely encoded source file decoding avail:
  libx265-dev libvpx-dev libopus-dev \
  # drop the apt cache in the same layer so it never lands in an image layer
  &&  rm -rf /var/lib/apt/lists/*


# install nvidia headers at a pinned release (they were moved out of the ffmpeg tree)
RUN git clone https://git.videolan.org/git/ffmpeg/nv-codec-headers  && \
    cd nv-codec-headers  &&  git checkout n12.2.72.0  && \
    # we are already root in this stage -- no sudo needed (hadolint DL3004);
    # the trailing `cd ..` was dropped: each RUN starts back at WORKDIR anyway
    make install


# build ffmpeg from source, so we can add in all the nvidia/cuda options.
# download to a file instead of piping wget into tar: /bin/sh has no pipefail,
# so a failed download in a pipeline would otherwise be silently ignored (DL4006)
RUN wget -q "${FFMPEG_TGZ}" -O ffmpeg.tar.gz  && \
    tar xzf ffmpeg.tar.gz  &&  rm ffmpeg.tar.gz  && \
    mv "$(basename "${FFMPEG_TGZ}" .tar.gz)" ffmpeg
WORKDIR /tmp/ffmpeg

# Compile ffmpeg twice - same stanzas just `$CUDA_ARCH_..` and final `cp` differ

# patch `configure` since Tesla T4 is on Turing architecture GPU (o/w --enable-libnpp fails)
#   https://en.wikipedia.org/wiki/CUDA#GPUs_supported
#   https://github.com/NVIDIA/cuda-samples/issues/46
RUN sed -i -e "s/gencode arch=compute_..,code=sm_../gencode arch=compute_${CUDA_ARCH_75},code=sm_${CUDA_ARCH_75}/" ./configure  && \
    # `|| true` guards ONLY `make distclean` (fails on a fresh tree with no
    # Makefile).  The previous `.. && make distclean || echo && ..` grouped as
    # ((sed && distclean) || echo), silently swallowing a sed failure too.
    { make distclean || true; }  && \
    # --enable-libvorbis deliberately omitted for now (xxxx)
    ./configure --enable-nonfree --enable-gpl \
      --enable-libfdk-aac \
      --enable-libfreetype --enable-libharfbuzz \
      --enable-libopus \
      --enable-libvpx \
      --enable-libx264 \
      --enable-libx265 \
      --enable-openssl \
      --enable-cuda --enable-cuda-sdk --enable-cuda-nvcc --enable-nvenc --enable-cuvid --enable-libnpp \
      --extra-cflags=-I/usr/local/cuda/include --extra-ldflags=-L/usr/local/cuda/lib64  && \
    make -j4  && \
    cp ffmpeg /tmp/ffmpeg-turing  && \
    cp ffplay /tmp/ffplay-turing

# second pass: same recipe, re-targeted at Pascal (compute capability 6.1)
RUN sed -i -e "s/gencode arch=compute_..,code=sm_../gencode arch=compute_${CUDA_ARCH_61},code=sm_${CUDA_ARCH_61}/" ./configure  && \
    # `|| true` guards ONLY `make distclean` (cleans the previous arch's build);
    # the previous `.. && make distclean || echo && ..` grouped as
    # ((sed && distclean) || echo), silently swallowing a sed failure too.
    { make distclean || true; }  && \
    # --enable-libvorbis deliberately omitted for now (xxxx)
    ./configure --enable-nonfree --enable-gpl \
      --enable-libfdk-aac \
      --enable-libfreetype --enable-libharfbuzz \
      --enable-libopus \
      --enable-libvpx \
      --enable-libx264 \
      --enable-libx265 \
      --enable-openssl \
      --enable-cuda --enable-cuda-sdk --enable-cuda-nvcc --enable-nvenc --enable-cuvid --enable-libnpp \
      --extra-cflags=-I/usr/local/cuda/include --extra-ldflags=-L/usr/local/cuda/lib64  && \
    make -j4  && \
    cp ffmpeg /tmp/ffmpeg-pascal  && \
    cp ffplay /tmp/ffplay-pascal

# now collect up all the .so files we'll need for the runtime, into new lib/ subdir
RUN BIN=ffmpeg  &&  mkdir lib  &&  ( \
    # awk substr() is 1-indexed: substr($3,1,1) is the first char of the lib
    # path; the previous substr($3,0,1) returns "" in gawk/mawk, so the "/"
    # test never matched and NO libraries were copied
    ldd ${BIN?} |awk '{if(substr($3,1,1)=="/") print $3}'; \
  ) |xargs -d '\n' -I{} cp --copy-contents {} ./lib



# switch to the smaller "runtime" baseline.
# now we just keep the executable(s) and .so files they need and chuck everything else above.
FROM nvidia/cuda:$CUDA_RUNTIME
# Turing builds become the default /ffmpeg and /ffplay; Pascal builds keep a suffix
COPY --from=builder /tmp/ffmpeg-pascal /ffmpeg-pascal
COPY --from=builder /tmp/ffmpeg-turing /ffmpeg
COPY --from=builder /tmp/ffplay-pascal /ffplay-pascal
COPY --from=builder /tmp/ffplay-turing /ffplay
# the shared libraries gathered via ldd in the builder stage
COPY --from=builder /tmp/ffmpeg/lib/   /fflib

# @see cuda-runtime.sh for where this small image of three cuda runtime .so files came from
# COPY --from=registry.archive.org/www/ffmpeg-gpu/cuda /cuda/*.so.1 /fflib

# /cuda is volume-mounted from the container host's /usr/lib/x86_64-linux-gnu/
# so that at runtime we can load the host's driver-matched libraries:
#    libcuda.so.1
#    libnvcuvid.so.1
#    libnvidia-encode.so.1

# resolve shared libs from our collected /fflib first, then the host-mounted /cuda
ENV         LD_LIBRARY_PATH=/fflib:/cuda
# exec form: ffmpeg runs as PID 1 and receives signals (e.g. docker stop) directly
ENTRYPOINT  ["/ffmpeg"]
# default argument when none supplied; `docker run <img> <ffmpeg args>` overrides it
CMD         ["--help"]