#!/usr/bin/env bash
#
# Host provisioning for a GPU/container node: base packages, journald, THP,
# swap, kernel cmdline, sysctls, NVIDIA driver, Docker + NVIDIA container
# toolkit, GPU persistence service and sysstat. Must be run as root.
set -euo pipefail

### ===== USER TUNABLES =====
SWAPFILE="/swapfile"
SWAPSIZE="16G"                            # small but life-saving for bursty allocs
DOCKER_VER=""                             # empty = distro default; or pin like "5:27.2.0-1~ubuntu.24.04~noble"
NVIDIA_DRIVER_PKG="nvidia-driver-575-open" # empty = skip; or e.g. "nvidia-driver-550"
ENABLE_ZSWAP="yes"                        # "yes" enables zswap (with zstd); requires reboot
THP_MODE="madvise"                        # "madvise" is a good default for mixed HPC/AI
CGROUP_OPTS="cgroup_enable=memory swapaccount=1" # for cgroup v1 memory accounting
ZSWAP_OPTS="zswap.enabled=1 zswap.compressor=zstd zswap.zpool=zsmalloc"
KERNEL_NET_TUNE="yes"                     # moderate network sysctls
TARGET_KERNEL="6.8.0-79-generic"          # Target kernel version from autoinstall
### ==========================

# need_cmd NAME: succeeds when NAME resolves to a command on PATH.
need_cmd() { command -v "$1" >/dev/null 2>&1; }

# log/warn: coloured status lines on stdout.
log() { printf '\033[1;32m[+] %s\033[0m\n' "$*"; }
warn() { printf '\033[1;33m[!] %s\033[0m\n' "$*"; }

# die: print an error on stderr and abort the script with status 1.
die() {
  printf '\033[1;31m[ERROR] %s\033[0m\n' "$*" >&2
  exit 1
}

# kernel_image_installed VERSION: succeeds only when the package
# linux-image-VERSION is fully installed. An exact dpkg-query lookup avoids
# the `dpkg -l | grep -q` pipeline, which can fail spuriously under pipefail
# (SIGPIPE), treats the dots in VERSION as regex wildcards, and also matches
# removed-but-not-purged packages.
kernel_image_installed() {
  local status
  status=$(dpkg-query -W -f='${Status}' "linux-image-$1" 2>/dev/null) || return 1
  [[ "${status}" == "install ok installed" ]]
}

if [[ "$(id -u)" -ne 0 ]]; then
  die "Run as root (sudo)."
fi

# Check current kernel version
CURRENT_KERNEL=$(uname -r)
log "Current kernel: ${CURRENT_KERNEL}"
log "Target kernel: ${TARGET_KERNEL}"

# Determine which kernel headers to install
if [[ "${CURRENT_KERNEL}" == "${TARGET_KERNEL}" ]]; then
  log "Already running target kernel, using current headers"
  HEADER_PACKAGE="linux-headers-${CURRENT_KERNEL}"
elif kernel_image_installed "${TARGET_KERNEL}"; then
  log "Target kernel installed but not active, using target headers"
  HEADER_PACKAGE="linux-headers-${TARGET_KERNEL}"
  warn "System needs reboot to activate kernel ${TARGET_KERNEL}"
else
  log "Target kernel not found, using current headers"
  HEADER_PACKAGE="linux-headers-${CURRENT_KERNEL}"
fi

# 0) Basic packages
log "Installing base packages..."
export DEBIAN_FRONTEND=noninteractive
apt-get update -y
apt-get install -y curl wget ca-certificates gnupg lsb-release \
  software-properties-common net-tools iproute2 ethtool pciutils dmidecode \
  jq htop unzip chrony sysstat \
  "${HEADER_PACKAGE}" build-essential dkms

# 1) Persistent journald (debuggable incidents)
log "Enabling persistent journal..."
mkdir -p /var/log/journal
# Best-effort: tolerate a missing journald.conf.
sed -i 's/^#\?Storage=.*/Storage=persistent/' /etc/systemd/journald.conf || true
systemctl restart systemd-journald

# 2) Transparent Huge Pages mode
log "Setting THP -> ${THP_MODE} (takes effect now and on boot)..."
for f in /sys/kernel/mm/transparent_hugepage/enabled /sys/kernel/mm/transparent_hugepage/defrag; do
  if [[ -f "$f" ]]; then
    # Best-effort: a kernel that rejects the value must not abort provisioning.
    echo "${THP_MODE}" > "$f" || true
  fi
done
mkdir -p /etc/tmpfiles.d
# NOTE(review): the heredoc body below and the swap-creation commands in
# section 3 were lost from the original text (only the trailing append to
# /etc/fstab survived). They are reconstructed from the tunables and the
# surrounding log messages; confirm against the intended behaviour.
cat >/etc/tmpfiles.d/10-thp.conf <<EOF
w /sys/kernel/mm/transparent_hugepage/enabled - - - - ${THP_MODE}
w /sys/kernel/mm/transparent_hugepage/defrag  - - - - ${THP_MODE}
EOF

# 3) Swap file
if [[ ! -e "${SWAPFILE}" ]] && ! grep -qF -- "${SWAPFILE}" /proc/swaps; then
  log "Creating ${SWAPSIZE} swap file at ${SWAPFILE}..."
  fallocate -l "${SWAPSIZE}" "${SWAPFILE}"
  chmod 600 "${SWAPFILE}"
  mkswap "${SWAPFILE}"
  swapon "${SWAPFILE}"
  # Avoid duplicate fstab entries when the script is re-run.
  if ! grep -qF -- "${SWAPFILE} " /etc/fstab; then
    echo "${SWAPFILE} none swap sw 0 0" >> /etc/fstab
  fi
else
  log "Swap already present; skipping creation."
fi

# 4) Kernel cmdline: cgroup memory + optional zswap
log "Ensuring kernel cmdline includes cgroup memory accounting..."
GRUB_DEFAULT=/etc/default/grub
[[ -f "$GRUB_DEFAULT" ]] || die "Missing $GRUB_DEFAULT"
EXTRA_OPTS="${CGROUP_OPTS}"
if [[ "${ENABLE_ZSWAP}" == "yes" ]]; then
  EXTRA_OPTS="${EXTRA_OPTS} ${ZSWAP_OPTS}"
fi

# Check every option individually so that enabling zswap later still takes
# effect on a host whose cmdline already carries the cgroup options.
grub_cmdline_lines=$(grep -E '^GRUB_CMDLINE_LINUX(_DEFAULT)?=' "$GRUB_DEFAULT" || true)
missing_opts=()
read -r -a wanted_opts <<<"${EXTRA_OPTS}"
for opt in "${wanted_opts[@]}"; do
  if [[ "${grub_cmdline_lines}" != *"${opt}"* ]]; then
    missing_opts+=("${opt}")
  fi
done

if ((${#missing_opts[@]} > 0)); then
  # Append the missing options just before the closing quote of the value.
  sed -i "s/^\(GRUB_CMDLINE_LINUX_DEFAULT=\"[^\"]*\)\"/\1 ${missing_opts[*]}\"/" "$GRUB_DEFAULT"
  log "Updated GRUB cmdline; will run update-grub."
  update-grub
else
  log "CGROUP options already present in GRUB."
fi

# 5) Sysctls (sane defaults for HPC/containers)
log "Applying sysctls..."
cat >/etc/sysctl.d/90-hpc-ai.conf <<'EOF'
# Allow many file watchers (builds, ML frameworks)
fs.inotify.max_user_watches = 1048576
fs.inotify.max_user_instances = 1024
vm.max_map_count = 1048576

# Keep some free memory to avoid high-order alloc failures
vm.min_free_kbytes = 262144

# Swappiness low but not zero: let swap act as safety valve
vm.swappiness = 10
EOF

# Honour the KERNEL_NET_TUNE tunable: the network section is only written
# when it is set to "yes" (the default), instead of unconditionally.
if [[ "${KERNEL_NET_TUNE}" == "yes" ]]; then
  cat >>/etc/sysctl.d/90-hpc-ai.conf <<'EOF'

# (Optional) net stack tuning — moderate & safe
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
net.core.rmem_default = 262144
net.core.wmem_default = 262144
net.core.netdev_max_backlog = 16384
net.core.somaxconn = 4096
net.ipv4.tcp_rmem = 4096 87380 134217728
net.ipv4.tcp_wmem = 4096 65536 134217728
net.ipv4.tcp_mtu_probing = 1
EOF
fi
sysctl --system >/dev/null

# 6) NVIDIA driver (optional)
if [[ -n "${NVIDIA_DRIVER_PKG}" ]]; then
  log "Installing NVIDIA driver package: ${NVIDIA_DRIVER_PKG}"
  apt-get install -y "${NVIDIA_DRIVER_PKG}"
else
  warn "Skipping NVIDIA driver install (NVIDIA_DRIVER_PKG empty). Ensure drivers are present."
fi

# 7) Docker + NVIDIA Container Toolkit
log "Installing Docker & nvidia-container-toolkit..."
if ! need_cmd docker; then
  # Official Docker repo
  os_id=$(. /etc/os-release; echo "${ID}")
  os_codename=$(. /etc/os-release; echo "${VERSION_CODENAME}")
  install -m 0755 -d /etc/apt/keyrings
  # --batch --yes: overwrite an existing keyring instead of prompting.
  curl -fsSL "https://download.docker.com/linux/${os_id}/gpg" \
    | gpg --batch --yes --dearmor -o /etc/apt/keyrings/docker.gpg
  chmod a+r /etc/apt/keyrings/docker.gpg
  echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/${os_id} ${os_codename} stable" \
    > /etc/apt/sources.list.d/docker.list
  apt-get update -y
  if [[ -n "$DOCKER_VER" ]]; then
    apt-get install -y docker-ce="$DOCKER_VER" docker-ce-cli="$DOCKER_VER" \
      containerd.io docker-buildx-plugin docker-compose-plugin
  else
    apt-get install -y docker-ce docker-ce-cli containerd.io \
      docker-buildx-plugin docker-compose-plugin
  fi
  systemctl enable --now docker
  # Best-effort: the "ops" account may not exist on this host.
  usermod -aG docker ops 2>/dev/null || true
else
  log "Docker already installed."
fi

# NVIDIA container toolkit
# This section runs on every invocation, so the keyring write must be
# non-interactive and overwrite-safe (--batch --yes); otherwise a re-run
# stops at gpg's "file exists" prompt and aborts under `set -e`.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
  | gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit.gpg
# Use the generic "stable/deb" repository list documented by NVIDIA rather
# than a per-distribution path, which is not published for every release.
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
  | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit.gpg] https://#g' \
  > /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt-get update -y
apt-get install -y nvidia-container-toolkit

log "Configuring Docker default runtime to NVIDIA..."
mkdir -p /etc/docker
if [[ -f /etc/docker/daemon.json ]]; then
  # Keep a timestamped backup because the file is overwritten below.
  cp /etc/docker/daemon.json "/etc/docker/daemon.json.bak.$(date +%s)"
fi
cat >/etc/docker/daemon.json <<'EOF'
{
  "default-runtime": "nvidia",
  "runtimes": {
    "nvidia": {
      "path": "nvidia-container-runtime",
      "runtimeArgs": []
    }
  },
  "log-driver": "local",
  "live-restore": true,
  "log-opts": {
    "max-size": "50m",
    "max-file": "3"
  }
}
EOF
systemctl restart docker

# 8) GPU persistence & ECC (if supported) at boot
log "Creating systemd service for GPU persistence..."
# systemd does not continue a quoted value across lines, so ExecStart= must
# be a single line; the optional tuning is kept as unit-file comments.
cat >/etc/systemd/system/nvidia-persist.service <<'EOF'
[Unit]
Description=NVIDIA Persistence & Basic GPU Settings
After=multi-user.target

[Service]
Type=oneshot
ExecStart=/bin/bash -c 'if command -v nvidia-smi >/dev/null 2>&1; then nvidia-smi -pm 1 || true; fi'
# Optional, to prefer maximum performance (can raise power draw), run:
#   nvidia-smi -acp UNRESTRICTED
#   nvidia-smi -i <index> -lgc 0    (per GPU; reset app clocks)
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
# Best-effort: hosts without a working driver should still finish provisioning.
systemctl enable --now nvidia-persist.service || true

# 9) Useful observability
log "Enabling sysstat (sar) for historical net/CPU/mem series..."
# Best-effort: /etc/default/sysstat may be absent or already enabled.
sed -i 's/ENABLED="false"/ENABLED="true"/' /etc/default/sysstat || true
systemctl enable --now sysstat

# 10) Sanity prints
log "Kernel cmdline now:"
cat /proc/cmdline
log "THP status:"
cat /sys/kernel/mm/transparent_hugepage/enabled || true
cat /sys/kernel/mm/transparent_hugepage/defrag || true
log "Swap status:"
swapon --show || true
log "Docker info (short):"
docker info --format 'CgroupDriver: {{.CgroupDriver}} | Runtimes: {{.Runtimes}} | DefaultRuntime: {{.DefaultRuntime}}' || true

echo -e "\n=== DONE ==="
if [[ "${CURRENT_KERNEL}" != "${TARGET_KERNEL}" ]]; then
  # Only claim "installed but not active" when the image package really is
  # installed; otherwise the reboot hint would be misleading.
  target_status=$(dpkg-query -W -f='${Status}' "linux-image-${TARGET_KERNEL}" 2>/dev/null || true)
  if [[ "${target_status}" == "install ok installed" ]]; then
    echo "REBOOT REQUIRED: Target kernel ${TARGET_KERNEL} is installed but not active."
  else
    warn "Target kernel ${TARGET_KERNEL} is not installed; still running ${CURRENT_KERNEL}."
  fi
fi
echo "If GRUB was changed, please REBOOT to activate cgroup memory & zswap settings."