Blancco: playbook now produces working Ubuntu-kernel initramfs out of the box

Companion to the previous commit (4550d43). These three files should have
been part of that commit but were left out of `git add`:

- .gitignore: negation rule for boot-tools/blancco/grub-blancco.cfg so the
  tracked cfg (source of truth for grubx64.efi rebuilds) survives
  the blanket boot-tools/ ignore.

- playbook/blancco-init.sh: rewritten to load drivers via modprobe (with
  dependency resolution), cover the full NIC driver list, and send a
  set -x trace to /dev/console. On "no NIC after 60s" it dumps dmesg,
  the PCI device list and /proc/modules, then drops to an interactive
  shell. Replaces the narrow insmod-loop version that silently hung on
  unsupported NICs.

- playbook/pxe_server_setup.yml "Build Blancco PXE initramfs" task now
  sweeps the full drivers/net/ tree (ethernet + phy + mdio + usb + fddi
  + wan) plus overlay / squashfs / loop / ptp / libphy / mii deps, runs
  depmod to regenerate modules.dep inside the initramfs (required for
  modprobe dependency resolution), and symlinks the full applet list
  blancco-init.sh needs (modprobe, insmod, dmesg, find, env, etc).
  Result: ~20 MB initramfs vs the old 2 MB narrow build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
cproudlock
2026-04-22 18:08:57 -04:00
parent 4550d43d9d
commit 70f176650b
3 changed files with 140 additions and 154 deletions

View File

@@ -2,167 +2,125 @@
# Blancco PXE Loader - init script for custom initramfs
# Boot chain: iPXE -> GRUB EFI -> Ubuntu kernel + this initramfs -> switch_root to Blancco
#
# Blancco's own kernel freezes on Dell Precision towers during PXE boot.
# Workaround: boot Ubuntu kernel, download Blancco rootfs (squashfs), mount
# overlay filesystem, and switch_root into Blancco's userspace.
# Blancco's own kernel freezes / lacks NIC drivers for some Dell Precision
# hardware during PXE boot. Workaround: boot Ubuntu kernel (which has a wider
# NIC driver set), download Blancco rootfs (squashfs), overlay-mount, and
# switch_root into Blancco's userspace.
#
# Verbose trace + shell-on-NIC-failure because silent hangs during Blancco
# PXE boot are painful to debug. set -x goes to /dev/console so the screen
# shows every step; if no NIC appears within 60s of the modprobe sweep, we
# dump the dmesg tail, the PCI device list from sysfs and /proc/modules,
# then drop to sh so the operator can investigate without re-imaging.
exec >/dev/console 2>&1
set -x
echo ""
echo "============================================"
echo " Blancco PXE loader (verbose)"
echo "============================================"
echo ""
export PATH=/bin:/sbin
echo ""
echo "============================================"
echo " Blancco PXE loader"
echo "============================================"
echo ""
mount -t proc proc /proc
mount -t sysfs sysfs /sys
mount -t devtmpfs devtmpfs /dev 2>/dev/null
mkdir -p /tmp /run
mount -t devtmpfs devtmpfs /dev 2>/dev/null || mount -t tmpfs tmpfs /dev
mkdir -p /tmp /run /run/lower /run/upper /run/work /run/newroot
KVER=$(uname -r)
echo "Running kernel: $KVER"
ls /lib/modules/ 2>/dev/null
echo "[1/4] Loading NIC drivers..."
for mod in /lib/modules/*.ko; do
echo " insmod $(basename $mod)"
insmod $mod 2>/dev/null || true
echo "[1/5] Loading NIC drivers via modprobe (resolves deps automatically)..."
# Throw the full common-NIC driver list at the wall. modprobe resolves the
# deps from /lib/modules/$KVER/modules.dep (built by prepare-boot-tools).
# Anything missing is silently ignored; whatever matches PCI IDs will bind.
for drv in \
mii libphy ptp \
e1000 e1000e igb igc ixgbe ixgbevf i40e ice iavf \
tg3 bnx2 bnx2x bnxt_en b44 \
r8169 r8152 atlantic \
vmxnet3 virtio_net virtio_pci \
pcnet32 8139too 8139cp \
sfc sfc_ef100 mlx4_en mlx5_core \
alx atl1c atl1e atl2 \
via_rhine via_velocity forcedeth \
pegasus dm9601 asix ax88179_178a cdc_ether cdc_ncm rndis_host; do
modprobe -v "$drv" 2>/dev/null && echo " OK $drv" || true
done
sleep 5
sleep 3
echo " Interfaces after driver load:"
ls /sys/class/net/ 2>/dev/null
echo "[2/5] /sys/class/net after driver load:"
ls /sys/class/net/ || true
ip link || true
echo " Waiting for network interface..."
echo " Waiting up to 60s for non-lo interface..."
IFACE=""
COUNT=0
while [ $COUNT -lt 60 ]; do
for i in /sys/class/net/*; do
ifname="${i##*/}"
if [ "$ifname" != "lo" ] && [ -d "$i" ]; then
IFACE=$ifname
break 2
fi
for i in $(seq 1 60); do
for n in /sys/class/net/*; do
name="${n##*/}"
[ "$name" = "lo" ] && continue
[ -d "$n" ] && IFACE="$name" && break 2
done
COUNT=$((COUNT + 1))
sleep 1
echo -n "."
done
echo ""
if [ -z "$IFACE" ]; then
echo "ERROR: No network interface found!"
echo "Available interfaces:"
ls /sys/class/net/ 2>/dev/null
exec sh
echo "ERROR: No network interface after 60s"
echo "=== dmesg tail ==="; dmesg | tail -40
echo "=== PCI devices (sysfs) ==="; ls /sys/bus/pci/devices/ 2>/dev/null
echo "=== loaded modules ==="; cat /proc/modules
echo "Dropping to shell - type 'exit' to reboot."
exec /bin/sh
fi
echo " Interface: $IFACE"
ip link set $IFACE up
echo " IFACE=$IFACE, bringing up..."
ip link set "$IFACE" up || ifconfig "$IFACE" up
sleep 2
SERVER=10.9.100.1
ifconfig $IFACE 10.9.100.250 netmask 255.255.255.0 up
ifconfig "$IFACE" 10.9.100.250 netmask 255.255.255.0 up
sleep 1
echo " IP: 10.9.100.250"
echo " IP: 10.9.100.250 SERVER: $SERVER"
ip addr
echo "[2/4] Downloading Blancco rootfs (666MB)..."
echo "[3/5] Downloading airootfs.sfs (~756 MB)..."
wget -O /tmp/airootfs.sfs http://$SERVER/blancco/arch/x86_64/airootfs.sfs 2>&1
if [ ! -s /tmp/airootfs.sfs ]; then
echo "ERROR: Failed to download rootfs!"
exec sh
fi
echo " OK ($(wc -c < /tmp/airootfs.sfs) bytes)"
echo "[3/4] Mounting rootfs..."
mkdir -p /run/lower /run/upper /run/work /run/newroot
[ -s /tmp/airootfs.sfs ] || { echo "ERROR: download failed"; exec /bin/sh; }
echo "[4/5] Mounting rootfs + overlay..."
modprobe overlay 2>/dev/null || insmod /lib/modules/$KVER/kernel/fs/overlayfs/overlay.ko 2>/dev/null
modprobe squashfs 2>/dev/null || insmod /lib/modules/$KVER/kernel/fs/squashfs/squashfs.ko 2>/dev/null
modprobe loop 2>/dev/null
losetup /dev/loop0 /tmp/airootfs.sfs
mount -t squashfs -o ro /dev/loop0 /run/lower
if [ $? -ne 0 ]; then
echo "ERROR: squashfs mount failed!"
exec sh
fi
insmod /lib/modules/overlay.ko 2>/dev/null
mount -t tmpfs -o size=50% tmpfs /run/upper
mkdir -p /run/upper/upper /run/upper/work
mount -t overlay overlay -o lowerdir=/run/lower,upperdir=/run/upper/upper,workdir=/run/upper/work /run/newroot
if [ $? -ne 0 ]; then
echo "ERROR: overlay mount failed!"
exec sh
fi
echo "[4/5] Installing kernel modules (132MB)..."
echo "[5/5] Fetching kmod tarball + config..."
wget -O /tmp/kmod.tar.gz http://$SERVER/blancco/kmod.tar.gz 2>&1
if [ -s /tmp/kmod.tar.gz ]; then
cd /run/newroot
gunzip -c /tmp/kmod.tar.gz | tar xf -
rm -f /tmp/kmod.tar.gz
cd /
echo " OK"
else
echo " WARNING: Failed to download kernel modules"
fi
[ -s /tmp/kmod.tar.gz ] && (cd /run/newroot && gunzip -c /tmp/kmod.tar.gz | tar xf - && rm -f /tmp/kmod.tar.gz)
echo "[5/6] Switching root to Blancco..."
mkdir -p /run/newroot/run /run/newroot/proc /run/newroot/sys /run/newroot/dev /run/newroot/tmp
mkdir -p /run/newroot/albus
wget -O /run/newroot/albus/config.xml http://$SERVER/blancco/config-clean.xml 2>&1 || true
wget -O /run/newroot/albus/preferences.xml http://$SERVER/blancco/preferences.xml 2>&1 || true
cp -f /run/newroot/albus/preferences.xml /run/newroot/albus/preferences.save 2>/dev/null || true
echo "[6/6] Downloading Blancco config..."
wget -O /run/newroot/albus/config.xml http://$SERVER/blancco/config-clean.xml 2>&1
wget -O /run/newroot/albus/preferences.xml http://$SERVER/blancco/preferences.xml 2>&1
if [ -s /run/newroot/albus/config.xml ]; then
echo " config.xml: $(wc -c < /run/newroot/albus/config.xml) bytes"
else
echo " WARNING: Failed to download config.xml"
fi
if [ -s /run/newroot/albus/preferences.xml ]; then
cp -f /run/newroot/albus/preferences.xml /run/newroot/albus/preferences.save
echo " preferences.xml: $(wc -c < /run/newroot/albus/preferences.xml) bytes"
else
echo " WARNING: Failed to download preferences.xml"
fi
# Pre-configure X.org to use modesetting driver (generic KMS, works with all GPUs)
mkdir -p /run/newroot/etc/X11/xorg.conf.d
echo " X.org: forcing modesetting driver"
cat > /run/newroot/etc/X11/xorg.conf.d/20-failsafeDriver.conf << 'XEOF'
cat > /run/newroot/etc/X11/xorg.conf.d/20-failsafeDriver.conf << XEOF
Section "Device"
Identifier "Failsafe Video Device"
Driver "modesetting"
EndSection
XEOF
# Enable SSH for remote debugging
echo " Enabling SSH (root:blancco)..."
if [ -f /run/newroot/etc/ssh/sshd_config ]; then
sed 's/^#*PermitRootLogin.*/PermitRootLogin yes/' /run/newroot/etc/ssh/sshd_config > /run/newroot/etc/ssh/sshd_config.new || true
mv /run/newroot/etc/ssh/sshd_config.new /run/newroot/etc/ssh/sshd_config || true
fi
cat > /run/newroot/etc/rc.local << 'RCEOF'
#!/bin/bash
echo 'root:blancco' | chpasswd
ssh-keygen -A 2>/dev/null
/usr/bin/sshd 2>/dev/null
RCEOF
chmod +x /run/newroot/etc/rc.local
cat > /run/newroot/etc/systemd/system/pxe-debug.service << 'SVCEOF'
[Unit]
Description=PXE Debug SSH
After=network.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/etc/rc.local
[Install]
WantedBy=multi-user.target
SVCEOF
ln -sf /etc/systemd/system/pxe-debug.service /run/newroot/etc/systemd/system/multi-user.target.wants/pxe-debug.service 2>/dev/null
mkdir -p /run/newroot/proc /run/newroot/sys /run/newroot/dev /run/newroot/run /run/newroot/tmp
mount --move /proc /run/newroot/proc
mount --move /sys /run/newroot/sys
mount --move /dev /run/newroot/dev
echo " Starting Blancco..."
echo "Switching root..."
exec switch_root /run/newroot /sbin/init