Blancco: playbook now produces working Ubuntu-kernel initramfs out of the box

Companion to the previous commit (4550d43). Three files that should have
been in the same commit but got left out of `git add`:

- .gitignore: negation rule for boot-tools/blancco/grub-blancco.cfg so the
  tracked cfg (source of truth for grubx64.efi rebuilds) survives
  the blanket boot-tools/ ignore.

- playbook/blancco-init.sh: rewritten for modprobe-with-deps, full NIC
  driver coverage, set -x trace to /dev/console, dmesg + PCI-device +
  /proc/modules dump + interactive shell on "no NIC after 60s".
  Replaces the narrow insmod-loop version that silently hung on
  unsupported NICs.

- playbook/pxe_server_setup.yml "Build Blancco PXE initramfs" task now
  sweeps the full drivers/net/ tree (ethernet + phy + mdio + usb + fddi
  + wan) plus overlay / squashfs / loop / ptp / libphy / mii deps, runs
  depmod to regenerate modules.dep inside the initramfs (required for
  modprobe dependency resolution), and symlinks the full applet list
  blancco-init.sh needs (modprobe, insmod, dmesg, find, env, etc).
  Result: ~20 MB initramfs vs the old 2 MB narrow build.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
cproudlock
2026-04-22 18:08:57 -04:00
parent 4550d43d9d
commit 70f176650b
3 changed files with 140 additions and 154 deletions

3
.gitignore vendored
View File

@@ -24,6 +24,9 @@ offline-packages/
# Boot tool binaries (built by prepare-boot-tools.sh).
# Ignore the contents of boot-tools/ rather than the directory itself: git
# cannot re-include a file whose parent directory is excluded, so a plain
# "boot-tools/" rule would make the negation below a no-op.
**/boot-tools/*
# Track the Blancco GRUB config as source-of-truth for grubx64.efi rebuilds.
# prepare-boot-tools.sh rebuilds grubx64.efi from this file via grub-mkstandalone.
# Each directory level has to be re-included before the file inside it can be.
!/boot-tools/blancco/
/boot-tools/blancco/*
!/boot-tools/blancco/grub-blancco.cfg
# WinPE boot files (wimboot, boot.wim, BCD, ipxe.efi, etc.)
boot-files/

View File

@@ -2,167 +2,125 @@
# Blancco PXE Loader - init script for custom initramfs
# Boot chain: iPXE -> GRUB EFI -> Ubuntu kernel + this initramfs -> switch_root to Blancco
#
# Blancco's own kernel freezes on Dell Precision towers during PXE boot.
# Workaround: boot Ubuntu kernel, download Blancco rootfs (squashfs), mount
# overlay filesystem, and switch_root into Blancco's userspace.
# Blancco's own kernel freezes / lacks NIC drivers for some Dell Precision
# hardware during PXE boot. Workaround: boot Ubuntu kernel (which has a wider
# NIC driver set), download Blancco rootfs (squashfs), overlay-mount, and
# switch_root into Blancco's userspace.
#
# Verbose trace + shell-on-NIC-failure because silent hangs during Blancco
# PXE boot are painful to debug. set -x goes to /dev/console so the screen
# shows every step; if no NIC appears within 60s of the modprobe sweep, we
# dump the dmesg tail, the PCI device list from sysfs, and /proc/modules,
# then drop to sh so the operator can investigate without re-imaging.
exec >/dev/console 2>&1
set -x
echo ""
echo "============================================"
echo " Blancco PXE loader (verbose)"
echo "============================================"
echo ""
export PATH=/bin:/sbin
echo ""
echo "============================================"
echo " Blancco PXE loader"
echo "============================================"
echo ""
mount -t proc proc /proc
mount -t sysfs sysfs /sys
mount -t devtmpfs devtmpfs /dev 2>/dev/null
mkdir -p /tmp /run
mount -t devtmpfs devtmpfs /dev 2>/dev/null || mount -t tmpfs tmpfs /dev
mkdir -p /tmp /run /run/lower /run/upper /run/work /run/newroot
KVER=$(uname -r)
echo "Running kernel: $KVER"
ls /lib/modules/ 2>/dev/null
echo "[1/4] Loading NIC drivers..."
for mod in /lib/modules/*.ko; do
echo " insmod $(basename $mod)"
insmod $mod 2>/dev/null || true
echo "[1/5] Loading NIC drivers via modprobe (resolves deps automatically)..."
# Try every common NIC driver. modprobe resolves each module's dependencies
# from /lib/modules/$KVER/modules.dep, which depmod writes when the initramfs
# is built. Drivers that are missing or fail to load are skipped silently;
# whichever driver matches the machine's PCI IDs will bind.
for drv in \
mii libphy ptp \
e1000 e1000e igb igc ixgbe ixgbevf i40e ice iavf \
tg3 bnx2 bnx2x bnxt_en b44 \
r8169 r8152 atlantic \
vmxnet3 virtio_net virtio_pci \
pcnet32 8139too 8139cp \
sfc sfc_ef100 mlx4_en mlx5_core \
alx atl1c atl1e atl2 \
via_rhine via_velocity forcedeth \
pegasus dm9601 asix ax88179_178a cdc_ether cdc_ncm rndis_host; do
modprobe -v "$drv" 2>/dev/null && echo " OK $drv" || true
done
sleep 5
sleep 3
echo " Interfaces after driver load:"
ls /sys/class/net/ 2>/dev/null
echo "[2/5] /sys/class/net after driver load:"
ls /sys/class/net/ || true
ip link || true
echo " Waiting for network interface..."
echo " Waiting up to 60s for non-lo interface..."
IFACE=""
COUNT=0
while [ $COUNT -lt 60 ]; do
for i in /sys/class/net/*; do
ifname="${i##*/}"
if [ "$ifname" != "lo" ] && [ -d "$i" ]; then
IFACE=$ifname
break 2
fi
for i in $(seq 1 60); do
for n in /sys/class/net/*; do
name="${n##*/}"
[ "$name" = "lo" ] && continue
[ -d "$n" ] && IFACE="$name" && break 2
done
COUNT=$((COUNT + 1))
sleep 1
echo -n "."
done
echo ""
if [ -z "$IFACE" ]; then
echo "ERROR: No network interface found!"
echo "Available interfaces:"
ls /sys/class/net/ 2>/dev/null
exec sh
echo "ERROR: No network interface after 60s"
echo "=== dmesg tail ==="; dmesg | tail -40
echo "=== PCI devices (sysfs) ==="; ls /sys/bus/pci/devices/ 2>/dev/null
echo "=== loaded modules ==="; cat /proc/modules
echo "Dropping to shell - type 'exit' to reboot."
exec /bin/sh
fi
echo " Interface: $IFACE"
ip link set $IFACE up
echo " IFACE=$IFACE, bringing up..."
ip link set "$IFACE" up || ifconfig "$IFACE" up
sleep 2
SERVER=10.9.100.1
ifconfig $IFACE 10.9.100.250 netmask 255.255.255.0 up
ifconfig "$IFACE" 10.9.100.250 netmask 255.255.255.0 up
sleep 1
echo " IP: 10.9.100.250"
echo " IP: 10.9.100.250 SERVER: $SERVER"
ip addr
echo "[2/4] Downloading Blancco rootfs (666MB)..."
echo "[3/5] Downloading airootfs.sfs (~756 MB)..."
wget -O /tmp/airootfs.sfs http://$SERVER/blancco/arch/x86_64/airootfs.sfs 2>&1
if [ ! -s /tmp/airootfs.sfs ]; then
echo "ERROR: Failed to download rootfs!"
exec sh
fi
echo " OK ($(wc -c < /tmp/airootfs.sfs) bytes)"
echo "[3/4] Mounting rootfs..."
mkdir -p /run/lower /run/upper /run/work /run/newroot
[ -s /tmp/airootfs.sfs ] || { echo "ERROR: download failed"; exec /bin/sh; }
echo "[4/5] Mounting rootfs + overlay..."
modprobe overlay 2>/dev/null || insmod /lib/modules/$KVER/kernel/fs/overlayfs/overlay.ko 2>/dev/null
modprobe squashfs 2>/dev/null || insmod /lib/modules/$KVER/kernel/fs/squashfs/squashfs.ko 2>/dev/null
modprobe loop 2>/dev/null
losetup /dev/loop0 /tmp/airootfs.sfs
mount -t squashfs -o ro /dev/loop0 /run/lower
if [ $? -ne 0 ]; then
echo "ERROR: squashfs mount failed!"
exec sh
fi
insmod /lib/modules/overlay.ko 2>/dev/null
mount -t tmpfs -o size=50% tmpfs /run/upper
mkdir -p /run/upper/upper /run/upper/work
mount -t overlay overlay -o lowerdir=/run/lower,upperdir=/run/upper/upper,workdir=/run/upper/work /run/newroot
if [ $? -ne 0 ]; then
echo "ERROR: overlay mount failed!"
exec sh
fi
echo "[4/5] Installing kernel modules (132MB)..."
echo "[5/5] Fetching kmod tarball + config..."
wget -O /tmp/kmod.tar.gz http://$SERVER/blancco/kmod.tar.gz 2>&1
if [ -s /tmp/kmod.tar.gz ]; then
cd /run/newroot
gunzip -c /tmp/kmod.tar.gz | tar xf -
rm -f /tmp/kmod.tar.gz
cd /
echo " OK"
else
echo " WARNING: Failed to download kernel modules"
fi
[ -s /tmp/kmod.tar.gz ] && (cd /run/newroot && gunzip -c /tmp/kmod.tar.gz | tar xf - && rm -f /tmp/kmod.tar.gz)
echo "[5/6] Switching root to Blancco..."
mkdir -p /run/newroot/run /run/newroot/proc /run/newroot/sys /run/newroot/dev /run/newroot/tmp
mkdir -p /run/newroot/albus
wget -O /run/newroot/albus/config.xml http://$SERVER/blancco/config-clean.xml 2>&1 || true
wget -O /run/newroot/albus/preferences.xml http://$SERVER/blancco/preferences.xml 2>&1 || true
cp -f /run/newroot/albus/preferences.xml /run/newroot/albus/preferences.save 2>/dev/null || true
echo "[6/6] Downloading Blancco config..."
wget -O /run/newroot/albus/config.xml http://$SERVER/blancco/config-clean.xml 2>&1
wget -O /run/newroot/albus/preferences.xml http://$SERVER/blancco/preferences.xml 2>&1
if [ -s /run/newroot/albus/config.xml ]; then
echo " config.xml: $(wc -c < /run/newroot/albus/config.xml) bytes"
else
echo " WARNING: Failed to download config.xml"
fi
if [ -s /run/newroot/albus/preferences.xml ]; then
cp -f /run/newroot/albus/preferences.xml /run/newroot/albus/preferences.save
echo " preferences.xml: $(wc -c < /run/newroot/albus/preferences.xml) bytes"
else
echo " WARNING: Failed to download preferences.xml"
fi
# Pre-configure X.org to use modesetting driver (generic KMS, works with all GPUs)
mkdir -p /run/newroot/etc/X11/xorg.conf.d
echo " X.org: forcing modesetting driver"
cat > /run/newroot/etc/X11/xorg.conf.d/20-failsafeDriver.conf << 'XEOF'
cat > /run/newroot/etc/X11/xorg.conf.d/20-failsafeDriver.conf << XEOF
Section "Device"
Identifier "Failsafe Video Device"
Driver "modesetting"
EndSection
XEOF
# Enable SSH for remote debugging
echo " Enabling SSH (root:blancco)..."
if [ -f /run/newroot/etc/ssh/sshd_config ]; then
sed 's/^#*PermitRootLogin.*/PermitRootLogin yes/' /run/newroot/etc/ssh/sshd_config > /run/newroot/etc/ssh/sshd_config.new || true
mv /run/newroot/etc/ssh/sshd_config.new /run/newroot/etc/ssh/sshd_config || true
fi
cat > /run/newroot/etc/rc.local << 'RCEOF'
#!/bin/bash
echo 'root:blancco' | chpasswd
ssh-keygen -A 2>/dev/null
/usr/bin/sshd 2>/dev/null
RCEOF
chmod +x /run/newroot/etc/rc.local
cat > /run/newroot/etc/systemd/system/pxe-debug.service << 'SVCEOF'
[Unit]
Description=PXE Debug SSH
After=network.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/etc/rc.local
[Install]
WantedBy=multi-user.target
SVCEOF
ln -sf /etc/systemd/system/pxe-debug.service /run/newroot/etc/systemd/system/multi-user.target.wants/pxe-debug.service 2>/dev/null
mkdir -p /run/newroot/proc /run/newroot/sys /run/newroot/dev /run/newroot/run /run/newroot/tmp
mount --move /proc /run/newroot/proc
mount --move /sys /run/newroot/sys
mount --move /dev /run/newroot/dev
echo " Starting Blancco..."
echo "Switching root..."
exec switch_root /run/newroot /sbin/init

View File

@@ -700,42 +700,66 @@
# Boot Ubuntu kernel, download Blancco rootfs, overlay mount, switch_root.
- name: "Build Blancco PXE initramfs"
# The narrow hand-picked NIC driver list used before 2026-04-22 produced
# a 2 MB initramfs that hung on "waiting for network interface" for any
# hardware outside e1000e/igb/tg3/bnx2/bnxt_en/b44. This rewrite sweeps
# the full drivers/net/ tree (ethernet + phy + mdio + usb + fddi + wan)
# plus overlay / squashfs / loop / ptp / libphy / mii deps, runs depmod
# so blancco-init.sh can use modprobe with proper dependency resolution,
# and produces a ~20 MB initramfs. Size isn't a concern - HTTP loads it
# in under two seconds at gigabit, versus the many minutes of hangtime
# the narrow build cost us when a NIC was unsupported.
args:
executable: /bin/bash
creates: "{{ web_root }}/blancco/kexec-initrd.img"
shell: |
set -e
WORK=$(mktemp -d)
mkdir -p "$WORK"/{bin,lib/modules,lib64,sbin,usr/share/udhcpc}
KVER=$(uname -r)
mkdir -p "$WORK"/{bin,lib/modules/$KVER/kernel,lib64,sbin,usr/share/udhcpc,etc,run,proc,sys,dev}
# Busybox: prefer the host's /bin/busybox, otherwise fall back to the static
# binary bundled on USB at playbook/busybox-static.
if [ -f /bin/busybox ]; then
cp /bin/busybox "$WORK/bin/"
cp /bin/busybox "$WORK/bin/busybox"
elif [ -f "{{ usb_root }}/playbook/busybox-static" ]; then
cp "{{ usb_root }}/playbook/busybox-static" "$WORK/bin/busybox"
chmod +x "$WORK/bin/busybox"
else
echo "ERROR: No busybox available (not at /bin/busybox or on USB)"
exit 1
fi
for cmd in sh awk cat chmod echo grep gunzip ifconfig ip ln losetup ls mkdir mknod mount reboot route sed sleep switch_root tar udhcpc umount wget cpio; do
chmod +x "$WORK/bin/busybox"
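# Applets blancco-init.sh needs. Added vs the old narrow list: ash, cp, dd,
# dmesg, env, export, find, insmod, modprobe, mv, rm, rmdir.
# NOTE(review): blancco-init.sh also calls seq, tail and uname, and export is
# normally a shell builtin rather than an applet - confirm against the
# bundled busybox.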
for cmd in sh ash awk cat chmod cp dd echo grep gunzip ifconfig ip insmod ln losetup ls mkdir mknod modprobe mount mv reboot rm rmdir route sed sleep switch_root tar udhcpc umount wget cpio dmesg env export find; do
ln -sf busybox "$WORK/bin/$cmd"
done
# NIC drivers (common server NICs)
KVER=$(uname -r)
KMOD="/lib/modules/$KVER/kernel/drivers/net/ethernet"
for drv in intel/e1000e/e1000e.ko.zst intel/igb/igb.ko.zst broadcom/tg3.ko.zst broadcom/bnx2.ko.zst broadcom/bnxt/bnxt_en.ko.zst broadcom/b44.ko.zst; do
if [ -f "$KMOD/$drv" ]; then
zstd -d "$KMOD/$drv" -o "$WORK/lib/modules/$(basename ${drv%.zst})" 2>/dev/null
# Full drivers/net/ tree - ethernet + phy + mdio + usb + fddi + wan.
# Preserve path under /lib/modules/$KVER so depmod can resolve deps.
NETDIR=/lib/modules/$KVER/kernel/drivers/net
for sub in ethernet mdio phy usb fddi wan; do
if [ -d "$NETDIR/$sub" ]; then
mkdir -p "$WORK/lib/modules/$KVER/kernel/drivers/net/$sub"
cp -r "$NETDIR/$sub/"* "$WORK/lib/modules/$KVER/kernel/drivers/net/$sub/" 2>/dev/null || true
fi
done
# Overlay + squashfs + loop + usb + hid + ptp + mii + net/core deps
for modpath in fs/overlayfs fs/squashfs drivers/block drivers/usb/core drivers/usb/host drivers/hid drivers/ptp drivers/net/mii.ko net/core; do
if [ -e "/lib/modules/$KVER/kernel/$modpath" ]; then
mkdir -p "$WORK/lib/modules/$KVER/kernel/$(dirname $modpath)"
cp -r "/lib/modules/$KVER/kernel/$modpath" "$WORK/lib/modules/$KVER/kernel/$modpath" 2>/dev/null || true
fi
done
# Overlay module
OVMOD="/lib/modules/$KVER/kernel/fs/overlayfs/overlay.ko.zst"
if [ -f "$OVMOD" ]; then
zstd -d "$OVMOD" -o "$WORK/lib/modules/overlay.ko" 2>/dev/null
fi
# Decompress zstd modules in-place (busybox insmod can't handle .zst)
find "$WORK/lib/modules" -name '*.ko.zst' -print0 | \
xargs -0 -I {} sh -c 'zstd -d --rm -o "${1%.zst}" "$1" 2>/dev/null' _ {} || true
# Preserve modules.builtin / modules.order, regenerate modules.dep so
# modprobe can resolve dependencies inside the initramfs.
cp /lib/modules/$KVER/modules.builtin "$WORK/lib/modules/$KVER/" 2>/dev/null || true
cp /lib/modules/$KVER/modules.order "$WORK/lib/modules/$KVER/" 2>/dev/null || true
(cd "$WORK" && depmod -b . $KVER) 2>/dev/null || echo "depmod warning (non-fatal)"
# Init script
cp "{{ usb_root }}/playbook/blancco-init.sh" "$WORK/init"
@@ -743,7 +767,8 @@
# Build CPIO
cd "$WORK"
find . | cpio -o -H newc 2>/dev/null | gzip > "{{ web_root }}/blancco/kexec-initrd.img"
find . | cpio -o -H newc --quiet | gzip -1 > "{{ web_root }}/blancco/kexec-initrd.img"
cd /
rm -rf "$WORK"
echo "Built kexec-initrd.img: $(stat -c %s '{{ web_root }}/blancco/kexec-initrd.img') bytes"
ignore_errors: yes