From d7ec6a2b5f133c2989d14072f87aa93cf2d59f24 Mon Sep 17 00:00:00 2001
From: cproudlock
Date: Wed, 22 Apr 2026 13:29:35 -0400
Subject: [PATCH] Blancco: sweep full NIC driver tree into kexec-initrd + verbose init

Previous approach listed ~6 specific drivers (e1000e, igb, tg3, bnx2,
bnxt_en, b44) and silenced insmod errors (2>/dev/null). On the modern
Dell fleet (Latitude 5330/5440, Pro-series, newer OptiPlex) this missed
igc (Intel I225/I226) entirely, and for the drivers we did include, the
dependency modules they need at insmod time (libeth, libie, dca,
i2c-algo-bit, macsec, mii, libphy, ptp, ...) were never bundled. insmod
does not resolve dependencies, so NIC drivers that need helpers failed
to load silently.

playbook/pxe_server_setup.yml (kexec-initrd build):
- Sweep the whole drivers/net/ethernet tree (~170 drivers, all vendors,
  ~15 MB total). Drivers for hardware that is not present load but bind
  to nothing.
- Add common helper dirs: drivers/net/{phy,mdio}, drivers/i2c/algos,
  drivers/dca, drivers/ptp, net/macsec, drivers/ssb.
- overlay.ko kept.

playbook/blancco-init.sh:
- Load helpers BEFORE main NIC drivers (libeth/libie, dca, i2c-algo-bit,
  macsec, mii, ssb, libphy, mdio*, phy*, ptp*), then iterate remaining
  modules.
- Remove 2>/dev/null on insmod so actual failures surface on the boot
  console.
- Print kernel version + /sys/class/net before/after driver load, plus
  dmesg grep for NIC driver activity.
- On "no interface found" failure, dump dmesg tail and drop to a busybox
  shell for manual debug rather than just hanging.

Separate from this commit but related: kexec-initrd.img on both PXE
servers (.1 and .2) was rebuilt inline with these changes. Pre-rebuild
binary kept as kexec-initrd.img.bak-.

Co-Authored-By: Claude Opus 4.7 (1M context) --- playbook/blancco-init.sh | 42 +++++++++++++++++++++++++++++------ playbook/pxe_server_setup.yml | 35 +++++++++++++++++++++++------ 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/playbook/blancco-init.sh b/playbook/blancco-init.sh index 1a4dcd7..6450876 100644 --- a/playbook/blancco-init.sh +++ b/playbook/blancco-init.sh @@ -20,14 +20,38 @@ mount -t devtmpfs devtmpfs /dev 2>/dev/null mkdir -p /tmp /run echo "[1/4] Loading NIC drivers..." +echo " Kernel: $(uname -r)" +echo " Interfaces before driver load: $(ls /sys/class/net/ 2>/dev/null)" + +# Dependency modules first. insmod does not resolve deps; main NIC +# drivers like igb need dca + i2c-algo-bit, atlantic needs macsec, +# some Intel drivers need libeth/libie. Load helpers first so the +# main driver module has its required symbols available. +for mod in /lib/modules/libeth.ko /lib/modules/libie*.ko /lib/modules/dca.ko \ + /lib/modules/i2c-algo-bit.ko /lib/modules/macsec.ko \ + /lib/modules/mii.ko /lib/modules/ssb.ko /lib/modules/libphy.ko \ + /lib/modules/mdio*.ko /lib/modules/phy*.ko /lib/modules/ptp*.ko; do + [ -f "$mod" ] || continue + insmod "$mod" 2>&1 | grep -v '^$' | head -1 || true +done + +# Main NIC drivers (everything else in /lib/modules/ that is not a helper +# or the overlay/squashfs modules). Errors are now VISIBLE so if a +# driver fails to load we can see why. 
for mod in /lib/modules/*.ko; do - echo " insmod $(basename $mod)" - insmod $mod 2>/dev/null || true + base=$(basename "$mod") + case "$base" in + libeth.ko|libie*.ko|dca.ko|i2c-algo-bit.ko|macsec.ko|mii.ko|ssb.ko|libphy.ko|overlay.ko|squashfs.ko|mdio*.ko|phy*.ko|ptp*.ko) + continue ;; + esac + echo " insmod $base" + insmod "$mod" 2>&1 | head -1 || true done sleep 5 -echo " Interfaces after driver load:" -ls /sys/class/net/ 2>/dev/null +echo " Interfaces after driver load: $(ls /sys/class/net/ 2>/dev/null)" +echo " Kernel messages mentioning NIC driver activity:" +dmesg 2>/dev/null | grep -iE "eth|igc|igb|bnxt|tg3|r8169|atlantic|e1000|ixgbe|i40e|eno|ens|enp" | tail -20 echo " Waiting for network interface..." IFACE="" @@ -47,9 +71,13 @@ done echo "" if [ -z "$IFACE" ]; then - echo "ERROR: No network interface found!" - echo "Available interfaces:" - ls /sys/class/net/ 2>/dev/null + echo "" + echo "ERROR: No network interface found after 60s." + echo " /sys/class/net/: $(ls /sys/class/net/ 2>/dev/null)" + echo " Last 40 lines of dmesg (look for probe failures):" + dmesg 2>/dev/null | tail -40 + echo "" + echo " Dropping to busybox shell for manual debug. reboot with 'reboot -f'." exec sh fi diff --git a/playbook/pxe_server_setup.yml b/playbook/pxe_server_setup.yml index 71200c6..2d9b4f1 100644 --- a/playbook/pxe_server_setup.yml +++ b/playbook/pxe_server_setup.yml @@ -671,16 +671,37 @@ ln -sf busybox "$WORK/bin/$cmd" done - # NIC drivers (common server NICs) + # NIC drivers: sweep the whole drivers/net/ethernet tree. The + # earlier targeted list (e1000e, igb, tg3, bnx2, bnxt_en, b44) + # missed igc (Intel I225/I226 on modern Dell Latitude 5330/5440, + # Pro-series), plus helper modules (libeth, libie, dca, + # i2c-algo-bit, macsec) needed as dependencies by the main + # drivers. insmod does not resolve deps; bundling the full + # tree + helpers is the reliable way to cover any NIC. 
KVER=$(uname -r) - KMOD="/lib/modules/$KVER/kernel/drivers/net/ethernet" - for drv in intel/e1000e/e1000e.ko.zst intel/igb/igb.ko.zst broadcom/tg3.ko.zst broadcom/bnx2.ko.zst broadcom/bnxt/bnxt_en.ko.zst broadcom/b44.ko.zst; do - if [ -f "$KMOD/$drv" ]; then - zstd -d "$KMOD/$drv" -o "$WORK/lib/modules/$(basename ${drv%.zst})" 2>/dev/null - fi + ETH="/lib/modules/$KVER/kernel/drivers/net/ethernet" + find "$ETH" -name "*.ko.zst" -type f 2>/dev/null | while read -r src; do + zstd -d "$src" -o "$WORK/lib/modules/$(basename ${src%.zst})" 2>/dev/null done - # Overlay module + # Helper modules (PHY, MDIO, I2C, DCA, PTP, macsec, ssb) - loaded + # first in blancco-init.sh before the main NIC drivers. + for helper_dir in \ + "/lib/modules/$KVER/kernel/drivers/net/phy" \ + "/lib/modules/$KVER/kernel/drivers/net/mdio" \ + "/lib/modules/$KVER/kernel/drivers/i2c/algos" \ + "/lib/modules/$KVER/kernel/drivers/dca" \ + "/lib/modules/$KVER/kernel/drivers/ptp" \ + "/lib/modules/$KVER/kernel/net/macsec" \ + "/lib/modules/$KVER/kernel/drivers/ssb" \ + ; do + [ -d "$helper_dir" ] || continue + find "$helper_dir" -name "*.ko.zst" -type f 2>/dev/null | while read -r src; do + zstd -d "$src" -o "$WORK/lib/modules/$(basename ${src%.zst})" 2>/dev/null + done + done + + # Overlay module (switch_root overlay mount) OVMOD="/lib/modules/$KVER/kernel/fs/overlayfs/overlay.ko.zst" if [ -f "$OVMOD" ]; then zstd -d "$OVMOD" -o "$WORK/lib/modules/overlay.ko" 2>/dev/null