Files
pxe-server/playbook/blancco-init.sh
cproudlock ce604adcda Renumber PXE LAN from 10.9.100.0/24 to 172.16.9.0/24
Single-site bay-stuck issue at WJ: GE Intune Report IP script filters
Get-NetIPAddress on StartsWith("10.") and posts everything matching
to the GE Tines webhook. Bays at WJ get the PXE LAN 10.9.100.x IP
captured and reported -> GE backend tags bays as on a non-corp 10.x
subnet -> dynamic group eligibility for SFLD policy never matches.
Other GE sites work because their PXE LANs aren't on 10.x at all.

Renumber PXE LAN to RFC1918 172.16.9.0/24 so the GE filter naturally
skips wired PXE addresses without any disable-NIC dance.

Server-side already in flight (netplan dual-bound, dnsmasq scope +
boot URL repointed, blancco preferences + grub.cfg + iPXE GetPxeScript
all sed'd to 172.16.9.1). This commit is the playbook / scripts /
docs side: 109 hits across 35 files sed'd in one shot.

After this lands + boot.wim is rebuilt + bays renumber off DHCP,
the 10.9.100.1 binding will be dropped from netplan as the final
cleanup step.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 16:30:32 -04:00

201 lines
8.3 KiB
Bash

#!/bin/sh
# Blancco PXE Loader - init script for custom initramfs
# Boot chain: iPXE -> GRUB EFI -> Ubuntu kernel + this initramfs -> switch_root to Blancco
#
# Blancco's own kernel freezes / lacks NIC drivers for some Dell Precision
# hardware during PXE boot. Workaround: boot Ubuntu kernel (which has a wider
# NIC driver set), download Blancco rootfs (squashfs), overlay-mount, and
# switch_root into Blancco's userspace.
#
# Verbose trace + shell-on-NIC-failure because silent hangs during Blancco
# PXE boot are painful to debug. set -x goes to /dev/console so the screen
# shows every step; if no NIC appears after the modprobe sweep, we dump
# dmesg / lspci / /proc/modules and drop to sh so the operator can
# investigate without re-imaging.
#
# Stage 0: console logging, PATH, kernel pseudo-filesystems and the scratch
# directories that the later stages mount onto.
#
# Send stdout and stderr to the console for the remainder of the script.
# NOTE(review): this runs before /dev is mounted a few lines below, so it
# presumably relies on a /dev/console node shipped in the initramfs image -
# confirm against the image build.
exec >/dev/console 2>&1
# Trace every command as it executes.
set -x
echo ""
echo "============================================"
echo " Blancco PXE loader (verbose)"
echo "============================================"
echo ""
export PATH=/bin:/sbin
mount -t proc proc /proc
mount -t sysfs sysfs /sys
# Prefer devtmpfs; if that mount fails, fall back to a plain tmpfs on /dev.
mount -t devtmpfs devtmpfs /dev 2>/dev/null || mount -t tmpfs tmpfs /dev
# /tmp receives the downloads; /run/lower, /run/upper and /run/newroot are
# the squashfs, tmpfs and overlay mount points used further down. /run/work
# is created here but is not referenced again in this script (the overlay
# workdir lives under /run/upper/work).
mkdir -p /tmp /run /run/lower /run/upper /run/work /run/newroot
# Running kernel release; used later to build /lib/modules/$KVER paths.
KVER=$(uname -r)
echo "Running kernel: $KVER"
# Show which module trees are present in the initramfs.
ls /lib/modules/ 2>/dev/null
echo "[1/5] Loading NIC drivers via modprobe (resolves deps automatically)..."
# Stage 1: attempt every driver in the list below. modprobe works out the
# dependencies from /lib/modules/$KVER/modules.dep (built by
# prepare-boot-tools). A driver that is absent or fails to load is skipped
# quietly (stderr discarded); only successful loads are reported.
nic_drivers="
  mii libphy ptp
  e1000 e1000e igb igc ixgbe ixgbevf i40e ice iavf
  tg3 bnx2 bnx2x bnxt_en b44
  r8169 r8152 atlantic
  vmxnet3 virtio_net virtio_pci
  pcnet32 8139too 8139cp
  sfc sfc_ef100 mlx4_en mlx5_core
  alx atl1c atl1e atl2
  via_rhine via_velocity forcedeth
  pegasus dm9601 asix ax88179_178a cdc_ether cdc_ncm rndis_host
"
# Intentionally unquoted: the list is split on whitespace into module names.
for module in $nic_drivers; do
  if modprobe -v "$module" 2>/dev/null; then
    echo " OK $module"
  fi
done
# Short pause before the interface listing that follows.
sleep 3
echo "[2/5] /sys/class/net after driver load:"
ls /sys/class/net/ || true
ip link || true
echo " Waiting up to 60s for non-lo interface..."
# Stage 2: poll /sys/class/net up to 60 times, one second apart, and settle
# on the first entry that is not the loopback device. The chosen name is
# left in IFACE for the stages that follow; IFACE stays empty on timeout.
IFACE=""
tries=0
while [ "$tries" -lt 60 ]; do
  tries=$((tries + 1))
  for netdir in /sys/class/net/*; do
    case "${netdir##*/}" in
      lo) continue ;;
    esac
    # The -d test also rejects the literal pattern left by an unmatched glob.
    if [ -d "$netdir" ]; then
      IFACE="${netdir##*/}"
      break
    fi
  done
  if [ -n "$IFACE" ]; then
    break
  fi
  sleep 1
  echo -n "."
done
echo ""
# Nothing showed up: print diagnostics and hand the console to a shell.
if [ -z "$IFACE" ]; then
  echo "ERROR: No network interface after 60s"
  echo "=== dmesg tail ==="
  dmesg | tail -40
  echo "=== PCI devices (sysfs) ==="
  ls /sys/bus/pci/devices/ 2>/dev/null
  echo "=== loaded modules ==="
  cat /proc/modules
  echo "Dropping to shell - type 'exit' to reboot."
  exec /bin/sh
fi
echo " IFACE=$IFACE, bringing up..."
# Raise the link with iproute2; fall back to ifconfig if that fails.
if ! ip link set "$IFACE" up; then
  ifconfig "$IFACE" up
fi
sleep 2
# Static addressing on the PXE LAN: SERVER is the host all later downloads
# come from, client_ip is the address given to this machine.
# NOTE(review): every machine running this script takes the same client
# address, so two bays booting at once would presumably conflict - confirm
# whether concurrent boots are expected.
SERVER=172.16.9.1
client_ip=172.16.9.250
ifconfig "$IFACE" "$client_ip" netmask 255.255.255.0 up
sleep 1
echo " IP: $client_ip SERVER: $SERVER"
ip addr
echo "[3/5] Downloading airootfs.sfs (~756 MB)..."
# Stage 3: fetch the Blancco root filesystem image into /tmp.
# Both wget's exit status and the file size are checked: a size-only test
# would accept a partially written file from an interrupted transfer, and
# the failure would then only surface later at the squashfs mount. On
# failure, hand the console to a shell so the operator can investigate.
if ! wget -O /tmp/airootfs.sfs "http://$SERVER/blancco/arch/x86_64/airootfs.sfs" 2>&1 \
  || [ ! -s /tmp/airootfs.sfs ]; then
  echo "ERROR: download failed"
  exec /bin/sh
fi
echo "[4/5] Mounting rootfs + overlay..."
# Stage 4: expose the downloaded squashfs read-only at /run/lower, put a
# tmpfs at /run/upper for the writable layer, and combine both into an
# overlay at /run/newroot (the future root filesystem).
#
# Module loads are best-effort: try modprobe first, then a direct insmod of
# the expected .ko path. Their status is deliberately not checked because
# the mounts below are the real test of whether the support is available.
modprobe overlay 2>/dev/null || insmod "/lib/modules/$KVER/kernel/fs/overlayfs/overlay.ko" 2>/dev/null
modprobe squashfs 2>/dev/null || insmod "/lib/modules/$KVER/kernel/fs/squashfs/squashfs.ko" 2>/dev/null
modprobe loop 2>/dev/null
# Every step here is required. If any one fails, stop and drop to a shell
# instead of carrying on: without the overlay, later stages would write into
# a plain directory and the final switch_root could not succeed. The command
# trace (set -x) shows which step was the last one attempted.
if ! losetup /dev/loop0 /tmp/airootfs.sfs \
  || ! mount -t squashfs -o ro /dev/loop0 /run/lower \
  || ! mount -t tmpfs -o size=50% tmpfs /run/upper \
  || ! mkdir -p /run/upper/upper /run/upper/work \
  || ! mount -t overlay overlay -o lowerdir=/run/lower,upperdir=/run/upper/upper,workdir=/run/upper/work /run/newroot; then
  echo "ERROR: rootfs/overlay mount failed"
  echo "=== dmesg tail ==="
  dmesg | tail -20
  echo "=== /proc/mounts ==="
  cat /proc/mounts
  echo "Dropping to shell."
  exec /bin/sh
fi
echo "[5/5] Fetching kmod tarball + config..."
# Stage 5a: the kmod tarball is handled best-effort. A failed or empty
# download is simply skipped; otherwise it is unpacked over the new root
# and the archive is removed once extraction succeeds.
wget -O /tmp/kmod.tar.gz "http://$SERVER/blancco/kmod.tar.gz" 2>&1
if [ -s /tmp/kmod.tar.gz ]; then
  (
    # Subshell keeps the directory change from leaking into the main script.
    cd /run/newroot \
      && gunzip -c /tmp/kmod.tar.gz | tar xf - \
      && rm -f /tmp/kmod.tar.gz
  )
fi
# Directory that receives preferences.xml, preferences.save and config.xml.
mkdir -p /run/newroot/albus
# Stage 5b: fetch and validate the Blancco configuration files.
#
# preferences.xml REQUIRED. Without valid file, Blancco airootfs's
# /opt/scripts/validate_preferences.sh silently restores
# /albus/preferences.save (factory defaults, empty network_share) so
# erasure reports never reach SMB. Fail loud here instead.
PREFS=/run/newroot/albus/preferences.xml
if ! wget -O "$PREFS" "http://$SERVER/blancco/preferences.xml"; then
  echo "ERROR: preferences.xml download failed from http://$SERVER/blancco/preferences.xml"
  echo "Reports would fall back to factory defaults (no SMB target)."
  echo "Dropping to shell - check Apache + network."
  exec /bin/sh
fi
# Busybox initramfs has no xmllint. Grep for required SMB markers instead;
# if either is missing, the prefs file in web_root is stale or corrupted.
# -F makes grep match the markers as literal text: $SERVER contains dots,
# which a regex match would treat as "any character" and so could accept a
# hostname entry that is not actually the expected server address.
if [ ! -s "$PREFS" ] || ! grep -qF "<hostname>$SERVER</hostname>" "$PREFS" \
  || ! grep -qF '<path>blancco-reports</path>' "$PREFS"; then
  echo "ERROR: preferences.xml missing required network_share entries."
  echo "Expected <hostname>$SERVER</hostname> + <path>blancco-reports</path>."
  echo "=== first 60 lines of $PREFS ==="
  head -60 "$PREFS"
  exec /bin/sh
fi
# Clobber preferences.save too. validate_preferences.sh in airootfs falls
# back to preferences.save on any future xmllint failure; if that ever
# fires, we want the fallback to still have the right SMB target.
cp -f "$PREFS" /run/newroot/albus/preferences.save
# config.xml is served as config-clean.xml; a failed download stops the boot
# and drops to a shell.
if ! wget -O /run/newroot/albus/config.xml "http://$SERVER/blancco/config-clean.xml"; then
  echo "ERROR: config.xml (license container) download failed."
  exec /bin/sh
fi
# Xorg driver selection: make sure the drop-in directory exists in the new
# root, then delete the failsafe driver pin so no single driver is forced.
# Rationale carried over from the original notes: hardware differs per site
# (Intel iGPU, NVIDIA GK208, AMD, ...), earlier "modesetting" and "vesa"
# pins did not work with the nomodeset / KMS-blacklist kernel cmdline, so
# Xorg is left to auto-select (expected to land on fbdev).
xorg_conf_dir=/run/newroot/etc/X11/xorg.conf.d
mkdir -p "$xorg_conf_dir"
# Best-effort removal; a missing file is not an error.
rm -f "$xorg_conf_dir/20-failsafeDriver.conf" 2>/dev/null || true
# Sleep/suspend/hibernate hardening inside the new root's overlay:
#   1. Point each sleep-related systemd target at /dev/null under
#      /etc/systemd/system so the unit is masked.
#   2. Install a logind drop-in that sets the idle action and the
#      suspend/hibernate/lid/power key handlers to "ignore".
# Per the original notes, the systemd.mask= kernel cmdline option alone was
# not sufficient, which is why the masks are also written into the
# filesystem here.
systemd_etc=/run/newroot/etc/systemd
mkdir -p "$systemd_etc/system"
for unit in \
  sleep.target \
  suspend.target \
  hibernate.target \
  hybrid-sleep.target \
  suspend-then-hibernate.target; do
  ln -sf /dev/null "$systemd_etc/system/$unit"
done
mkdir -p "$systemd_etc/logind.conf.d"
# Quoted delimiter: the drop-in is written out verbatim, with no expansion.
cat > "$systemd_etc/logind.conf.d/no-suspend.conf" << 'XEOF'
[Login]
IdleAction=ignore
HandleSuspendKey=ignore
HandleHibernateKey=ignore
HandleLidSwitch=ignore
HandleLidSwitchDocked=ignore
HandlePowerKey=ignore
XEOF
# Keep the display awake during long erasures: install an xinitrc.d hook in
# the new root that switches off the X screensaver, DPMS and console
# blanking. The 00- prefix is meant to order it ahead of vendor hooks.
blank_hook=/run/newroot/etc/X11/xinit/xinitrc.d/00-no-screen-blank.sh
mkdir -p "${blank_hook%/*}"
# Quoted delimiter: the hook script is written out verbatim.
cat > "$blank_hook" << 'XEOF'
#!/bin/sh
xset s off -dpms 2>/dev/null || true
xset s noblank 2>/dev/null || true
setterm -blank 0 -powerdown 0 2>/dev/null || true
XEOF
chmod +x "$blank_hook"
# Final stage: hand the kernel pseudo-filesystems over to the new root,
# neutralise the sysfs power-control files, then switch_root into Blancco.
mkdir -p /run/newroot/proc /run/newroot/sys /run/newroot/dev /run/newroot/run /run/newroot/tmp
mount --move /proc /run/newroot/proc
mount --move /sys /run/newroot/sys
mount --move /dev /run/newroot/dev
# Hard-block kernel suspend/hibernate path. Bind-mount the null device over
# the sysfs power-control files so any userland write (Albus auto-suspend,
# pm-utils, etc) becomes a no-op. This is the LAST line of defense - kernel
# cmdline systemd.mask, /dev/null symlinks for sleep targets, and logind
# drop-ins were all bypassed by Albus writing /sys/power/state directly.
#
# The device tree was moved just above, so the null device now lives at
# /run/newroot/dev/null. The plain /dev/null path would resolve against the
# bare initramfs /dev directory instead, and only works if the image ships a
# static node there - so reference the moved location explicitly, and do not
# redirect stderr to /dev/null for the same reason.
# Which knobs exist depends on the kernel configuration: skip absent ones
# quietly, but report a bind that genuinely fails so it is not silently lost.
for knob in state disk mem_sleep autosleep; do
  [ -e "/run/newroot/sys/power/$knob" ] || continue
  mount --bind /run/newroot/dev/null "/run/newroot/sys/power/$knob" \
    || echo "WARN: could not mask /sys/power/$knob"
done
echo "Switching root..."
# exec keeps this process's PID while switch_root starts /sbin/init from
# the new root.
exec switch_root /run/newroot /sbin/init