#!/bin/bash
# smb-soft-reset.sh - progressive soft recovery for the WinPE re-image
# "System error 53 has occurred" issue without having to power-cycle the box.
#
# Error 53 = network path not found; the TCP connect to :445 never
# completes. Root cause is almost always stale kernel netfilter conntrack
# state that systemctl restart smbd doesn't touch. Full reboot clears it;
# this script tries to do the same without the reboot.
#
# Run as root (sudo) when the issue happens. It applies increasingly
# aggressive fixes and pauses between steps so you can retry the WinPE
# re-image without closing this shell.
#
#   Step 1: reload smbd config (cheapest, may clear cached state)
#   Step 2: restart nmbd (NetBIOS daemon - separate from smbd)
#   Step 3: restart smbd (full smbd restart, kills all child sessions)
#   Step 4: kill any leftover smbd child processes that survived restart
#   Step 5: flush conntrack for the PXE subnet (kernel connection tracking)
#   Step 6: flush ARP / neighbour cache on the PXE bridge
#   Step 7: drop TCP sockets on port 445 via ss -K
#   Step 8: restart dnsmasq (DHCP/TFTP state as a last resort before reboot)
#
# Optional environment overrides (defaults are the original fixed values):
#   PXE_SUBNET  subnet whose conntrack entries are flushed  [10.9.100.0/24]
#   PXE_IFACE   interface whose neighbour cache is flushed  [br-pxe]

set -euo pipefail

readonly PXE_SUBNET="${PXE_SUBNET:-10.9.100.0/24}"
readonly PXE_IFACE="${PXE_IFACE:-br-pxe}"

#######################################
# Print a banner and block until the operator presses Enter.
# Arguments: $1 - status line shown at the top of the banner
# Returns:   0 once a line was read; exits 1 if stdin is closed, because
#            running the remaining steps unattended would defeat the
#            "retry between steps" workflow.
#######################################
pause() {
  echo
  echo "----------------------------------------------------------------"
  echo "$1"
  echo " Try your WinPE re-image NOW. Did it work?"
  echo " [Enter] to continue to next step, Ctrl+C to stop."
  echo "----------------------------------------------------------------"
  if ! read -r _; then
    echo "stdin is closed - cannot prompt; stopping before the next step." >&2
    exit 1
  fi
}

#######################################
# Run one recovery command, merging its stderr into stdout. A failure is
# reported but never aborts the script: the later, more aggressive steps
# (notably the conntrack flush) must still be reachable.
# Arguments: the command and its arguments
# Returns:   always 0
#######################################
try() {
  "$@" 2>&1 || echo " WARNING: '$*' failed (exit $?) - continuing" >&2
}

main() {
  if [[ "$EUID" -ne 0 ]]; then
    echo "Run as root: sudo $0" >&2
    exit 1
  fi

  echo "=== Step 1/8: reload smbd config ==="
  try systemctl reload smbd
  pause "Step 1 done"

  echo "=== Step 2/8: restart nmbd ==="
  try systemctl restart nmbd
  pause "Step 2 done"

  echo "=== Step 3/8: restart smbd ==="
  try systemctl restart smbd
  pause "Step 3 done"

  echo "=== Step 4/8: kill any leftover smbd children ==="
  # systemctl restart should take care of this with KillMode=control-group,
  # but belt + suspenders since the issue has been surviving restarts.
  # Stop the unit first so systemd is not racing the SIGKILL below, then
  # remove whatever survived the orderly stop and bring the service back.
  try systemctl stop smbd
  # pkill exits 1 when nothing matched, which is the expected case here.
  pkill -9 -x smbd 2>/dev/null || true
  sleep 1
  try systemctl start smbd
  pause "Step 4 done"

  echo "=== Step 5/8: flush conntrack entries for ${PXE_SUBNET} ==="
  if command -v conntrack >/dev/null 2>&1; then
    # Best effort: a non-zero status here is not treated as an error.
    conntrack -D -s "${PXE_SUBNET}" 2>&1 || true
    conntrack -D -d "${PXE_SUBNET}" 2>&1 || true
  else
    echo " conntrack tool not installed - skipping (apt install conntrack)"
  fi
  pause "Step 5 done"

  echo "=== Step 6/8: flush ARP / neighbour cache on ${PXE_IFACE} ==="
  # Fall back to flushing every interface if the bridge flush fails.
  ip neigh flush dev "${PXE_IFACE}" 2>&1 || ip neigh flush all 2>&1 || true
  pause "Step 6 done"

  echo "=== Step 7/8: drop tcp sockets on port 445 ==="
  # ss -K requires iproute2 >= 4.11 and CONFIG_INET_DIAG_DESTROY in the kernel.
  # Drops existing TCP connections to port 445 without touching listening socket.
  ss -K dport = 445 2>&1 || echo " ss -K not supported on this system"
  ss -K sport = 445 2>&1 || true
  pause "Step 7 done"

  echo "=== Step 8/8: restart dnsmasq ==="
  try systemctl restart dnsmasq
  pause "Step 8 done"

  echo
  echo "All soft-fix steps exhausted. If the issue is still present, you'll"
  echo "have to power-cycle. Before you do, run /usr/local/sbin/smb-diag.sh"
  echo "and share the /tmp/smb-diag-*.log output so we can pinpoint which"
  echo "kernel state the reboot is actually clearing."
}

main "$@"