From 45ff163eea6675a82a85dd6401f5cb1783b9727d Mon Sep 17 00:00:00 2001 From: cproudlock Date: Fri, 10 Apr 2026 10:58:57 -0400 Subject: [PATCH] Fix reboot race + dispatcher owns all reboots Two related fixes from the pipeline audit: 1. Stage-Dispatcher race condition (critical): Run-ShopfloorSetup.ps1 called shutdown /r /t 10 and the dispatcher had to write the next stage + register RunOnce within that 10-second window. If disk I/O was slow, the reboot fired before RunOnce was registered, and the chain broke. Fix: dispatcher now cancels Run-ShopfloorSetup's pending reboot (shutdown /a) immediately after it returns, then advances the stage and registers RunOnce with no time pressure, then initiates its own shutdown /r /t 5. 2. Dispatcher owns all reboots: Run-ShopfloorSetup.ps1 now checks the -FromDispatcher flag at the end. When called by the dispatcher, it schedules shutdown /r /t 30 as a safety net (the dispatcher cancels it immediately). When called standalone (manual run or legacy FirstLogonCommands), it reboots directly with /t 10 as before. This means the dispatcher has full control over the reboot lifecycle: cancel -> advance stage -> register RunOnce -> reboot. No racing. Co-Authored-By: Claude Opus 4.6 (1M context) --- playbook/shopfloor-setup/Run-ShopfloorSetup.ps1 | 13 +++++++++++-- playbook/shopfloor-setup/Stage-Dispatcher.ps1 | 12 +++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/playbook/shopfloor-setup/Run-ShopfloorSetup.ps1 b/playbook/shopfloor-setup/Run-ShopfloorSetup.ps1 index 999911e..67decae 100644 --- a/playbook/shopfloor-setup/Run-ShopfloorSetup.ps1 +++ b/playbook/shopfloor-setup/Run-ShopfloorSetup.ps1 @@ -208,5 +208,14 @@ Write-Host "================================================================" # Flush transcript before shutdown so the log file is complete on next boot try { Stop-Transcript | Out-Null } catch {} -Write-Host "Rebooting in 10 seconds..." 
-shutdown /r /t 10 +if ($FromDispatcher) { + # Dispatcher owns the reboot — it cancels ours and reboots on its own + # terms after advancing the stage and re-registering RunOnce. We still + # schedule one as a safety net (dispatcher cancels it immediately). + Write-Host "Returning to Stage-Dispatcher for reboot." + shutdown /r /t 30 +} else { + # Standalone run (manual or legacy FirstLogonCommands) — reboot directly. + Write-Host "Rebooting in 10 seconds..." + shutdown /r /t 10 +} diff --git a/playbook/shopfloor-setup/Stage-Dispatcher.ps1 b/playbook/shopfloor-setup/Stage-Dispatcher.ps1 index 8087cef..5f7e999 100644 --- a/playbook/shopfloor-setup/Stage-Dispatcher.ps1 +++ b/playbook/shopfloor-setup/Stage-Dispatcher.ps1 @@ -86,18 +86,20 @@ switch ($stage) { break } - # Run-ShopfloorSetup.ps1 calls shutdown /r /t 10 at the end, which - # gives us a ~10 second window after it returns to advance the stage - # and re-register RunOnce before the reboot fires. # -FromDispatcher bypasses the stage-file gate at the top of # Run-ShopfloorSetup (which would otherwise see the stage file # and exit immediately thinking it should defer to us). & $script -FromDispatcher - Write-Host "Run-ShopfloorSetup.ps1 finished. Advancing stage to sync-intune." + # Cancel whatever reboot Run-ShopfloorSetup scheduled (shutdown /r + # /t 30 under -FromDispatcher) so we can advance the stage and + # re-register RunOnce WITHOUT racing its fuse. Then reboot on our own terms. + Write-Host "Run-ShopfloorSetup.ps1 finished. Canceling its reboot so we can advance safely." + cmd /c "shutdown /a 2>nul" *>$null Set-Content -LiteralPath $stageFile -Value 'sync-intune' -Force Register-NextRun - Write-Host "Reboot imminent (initiated by Run-ShopfloorSetup.ps1)." + Write-Host "Stage advanced to sync-intune. Rebooting." + shutdown /r /t 5 } 'sync-intune' {