Symptom: every Restore-UDCData log entry showed bay-level files as 'absent' even when they actually existed on the share - on a device where another PC's run had successfully consumed and migrated the same backup. Endless 'no work this cycle' loop on the device that should have done the consume. Cause: script ran as NT AUTHORITY\SYSTEM (manifest engine on logon). SYSTEM authenticates to remote SMB as the COMPUTER ACCOUNT (DOMAIN\HOSTNAME$), not as a user. The SFLD share's ACL grants top-level enumeration to authenticated computers (so Test-Path on share root + bay dir returned True) but file-level read only to the SFLD user. With no explicit user creds, Test-Path on bay-level files returns False - indistinguishable from 'file not found' - so the script silently logged 'absent' on files that actually exist. A different PC with proper creds consumed bay 3207 first; ours kept polling forever. Update-MachineNumber.ps1's branch already worked around this by calling Mount-SFLDShare (Restore-EDncReg.ps1's helper that reads HKLM:\SOFTWARE\GE\SFLD\Credentials\* and net-use's the share with the SFLD user identity). Fix: Restore-UDCData.ps1 now does the same. Replaces raw-UNC Test-Path polling with Mount-SFLDShare, probes via the W: drive letter, and unmounts on every exit path. If creds are missing in registry the script fails fast with a clear ERROR rather than masquerading as 'no backup'.
346 lines
14 KiB
PowerShell
346 lines
14 KiB
PowerShell
# Restore-UDCData.ps1 - Idempotent UDC data restore for the manifest engine.
#
# Triggered by the GE Shopfloor Enforce scheduled task (runs as SYSTEM, every
# user logon + every 5 min). Standard-machine manifest entry uses
# DetectionMethod=Always so this fires every cycle; the script self-decides
# whether there's actually any work to do.
#
# CONTRACT:
#   - 99% of cycles: no backup waiting -> exit 0 in ~1 second, ~5 lines of log
#   - 1 cycle (the one after Backup-UDCData lands a backup for this PC's bay):
#     stop UDC, copy CurrentData.json + ArchivedData/ to C:\ProgramData\UDC,
#     move consumed backup to <bay>\migrated\<timestamp>\, write
#     restore.manifest.json, restart UDC. After this, root is empty so the
#     check returns "no backup waiting" again on subsequent cycles.
#
# DESIGNED FOR THE SWAP WORKFLOW:
#   New PC gets pre-imaged with real machine number + locked down, sits in
#   storage. Days/weeks later, tech runs Backup-UDCData on old PC -> backup
#   lands on share. Tech swaps PCs. New PC powers on at the bay -> ShopFloor
#   autologon -> manifest engine fires this script -> backup detected ->
#   restored -> UDC opens with prior history intact.
#
# Replaces the placeholder->real trigger in Update-MachineNumber.ps1 for the
# pre-imaged-then-swapped case (where the trigger fired at imaging time, before
# the backup existed). Update-MachineNumber.ps1's branch still handles the
# secondary case (tech used 9999 placeholder + sets number at bay) - both
# triggers safely no-op if the other already consumed the backup.
#
# LOGGING:
#   Single rotating log at C:\Logs\UDC\Restore-UDCData.log (1 MB cap, rotated
#   to .old.log on overflow). Every cycle writes a header line so even the
#   silent no-op path leaves a trace. Errors include full exception type,
#   position, and stack trace.
|
|
[CmdletBinding()]
param(
    # UNC root of the UDC backup share. It is mounted as W: and each bay's
    # backup is looked up in a sub-folder named after the machine number.
    [string]$BackupShareRoot = '\\tsgwp00525.wjs.geaerospace.net\shared\dt\shopfloor\backup\udc',

    # Local folder that receives CurrentData.json and ArchivedData\.
    [string]$UdcDataDir = 'C:\ProgramData\UDC',

    # UDC executable relaunched after a restore copied at least one item.
    [string]$UdcExePath = 'C:\Program Files\UDC\UDC.exe',

    # UDC settings file; GeneralSettings.MachineNumber is read from it.
    [string]$UdcSettingsPath = 'C:\ProgramData\UDC\udc_settings.json',

    # Site name passed (quoted) as the first argument when relaunching UDC.
    [string]$Site = 'West Jefferson',

    # Share can take 20-60s to become reachable from SYSTEM context after a
    # cold boot or fresh logon. Retry until then before deciding "no backup".
    [int]$ShareTimeoutSec = 60,

    # Pause between share mount attempts, in seconds.
    [int]$SharePollSec = 3
)

# Non-terminating errors are reported but do not stop the script, so a cmdlet
# failure only reaches a catch block when it is run with -ErrorAction Stop.
$ErrorActionPreference = 'Continue'

# -- Logging setup --------------------------------------------------------
$logDir = 'C:\Logs\UDC'
try {
    if (-not (Test-Path -LiteralPath $logDir)) {
        # -ErrorAction Stop is required here: without it a failed New-Item is
        # non-terminating, the catch below never runs, and the script keeps
        # pointing at a log directory that does not exist.
        New-Item -Path $logDir -ItemType Directory -Force -ErrorAction Stop | Out-Null
    }
} catch { $logDir = $env:TEMP }
$logFile = Join-Path $logDir 'Restore-UDCData.log'
$logFileMaxBytes = 1MB

# Rotate log file if oversized (keeps one prior generation)
try {
    if ((Test-Path $logFile) -and ((Get-Item $logFile).Length -gt $logFileMaxBytes)) {
        $rotated = Join-Path $logDir 'Restore-UDCData.old.log'
        if (Test-Path $rotated) { Remove-Item $rotated -Force -ErrorAction SilentlyContinue }
        Rename-Item -Path $logFile -NewName 'Restore-UDCData.old.log' -Force -ErrorAction SilentlyContinue
    }
} catch {}
|
|
function Log {
    # Appends one "[timestamp][LEVEL] message" line to $logFile (best effort -
    # a write failure is ignored) and echoes the same line to the host.
    #   Msg   - text to record
    #   Level - severity tag placed in the second bracket (default INFO)
    param([string]$Msg, [string]$Level = 'INFO')

    $entry = '[{0}][{1}] {2}' -f (Get-Date -Format 'yyyy-MM-dd HH:mm:ss.fff'), $Level, $Msg
    try {
        Add-Content -LiteralPath $logFile -Value $entry -ErrorAction SilentlyContinue
    } catch {}
    Write-Host $entry
}
|
|
function LogErr {
    # Writes the details of an error object to the log at ERROR level:
    # exception type and message, then (when available) the invocation
    # position, the script stack trace and the inner exception message.
    # Multi-line position/stack text is flattened onto one line with ' | '.
    # A null/empty $Err logs nothing.
    param($Err)

    if (-not $Err) { return }

    $ex = $Err.Exception
    if ($ex) {
        $exType = $ex.GetType().FullName
        $exMsg = $ex.Message
    } else {
        # Not an error record (or it carries no exception): log its string form.
        $exType = '<no exception>'
        $exMsg = "$Err"
    }
    Log " exception: $exType - $exMsg" 'ERROR'

    $invocation = $Err.InvocationInfo
    if ($invocation -and $invocation.PositionMessage) {
        $flatPosition = $invocation.PositionMessage -replace "`r?`n", ' | '
        Log " at: $flatPosition" 'ERROR'
    }

    $trace = $Err.ScriptStackTrace
    if ($trace) {
        $flatTrace = $trace -replace "`r?`n", ' | '
        Log " stack: $flatTrace" 'ERROR'
    }

    if ($ex -and $ex.InnerException) {
        Log " inner: $($ex.InnerException.Message)" 'ERROR'
    }
}
|
|
# -- Startup banner -------------------------------------------------------
# Record who/where/with-what-settings this cycle runs, so even a no-op cycle
# leaves a trace in the log.
try {
    $whoami = [System.Security.Principal.WindowsIdentity]::GetCurrent().Name
} catch { $whoami = '<unknown>' }

$bannerLines = @(
    '==============================================='
    "Restore-UDCData starting (PID $PID)"
    "Hostname: $env:COMPUTERNAME"
    "User identity: $whoami"
    "PowerShell version: $($PSVersionTable.PSVersion)"
    "BackupShareRoot: $BackupShareRoot"
    "UdcDataDir: $UdcDataDir"
    "UdcSettingsPath: $UdcSettingsPath"
    "ShareTimeoutSec: $ShareTimeoutSec SharePollSec: $SharePollSec"
)
foreach ($bannerLine in $bannerLines) { Log $bannerLine }
|
|
# -- Resolve local machine number ----------------------------------------
# The bay is identified by GeneralSettings.MachineNumber in udc_settings.json.
# Every "cannot determine a usable number" outcome is a quiet exit 0.
$settingsPresent = Test-Path -LiteralPath $UdcSettingsPath
if (-not $settingsPresent) {
    Log "udc_settings.json not present - UDC not installed yet, no work to do."
    Log 'Exit 0.'
    exit 0
}

try {
    $json = Get-Content -LiteralPath $UdcSettingsPath -Raw -ErrorAction Stop | ConvertFrom-Json -ErrorAction Stop
    $mn = $json.GeneralSettings.MachineNumber
    Log "Resolved MachineNumber from udc_settings: $mn"
}
catch {
    Log "Failed to parse $UdcSettingsPath" 'ERROR'
    LogErr $_
    Log 'Exit 0.'
    exit 0
}

# Reject an empty value, the 9999 placeholder, and anything that is not all digits.
$mnUnusable = (-not $mn) -or ($mn -eq '9999') -or ($mn -notmatch '^\d+$')
if ($mnUnusable) {
    Log "Machine number is placeholder/empty/non-numeric ('$mn'). Update-MachineNumber.ps1's branch will catch the placeholder->real transition. No work to do."
    Log 'Exit 0.'
    exit 0
}
|
|
# -- Mount share with SFLD user creds -----------------------------------
# This script runs as NT AUTHORITY\SYSTEM (manifest engine on logon, or
# scheduled task). SYSTEM authenticates to remote SMB as the COMPUTER
# ACCOUNT (DOMAIN\HOSTNAME$), not as a user. The SFLD share's ACLs grant
# top-level enumeration to authenticated computers but file-level read
# only to a specific SFLD user. Without explicit user creds, Test-Path
# on bay-level files returns False (access denied = same return as not-
# found), making the script silently log "absent" when the files in fact
# exist. Symptom: Restore-UDCData.log shows endless "no work this cycle"
# while another PC (or a user-context invocation) successfully consumes
# the backup. Fix: mount the share with explicit SFLD creds from
# HKLM:\SOFTWARE\GE\SFLD\Credentials and probe via the drive letter.

# Fail fast when the helper that provides Mount-SFLDShare cannot be loaded.
# Otherwise every mount attempt below would fail with "command not found"
# until the timeout and the final error would wrongly point at creds/network.
$sfldHelper = Join-Path $PSScriptRoot '..\Shopfloor\lib\Restore-EDncReg.ps1'
if (-not (Test-Path -LiteralPath $sfldHelper)) {
    Log "SFLD helper script not found: $sfldHelper - cannot mount the share with SFLD creds." 'ERROR'
    Log 'Exit 1.'
    exit 1
}
. $sfldHelper
if (-not (Get-Command -Name Mount-SFLDShare -ErrorAction SilentlyContinue)) {
    Log "Mount-SFLDShare is not defined after loading $sfldHelper - cannot mount the share with SFLD creds." 'ERROR'
    Log 'Exit 1.'
    exit 1
}

Log "Mounting share with SFLD creds: $BackupShareRoot -> W:"
$shareMounted = $false
# Never poll faster than once per second, even if a caller passes 0 or a negative value.
$pollSec = [Math]::Max(1, $SharePollSec)
$sw = [Diagnostics.Stopwatch]::StartNew()
while ($true) {
    # Always make at least one attempt, whatever the timeout is.
    if (Mount-SFLDShare -SharePath $BackupShareRoot -DriveLetter 'W:') {
        $shareMounted = $true
        break
    }
    # Give up as soon as the next attempt would start past the deadline,
    # instead of sleeping once more for nothing.
    if (($sw.Elapsed.TotalSeconds + $pollSec) -ge $ShareTimeoutSec) { break }
    Start-Sleep -Seconds $pollSec
}
$sw.Stop()

if ($shareMounted) {
    Log ("Share mounted as W: after {0:N1} s" -f $sw.Elapsed.TotalSeconds)
} else {
    Log "Mount-SFLDShare failed after $ShareTimeoutSec s. SFLD creds may be missing in HKLM:\SOFTWARE\GE\SFLD\Credentials, or the share is unreachable. Exiting non-zero so the dispatcher logs a failure." 'ERROR'
    Log 'Exit 1.'
    exit 1
}
|
|
# Every bay-level path is addressed through W: (the share mounted with the
# SFLD user's credentials), so Test-Path reports real presence rather than
# an access-denied False.
$bayDir = Join-Path 'W:\' $mn
$srcCur = Join-Path $bayDir 'CurrentData.json'
$srcArc = Join-Path $bayDir 'ArchivedData'

Log "Probing backup paths for bay $mn"
Log " bayDir: $bayDir"
$bayDirExists = Test-Path -LiteralPath $bayDir
Log " bayDir exists: $bayDirExists"

$srcCurExists = Test-Path -LiteralPath $srcCur
$curState = if ($srcCurExists) { 'present' } else { 'absent' }
Log " CurrentData.json src: $curState - $srcCur"

$srcArcExists = Test-Path -LiteralPath $srcArc
$arcState = if ($srcArcExists) { 'present' } else { 'absent' }
Log " ArchivedData/ src: $arcState - $srcArc"

# Nothing at the bay root: the common case. Unmount and finish quietly.
$backupWaiting = $srcCurExists -or $srcArcExists
if (-not $backupWaiting) {
    Log "No backup waiting for bay $mn (neither CurrentData.json nor ArchivedData\ at bay root) - no work to do this cycle."
    $null = & net use W: /delete /y 2>$null
    Log 'Exit 0.'
    exit 0
}

# At least one item exists here, so at most one of these warnings can apply.
if (-not $srcCurExists) {
    Log "Partial backup waiting (ArchivedData\ present, CurrentData.json absent). Will restore ArchivedData\ only. Source PC may have had no live UDC session at backup time, or backup partially failed." 'WARN'
}
elseif (-not $srcArcExists) {
    Log "Partial backup waiting (CurrentData.json present, ArchivedData\ absent). Will restore CurrentData.json only." 'WARN'
}
|
|
# -- We have a backup. Restore. ------------------------------------------
Log "Backup waiting at $bayDir - proceeding with restore"

# Stop UDC.exe so CurrentData.json isn't locked
$udcProcs = @(Get-Process UDC -ErrorAction SilentlyContinue)
Log "UDC processes currently running: $($udcProcs.Count)"
foreach ($p in $udcProcs) {
    try {
        Log " stopping UDC.exe PID $($p.Id)"
        $p.Kill()
        # WaitForExit(ms) returns $false when the process has not exited within
        # the timeout; only report "stopped" when it really is gone.
        if ($p.WaitForExit(5000)) {
            Log " stopped"
        } else {
            Log " UDC.exe PID $($p.Id) still running 5 s after Kill() - its data files may remain locked" 'WARN'
        }
    } catch {
        Log " could not stop UDC.exe PID $($p.Id)" 'WARN'
        LogErr $_
    }
}
# Short grace period before touching the data files.
Start-Sleep -Milliseconds 500
|
|
# Ensure local UDC data dir exists
if (-not (Test-Path -LiteralPath $UdcDataDir)) {
    Log "Creating local UDC data dir: $UdcDataDir"
    try {
        # -ErrorAction Stop turns a failed New-Item into a terminating error.
        # Without it the catch below is never reached and the script would
        # go on to copy into a directory that does not exist.
        New-Item -Path $UdcDataDir -ItemType Directory -Force -ErrorAction Stop | Out-Null
    } catch {
        Log "Failed to create $UdcDataDir - cannot continue" 'ERROR'
        LogErr $_
        # Release the W: mapping before bailing out.
        & net use W: /delete /y 2>$null | Out-Null
        Log 'Exit 1.'
        exit 1
    }
}

# Local restore targets.
$localCur = Join-Path $UdcDataDir 'CurrentData.json'
$localArc = Join-Path $UdcDataDir 'ArchivedData'
|
|
# Restore CurrentData.json when the backup contains one. $copiedCur records
# whether the copy succeeded; a failure is logged and left for a later retry.
$copiedCur = $false
if (-not $srcCurExists) {
    Log "CurrentData.json not present in backup - skipping that copy step"
}
else {
    foreach ($curLine in @("Copying CurrentData.json", " src: $srcCur", " dst: $localCur")) {
        Log $curLine
    }
    try {
        Copy-Item -LiteralPath $srcCur -Destination $localCur -Force -ErrorAction Stop
        $copiedCur = $true
        $sz = (Get-Item -LiteralPath $localCur).Length
        Log " OK ($sz bytes)"
    }
    catch {
        Log " FAILED" 'ERROR'
        LogErr $_
    }
}
|
|
# Copy ArchivedData/ (only if present at source). The local folder is replaced
# wholesale. $copiedArc records success; $arcFiles/$arcBytes describe what
# ended up on disk and are reused for the restore manifest.
$copiedArc = $false
$arcFiles = 0
$arcBytes = 0
if ($srcArcExists) {
    Log "Copying ArchivedData/"
    Log " src: $srcArc"
    Log " dst: $localArc"
    try {
        if (Test-Path -LiteralPath $localArc) {
            Log " removing existing $localArc"
            # The removal must fully succeed. If the folder survives, Copy-Item
            # -Recurse copies the source INTO it (ArchivedData\ArchivedData)
            # and the restore would be reported OK with the data in the wrong
            # place. Stop on failure so the backup is left for a retry.
            Remove-Item -LiteralPath $localArc -Recurse -Force -ErrorAction Stop
        }
        Copy-Item -LiteralPath $srcArc -Destination $localArc -Recurse -Force -ErrorAction Stop
        $arcItems = @(Get-ChildItem -LiteralPath $localArc -Recurse -File -ErrorAction SilentlyContinue)
        $arcFiles = $arcItems.Count
        # Measure-Object yields no Sum for an empty folder; the cast makes that 0.
        $arcBytes = [long](($arcItems | Measure-Object -Property Length -Sum).Sum)
        $copiedArc = $true
        Log " OK ($arcFiles files, $arcBytes bytes)"
    } catch {
        Log " FAILED" 'ERROR'
        LogErr $_
    }
} else {
    Log "ArchivedData/ not present in backup - skipping that copy step"
}
|
|
# One-shot consumption gate. The backup may be consumed only when:
#   - each source that was present has been copied (an absent source is fine),
#   - and at least one item was actually copied.
# A present-but-failed source keeps the live backup in place for a retry.
$curSettled = $copiedCur -or (-not $srcCurExists)
$arcSettled = $copiedArc -or (-not $srcArcExists)
$anyCopied = $copiedCur -or $copiedArc
$consumeOk = $curSettled -and $arcSettled -and $anyCopied
Log "consumeOk=$consumeOk (copiedCur=$copiedCur, copiedArc=$copiedArc, srcCurExists=$srcCurExists, srcArcExists=$srcArcExists)"
|
|
# Consume the backup: move everything that was restored from the bay root to
# <bay>\migrated\<stamp>\ and drop a restore.manifest.json next to it, so the
# next cycle finds the bay root empty.
if ($consumeOk) {
    try {
        # The trailing 'Z' marks the stamp as UTC, so build it from UTC time
        # rather than local time.
        $stamp = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH-mm-ssZ')
        $migDir = Join-Path $bayDir 'migrated'
        $migStamp = Join-Path $migDir $stamp
        Log "Moving consumed backup to $migStamp"
        if (-not (Test-Path -LiteralPath $migDir)) { New-Item -ItemType Directory -Path $migDir -Force -ErrorAction Stop | Out-Null }
        if (-not (Test-Path -LiteralPath $migStamp)) { New-Item -ItemType Directory -Path $migStamp -Force -ErrorAction Stop | Out-Null }

        # Moving the data items is the part that must succeed; a failure here
        # jumps to the outer catch and leaves the backup for the next cycle.
        if (Test-Path -LiteralPath $srcCur) {
            Move-Item -LiteralPath $srcCur -Destination (Join-Path $migStamp 'CurrentData.json') -Force -ErrorAction Stop
            Log " moved CurrentData.json"
        }
        if (Test-Path -LiteralPath $srcArc) {
            Move-Item -LiteralPath $srcArc -Destination (Join-Path $migStamp 'ArchivedData') -Force -ErrorAction Stop
            Log " moved ArchivedData/"
        }

        # backup.manifest.json is best effort, but only claim success when the
        # move really happened.
        $bakManifest = Join-Path $bayDir 'backup.manifest.json'
        if (Test-Path -LiteralPath $bakManifest) {
            try {
                Move-Item -LiteralPath $bakManifest -Destination (Join-Path $migStamp 'backup.manifest.json') -Force -ErrorAction Stop
                Log " moved backup.manifest.json"
            } catch {
                Log " could not move backup.manifest.json (non-fatal)" 'WARN'
                LogErr $_
            }
        }

        $restoreManifest = [ordered]@{
            RestoredAt          = (Get-Date -Format 'o')
            DestinationHostname = $env:COMPUTERNAME
            DestinationUser     = $whoami
            MachineNumber       = $mn
            CurrentDataPresent  = $copiedCur
            CurrentDataBytes    = if ($copiedCur) { (Get-Item -LiteralPath $localCur).Length } else { 0 }
            ArchivedDataPresent = $copiedArc
            ArchivedDataFiles   = $arcFiles
            ArchivedDataBytes   = $arcBytes
            RestoredVia         = 'Restore-UDCData.ps1 (manifest engine, on logon)'
        }
        # The data is already moved at this point, so a manifest write failure
        # is reported on its own instead of as a failed move.
        try {
            $restoreManifest | ConvertTo-Json | Set-Content -LiteralPath (Join-Path $migStamp 'restore.manifest.json') -Encoding UTF8 -ErrorAction Stop
            Log " wrote restore.manifest.json"
        } catch {
            Log " could not write restore.manifest.json (backup is already consumed)" 'WARN'
            LogErr $_
        }

        Log "Backup consumed -> migrated\$stamp\"
    } catch {
        # NOTE(review): whatever is still at the bay root will be detected
        # again next cycle, which stops UDC and restores the same data over
        # the local copy - confirm that repeated restore is acceptable.
        Log "Move-to-migrated FAILED (data IS restored locally; live backup remains, next cycle will retry consumption)" 'ERROR'
        LogErr $_
    }
} else {
    Log "Restore incomplete - leaving live backup at $bayDir for retry next cycle." 'WARN'
}
|
|
# Start UDC again with the site name and machine number as arguments, but only
# when the executable exists and this run actually restored something. The
# vendor autostart in HKLM\Run also starts UDC on the next user logon; this
# covers the same-session case (e.g. a tech at the keyboard during restore).
# NOTE(review): this script is described as running as SYSTEM - confirm the
# process started here is visible on the operator's desktop.
$udcInstalled = Test-Path -LiteralPath $UdcExePath
$restoredSomething = $copiedCur -or $copiedArc
if ($udcInstalled -and $restoredSomething) {
    Log "Relaunching UDC.exe: `"$Site`" -$mn"
    # Site is wrapped in quotes so a name containing spaces stays one argument.
    $udcArgs = @(('"{0}"' -f $Site), "-$mn")
    try {
        Start-Process -FilePath $UdcExePath -ArgumentList $udcArgs
        Log " relaunched"
    }
    catch {
        Log " UDC relaunch FAILED" 'WARN'
        LogErr $_
    }
}
|
|
# Drop the W: mapping created with the SFLD creds so no stale net-use entry
# is left behind; output and errors from net use are discarded.
$null = & net use W: /delete /y 2>$null

foreach ($closingLine in @('Exit 0.', '===============================================')) {
    Log $closingLine
}
exit 0