test harness: smoke-pass B-enforce, fix four issues

Harness now passes 9/9 across baseline + heal + idempotent phases on the
win11 VM (Standard/Machine), with 6 drift scenarios applied + healed
between the baseline and heal cycles in ~30s total.

Fixes:

1. lib/qga-run.py - extracted the qga round-trip out of an inline
   `python3 - <<PY` heredoc. The inline form clobbered stdin: the heredoc
   replaces stdin in order to feed python the script, so sys.stdin was
   already empty by the time the script tried to read the PowerShell
   snippet that the caller of the function had piped in.
2. lib/qga.sh - dropped `set -euo pipefail`. When sourced, it leaked
   into the harness shell. Then any captured `out=$(qga_run_ps ...)`
   that exited non-zero (verify-state.ps1 returns 1 on any FAIL,
   normal during drift phases) would silently abort the harness.
   Callers handle non-zero with `|| rc=$?`.
3. B-enforce/run.sh do_verify - rewritten to capture rc, parse summary
   line, distinguish expect_pass=true vs false, route to ok / fail
   helper without aborting the harness on a normal non-zero verify.
4. matrix.json WJF Defect Tracker entry - switched detection from File
   to Registry (uninstall key DisplayVersion). The MSI does not drop
   the Defect_Tracker.exe artifact at the documented path even though
   the manifest's File detection treats it as installed; the uninstall
   reg entry is the reliable install marker. v2 manifest's File
   detection path may also need fixing, separate task.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
cproudlock
2026-05-02 17:45:06 -04:00
parent db1cdf7aee
commit eaf2dbf167
4 changed files with 79 additions and 51 deletions

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# qga-run.py - run a PowerShell snippet inside the win11 VM via qemu-guest-agent.
# Stdin = PS snippet. Stdout = combined stdout, then optional STDERR block,
# then "--- exit N ---". Exit code 0 on PS rc 0, 1 otherwise.
import base64, json, os, subprocess, sys, time
# libvirt domain whose guest agent is addressed; override via VM_DOMAIN.
DOMAIN = os.getenv('VM_DOMAIN', 'win11')
# Upper bound, in seconds, on how long the script polls for the guest
# process to exit; override via QGA_TIMEOUT.
TIMEOUT = int(os.getenv('QGA_TIMEOUT', '300'))
def virsh(cmd):
    """Send one guest-agent command through virsh and return its result.

    *cmd* is a JSON-serialisable dict; it is passed as the payload of
    ``virsh -c qemu:///system qemu-agent-command DOMAIN``. The reply is
    parsed as JSON and the value under its ``return`` key is returned.

    Exits the script with a ``virsh err: ...`` message if virsh returns a
    non-zero status or does not finish within 120 seconds.
    """
    argv = ['virsh', '-c', 'qemu:///system', 'qemu-agent-command', DOMAIN, json.dumps(cmd)]
    try:
        p = subprocess.run(argv, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        # Report a hung virsh the same way as a failed one instead of
        # letting the exception surface as a traceback.
        sys.exit("virsh err: no reply from qemu-agent-command within 120s")
    if p.returncode != 0:
        sys.exit(f"virsh err: {p.stderr.strip()}")
    return json.loads(p.stdout)['return']
# The PowerShell snippet to run arrives on stdin; refuse to launch
# powershell.exe with nothing to execute.
snippet = sys.stdin.read()
if not snippet.strip():
    sys.exit("qga-run.py: empty PowerShell snippet on stdin")

# Start powershell.exe in the guest with output capture enabled;
# guest-exec replies with the pid that is polled below.
args = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-Command', snippet]
pid = virsh({'execute': 'guest-exec',
             'arguments': {'path': 'powershell.exe', 'arg': args, 'capture-output': True}})['pid']

# Use the monotonic clock for the deadline so that a wall-clock adjustment
# on the host can neither shorten nor extend the wait.
deadline = time.monotonic() + TIMEOUT
while time.monotonic() < deadline:
    st = virsh({'execute': 'guest-exec-status', 'arguments': {'pid': pid}})
    if st.get('exited'):
        # Captured streams come back base64-encoded; a missing key is
        # treated as empty output.
        out = base64.b64decode(st.get('out-data', '')).decode('utf-8', 'replace')
        err = base64.b64decode(st.get('err-data', '')).decode('utf-8', 'replace')
        rc = st.get('exitcode')
        sys.stdout.write(out)
        if err:
            sys.stdout.write('\n--- STDERR ---\n')
            sys.stdout.write(err)
        sys.stdout.write(f'\n--- exit {rc} ---\n')
        # Collapse the guest exit code to 0 / 1 for the calling shell.
        sys.exit(0 if rc == 0 else 1)
    time.sleep(0.5)

# The guest process never reported 'exited' within TIMEOUT seconds.
sys.exit(f"timeout waiting for pid {pid}")

View File

@@ -6,8 +6,13 @@
# All commands run as NT AUTHORITY\SYSTEM inside the VM (qemu-ga's service
# context). See reference-vm-qga-as-system memory note for why this is
# preferred over WinRM for dispatcher / manifest-engine tests.
#
# Sourced by harness scripts. Deliberately does NOT enable set -e because
# qga_run_ps returns non-zero whenever the inner PowerShell exits non-zero
# (expected during drift/verify phases), and a sourced set -e would silently
# abort the calling shell on every $(qga_run_ps ...) capture of a failing run.
# Callers that want strict mode should set it themselves AND wrap qga_run_ps
# captures with `|| rc=$?` so the non-zero exit does not propagate.
VM_DOMAIN="${VM_DOMAIN:-win11}"
VM_IP="${VM_IP:-192.168.122.225}"
@@ -26,38 +31,17 @@ qga() {
|| die "qga call failed: $payload"
}
# Run a PowerShell snippet inside the VM as SYSTEM. Stdin = snippet.
# Stdout = combined PS stdout, then optional "--- STDERR ---" block,
# then "--- exit N ---". Returns 0 iff the PS process returned 0.
#
# Implementation note: keep the python script in a sibling file rather
# than inlined via heredoc. An inline `python3 - <<PY ... PY` redirects
# stdin to the heredoc, which clobbers the PowerShell snippet the
# caller piped in.
QGA_LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
qga_run_ps() {
    # stdin is left untouched so the caller's snippet reaches qga-run.py;
    # the function's status is the script's exit status.
    python3 "$QGA_LIB_DIR/qga-run.py"
}
# Wait until qemu-ga responds to guest-ping