"""
Parse Blancco Drive Eraser XML report into a friendly Python dict.

Blancco encodes everything as <entries name="X"><entry name="K" type="T">V</entry>...
which is too noisy to read directly in a template. Walking the tree once into
nested dicts/lists lets the template render fields by their semantic name
without any XPath-style noise.

Returned shape (best-effort; missing keys are OK in templates):
    {
      "meta":       {document_id, date, product_name, product_version,
                     product_revision, integrity},
      "company":    {business_location, business_name, customer_license, ...},
      "license_consumption_ids": [str, ...],
      "erasures":   [ {erasure_id, timestamp, target:{...}, state,
                       elapsed_time, erasure_standard_name, step:[...], ...} ],
      "hardware":   {system:{...}, bios:{...}, processors:{...}, ...},
    }
"""
from __future__ import annotations
import xml.etree.ElementTree as ET


def _walk(node):
    """Fold the <entry>/<entries> children of *node* into a plain dict.

    Each child is keyed by its ``name`` attribute, falling back to the tag
    when that attribute is missing or empty. An <entry> contributes its
    stripped text, an <entries> contributes the dict produced by walking it
    recursively, and children with any other tag are ignored. When a key
    occurs more than once, its values are gathered into a list in document
    order.
    """
    collected = {}
    for element in node:
        tag = element.tag
        if tag == "entries":
            value = _walk(element)
        elif tag == "entry":
            value = (element.text or "").strip()
        else:
            # Not part of the entry/entries vocabulary; nothing to record.
            continue

        key = element.attrib.get("name") or tag
        if key not in collected:
            collected[key] = value
            continue

        # Second or later occurrence: promote to a list on first repeat,
        # then keep appending to that same list.
        previous = collected[key]
        if isinstance(previous, list):
            previous.append(value)
        else:
            collected[key] = [previous, value]
    return collected


def _text(parent, path, default=""):
    """Return the stripped text of the first element matching *path*.

    *default* is returned unchanged when *parent* is None, when nothing under
    *parent* matches *path*, or when the matched element has no text.
    """
    match = parent.find(path) if parent is not None else None
    raw = None if match is None else match.text
    return default if raw is None else raw.strip()


def parse(path: str) -> dict:
    """Parse a Blancco Drive Eraser XML report file into nested dicts/lists.

    Args:
        path: Location of the XML report; handed straight to ``ET.parse``.

    Returns:
        A dict with the keys ``meta``, ``company``,
        ``license_consumption_ids``, ``erasures`` and ``hardware``. Sections
        that are absent from the report are left as empty containers, so
        consumers can look keys up without first checking which parts of the
        report were present.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    root = ET.parse(path).getroot()

    # Compare with None explicitly. An Element that has no children is falsy,
    # so ``find(...) or root`` would discard a found-but-empty node, and
    # truth-testing an Element raises DeprecationWarning on Python 3.12+.
    rep = root.find(".//report/blancco_data")
    if rep is None:
        rep = root

    out = {
        "meta": {},
        "company": {},
        "license_consumption_ids": [],
        "erasures": [],
        "hardware": {},
    }

    desc = rep.find("description")
    if desc is not None:
        meta = out["meta"]
        meta["document_id"] = _text(desc, "document_id")

        log_entry = desc.find(".//log_entry")
        if log_entry is not None:
            author = log_entry.find("author")
            if author is not None:
                for field in ("product_name", "product_version", "product_revision"):
                    meta[field] = _text(author, field)
            meta["date"] = _text(log_entry, "date")
            meta["integrity"] = _text(log_entry, "integrity")

        company = desc.find(".//entry[@name='description_entries']/entries[@name='company_information']")
        if company is not None:
            # Entries without a usable name cannot be addressed by key; skip them.
            out["company"].update(
                (entry.attrib["name"], (entry.text or "").strip())
                for entry in company.findall("entry")
                if entry.attrib.get("name")
            )

        lcids = desc.find(".//entry[@name='description_entries']/entries[@name='license_consumption_ids']")
        if lcids is not None:
            stripped = ((entry.text or "").strip() for entry in lcids.findall("entry"))
            # Blank ids carry no information, so they are dropped.
            out["license_consumption_ids"].extend(value for value in stripped if value)

    out["erasures"].extend(
        _walk(erasure)
        for erasure in rep.findall(".//blancco_erasure_report//entries[@name='erasure']")
    )

    # Prefer the hardware report that sits next to the erasure report; only
    # fall back to a search of all descendants when there is no such sibling.
    hw = rep.find("blancco_erasure_report/../blancco_hardware_report")
    if hw is None:
        hw = rep.find(".//blancco_hardware_report")
    if hw is not None:
        for sub in hw:
            if sub.tag == "entries":
                out["hardware"][sub.attrib.get("name", "")] = _walk(sub)

    return out