""" Verify data migration integrity. This script compares record counts between source and target databases and performs spot checks on data integrity. Usage: python -m scripts.migration.verify_migration --source """ import argparse import logging from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def verify_counts(source_session, target_session): """Compare record counts between source and target.""" results = {} # Define table mappings (source -> target) table_mappings = [ ('machines', 'assets', 'Machine to Asset'), ('communications', 'communications', 'Communications'), ('vendors', 'vendors', 'Vendors'), ('locations', 'locations', 'Locations'), ('businessunits', 'businessunits', 'Business Units'), ] for source_table, target_table, description in table_mappings: try: source_count = source_session.execute(text(f"SELECT COUNT(*) FROM {source_table}")).scalar() except Exception as e: source_count = f"Error: {e}" try: target_count = target_session.execute(text(f"SELECT COUNT(*) FROM {target_table}")).scalar() except Exception as e: target_count = f"Error: {e}" match = source_count == target_count if isinstance(source_count, int) and isinstance(target_count, int) else False results[description] = { 'source': source_count, 'target': target_count, 'match': match } return results def verify_sample_records(source_session, target_session, sample_size=10): """Spot-check sample records for data integrity.""" issues = [] # Sample machine -> asset migration try: sample_machines = source_session.execute(text(f""" SELECT machineid, machinenumber, serialnumber, alias FROM machines ORDER BY RAND() LIMIT {sample_size} """)) for machine in sample_machines: machine_dict = dict(machine._mapping) # Check if asset exists with same ID asset = target_session.execute(text(""" SELECT assetid, assetnumber, serialnumber, name FROM assets WHERE assetid = :assetid """), {'assetid': machine_dict['machineid']}).fetchone() if not asset: issues.append(f"Machine {machine_dict['machineid']} not found in assets") continue asset_dict = dict(asset._mapping) # Verify data matches if machine_dict['machinenumber'] != asset_dict['assetnumber']: issues.append(f"Asset {asset_dict['assetid']}: machinenumber mismatch") if machine_dict.get('serialnumber') != asset_dict.get('serialnumber'): issues.append(f"Asset {asset_dict['assetid']}: serialnumber mismatch") except Exception as e: issues.append(f"Could not verify machines: {e}") return issues def run_verification(source_conn_str, target_conn_str): """ Run migration verification. Args: source_conn_str: Source database connection string target_conn_str: Target database connection string """ source_engine = create_engine(source_conn_str) target_engine = create_engine(target_conn_str) SourceSession = sessionmaker(bind=source_engine) TargetSession = sessionmaker(bind=target_engine) source_session = SourceSession() target_session = TargetSession() try: logger.info("=" * 60) logger.info("MIGRATION VERIFICATION REPORT") logger.info("=" * 60) # Verify counts logger.info("\nRecord Count Comparison:") logger.info("-" * 40) counts = verify_counts(source_session, target_session) all_match = True for table, result in counts.items(): status = "OK" if result['match'] else "MISMATCH" if not result['match']: all_match = False logger.info(f" {table}: Source={result['source']}, Target={result['target']} [{status}]") # Verify sample records logger.info("\nSample Record Verification:") logger.info("-" * 40) issues = verify_sample_records(source_session, target_session) if issues: for issue in issues: logger.warning(f" ! {issue}") else: logger.info(" All sample records verified OK") # Summary logger.info("\n" + "=" * 60) if all_match and not issues: logger.info("VERIFICATION PASSED - Migration looks good!") else: logger.warning("VERIFICATION FOUND ISSUES - Review above") logger.info("=" * 60) finally: source_session.close() target_session.close() def main(): parser = argparse.ArgumentParser(description='Verify migration integrity') parser.add_argument('--source', required=True, help='Source database connection string') parser.add_argument('--target', help='Target database connection string') args = parser.parse_args() target = args.target if not target: import os import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) from shopdb import create_app app = create_app() target = app.config['SQLALCHEMY_DATABASE_URI'] run_verification(args.source, target) if __name__ == '__main__': main()