-- ===================================================== -- CLEANUP: Consolidate Duplicate Vendors and Models -- ===================================================== -- Purpose: Remove duplicate vendors/models caused by case/spacing differences -- WARNING: This will modify data. BACKUP FIRST! -- ===================================================== USE shopdb; SET SQL_SAFE_UPDATES = 0; -- ===================================================== -- STEP 1: Show Duplicate Vendors -- ===================================================== SELECT '============================================' AS ''; SELECT 'STEP 1: Analyzing Duplicate Vendors' AS ''; SELECT '============================================' AS ''; SELECT LOWER(TRIM(vendor)) as normalized_name, COUNT(*) as duplicate_count, GROUP_CONCAT(vendor ORDER BY vendorid SEPARATOR ' | ') as variations, GROUP_CONCAT(vendorid ORDER BY vendorid) as vendor_ids FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1 ORDER BY duplicate_count DESC, normalized_name; -- Count machines affected SELECT 'Machines using duplicate vendors:' as status, COUNT(DISTINCT m.machineid) as machine_count FROM machines m JOIN models mo ON m.modelnumberid = mo.modelnumberid JOIN vendors v ON mo.vendorid = v.vendorid WHERE v.vendorid IN ( SELECT vendorid FROM vendors v2 WHERE LOWER(TRIM(v2.vendor)) IN ( SELECT LOWER(TRIM(vendor)) FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1 ) ); -- ===================================================== -- STEP 2: Show Duplicate Models -- ===================================================== SELECT '============================================' AS ''; SELECT 'STEP 2: Analyzing Duplicate Models' AS ''; SELECT '============================================' AS ''; SELECT REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', '') as normalized_model, v.vendor, COUNT(*) as duplicate_count, GROUP_CONCAT(modelnumber ORDER BY modelnumberid SEPARATOR ' | ') as variations, GROUP_CONCAT(modelnumberid ORDER BY modelnumberid) as model_ids FROM models m JOIN vendors v ON m.vendorid = v.vendorid WHERE m.isactive = 1 GROUP BY REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', ''), m.vendorid HAVING COUNT(*) > 1 ORDER BY duplicate_count DESC, normalized_model; -- ===================================================== -- STEP 3: Consolidate Duplicate Vendors (DRY RUN) -- ===================================================== SELECT '============================================' AS ''; SELECT 'STEP 3: Vendor Consolidation Plan' AS ''; SELECT '============================================' AS ''; -- This shows what WOULD be updated (DRY RUN) SELECT 'KEEP vendorid:' as action, MIN(vendorid) as keep_id, LOWER(TRIM(vendor)) as normalized_name, GROUP_CONCAT(vendor ORDER BY vendorid SEPARATOR ' -> ') as consolidating, GROUP_CONCAT(CASE WHEN vendorid != MIN(vendorid) THEN vendorid END) as will_delete_ids, COUNT(*) - 1 as duplicates_to_remove FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1; -- Show models that will be updated SELECT 'Models to be updated:' as action, mo.modelnumberid, mo.modelnumber, v_old.vendorid as old_vendor_id, v_old.vendor as old_vendor_name, v_new.vendorid as new_vendor_id, v_new.vendor as new_vendor_name FROM models mo JOIN vendors v_old ON mo.vendorid = v_old.vendorid JOIN ( SELECT MIN(vendorid) as keep_id, LOWER(TRIM(vendor)) as norm FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1 ) keepers ON LOWER(TRIM(v_old.vendor)) = keepers.norm JOIN vendors v_new ON v_new.vendorid = keepers.keep_id WHERE v_old.vendorid != v_new.vendorid ORDER BY mo.modelnumberid; -- ===================================================== -- STEP 4: Execute Vendor Consolidation -- ===================================================== SELECT '============================================' AS ''; SELECT 'STEP 4: Executing Vendor Consolidation' AS ''; SELECT '============================================' AS ''; -- Update models to point to keeper vendor UPDATE models mo JOIN vendors v_old ON mo.vendorid = v_old.vendorid JOIN ( SELECT MIN(vendorid) as keep_id, LOWER(TRIM(vendor)) as norm FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1 ) keepers ON LOWER(TRIM(v_old.vendor)) = keepers.norm SET mo.vendorid = keepers.keep_id WHERE v_old.vendorid != keepers.keep_id; SELECT ROW_COUNT() as models_updated; -- Mark duplicate vendors as inactive UPDATE vendors v JOIN ( SELECT vendorid FROM vendors v2 WHERE v2.isactive = 1 AND LOWER(TRIM(v2.vendor)) IN ( SELECT LOWER(TRIM(vendor)) FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1 ) AND v2.vendorid NOT IN ( SELECT MIN(vendorid) FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) ) ) dups ON v.vendorid = dups.vendorid SET v.isactive = 0; SELECT ROW_COUNT() as vendors_deactivated; -- ===================================================== -- STEP 5: Consolidate Duplicate Models -- ===================================================== SELECT '============================================' AS ''; SELECT 'STEP 5: Executing Model Consolidation' AS ''; SELECT '============================================' AS ''; -- Update machines to point to keeper model (normalized: spaces, hyphens, underscores removed) UPDATE machines m JOIN models mo_old ON m.modelnumberid = mo_old.modelnumberid JOIN ( SELECT MIN(modelnumberid) as keep_id, REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', '') as norm, vendorid FROM models WHERE isactive = 1 GROUP BY REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', ''), vendorid HAVING COUNT(*) > 1 ) keepers ON REPLACE(REPLACE(REPLACE(LOWER(TRIM(mo_old.modelnumber)), ' ', ''), '-', ''), '_', '') = keepers.norm AND mo_old.vendorid = keepers.vendorid SET m.modelnumberid = keepers.keep_id WHERE mo_old.modelnumberid != keepers.keep_id; SELECT ROW_COUNT() as machines_updated; -- Mark duplicate models as inactive UPDATE models mo JOIN ( SELECT modelnumberid FROM models mo2 WHERE mo2.isactive = 1 AND (REPLACE(REPLACE(REPLACE(LOWER(TRIM(mo2.modelnumber)), ' ', ''), '-', ''), '_', ''), mo2.vendorid) IN ( SELECT REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', ''), vendorid FROM models WHERE isactive = 1 GROUP BY REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', ''), vendorid HAVING COUNT(*) > 1 ) AND mo2.modelnumberid NOT IN ( SELECT MIN(modelnumberid) FROM models WHERE isactive = 1 GROUP BY REPLACE(REPLACE(REPLACE(LOWER(TRIM(modelnumber)), ' ', ''), '-', ''), '_', ''), vendorid ) ) dups ON mo.modelnumberid = dups.modelnumberid SET mo.isactive = 0; SELECT ROW_COUNT() as models_deactivated; -- ===================================================== -- STEP 6: Verification -- ===================================================== SELECT '============================================' AS ''; SELECT 'STEP 6: Verification' AS ''; SELECT '============================================' AS ''; SELECT 'Active vendors after cleanup:' as status, COUNT(*) as count FROM vendors WHERE isactive = 1; SELECT 'Active models after cleanup:' as status, COUNT(*) as count FROM models WHERE isactive = 1; SELECT 'Remaining duplicate vendors:' as status, COUNT(*) as count FROM ( SELECT LOWER(TRIM(vendor)) as norm FROM vendors WHERE isactive = 1 GROUP BY LOWER(TRIM(vendor)) HAVING COUNT(*) > 1 ) dup_check; SELECT 'Remaining duplicate models:' as status, COUNT(*) as count FROM ( SELECT LOWER(TRIM(modelnumber)), vendorid FROM models WHERE isactive = 1 GROUP BY LOWER(TRIM(modelnumber)), vendorid HAVING COUNT(*) > 1 ) dup_check; SET SQL_SAFE_UPDATES = 1; SELECT '============================================' AS ''; SELECT 'DRY RUN COMPLETE' AS ''; SELECT 'Review results above, then uncomment' AS ''; SELECT 'STEP 4 and STEP 5 to execute cleanup' AS ''; SELECT '============================================' AS '';