diff --git a/sql/udc_retention.sql b/sql/udc_retention.sql new file mode 100644 index 0000000..4dadd18 --- /dev/null +++ b/sql/udc_retention.sql @@ -0,0 +1,286 @@ +-- ============================================================================ +-- UDC Data Retention & Summarization +-- Keeps daily summaries forever, purges raw data after 90 days +-- Created: 2025-12-16 +-- ============================================================================ + +-- Daily measurement summary (aggregated from udcmeasurements) +CREATE TABLE IF NOT EXISTS udcmeasurements_daily ( + summaryid INT AUTO_INCREMENT PRIMARY KEY, + summarydate DATE NOT NULL, + machinenumber VARCHAR(20) NOT NULL, + method VARCHAR(20), + eventtype VARCHAR(20), + measurement_count INT DEFAULT 0, + oot_count INT DEFAULT 0, + avg_deviation DECIMAL(12,6), + min_deviation DECIMAL(12,6), + max_deviation DECIMAL(12,6), + avg_actualval DECIMAL(12,6), + dateadded DATETIME DEFAULT CURRENT_TIMESTAMP, + UNIQUE INDEX uk_daily_machine_method (summarydate, machinenumber, method, eventtype), + INDEX idx_summarydate (summarydate), + INDEX idx_machinenumber (machinenumber) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +-- Daily event summary (aggregated from udcevents) +CREATE TABLE IF NOT EXISTS udcevents_daily ( + summaryid INT AUTO_INCREMENT PRIMARY KEY, + summarydate DATE NOT NULL, + machinenumber VARCHAR(20) NOT NULL, + eventtype VARCHAR(30), + event_count INT DEFAULT 0, + dateadded DATETIME DEFAULT CURRENT_TIMESTAMP, + UNIQUE INDEX uk_daily_machine_event (summarydate, machinenumber, eventtype), + INDEX idx_summarydate (summarydate), + INDEX idx_machinenumber (machinenumber) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +-- Daily tool data summary (aggregated from udctooldata) +CREATE TABLE IF NOT EXISTS udctooldata_daily ( + summaryid INT AUTO_INCREMENT PRIMARY KEY, + summarydate DATE NOT NULL, + machinenumber VARCHAR(20) NOT NULL, + toolnumber INT, + measurement_count INT DEFAULT 0, + oot_count INT DEFAULT 0, + avg_deviation DECIMAL(12,6), + max_deviation DECIMAL(12,6), + dateadded DATETIME DEFAULT CURRENT_TIMESTAMP, + UNIQUE INDEX uk_daily_machine_tool (summarydate, machinenumber, toolnumber), + INDEX idx_summarydate (summarydate), + INDEX idx_machinenumber (machinenumber), + INDEX idx_toolnumber (toolnumber) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +-- ============================================================================ +-- Stored Procedure: Summarize a specific date's data +-- ============================================================================ +DELIMITER // + +DROP PROCEDURE IF EXISTS sp_udc_summarize_date // +CREATE PROCEDURE sp_udc_summarize_date(IN p_date DATE) +BEGIN + -- Summarize measurements for the date + INSERT INTO udcmeasurements_daily + (summarydate, machinenumber, method, eventtype, measurement_count, oot_count, + avg_deviation, min_deviation, max_deviation, avg_actualval) + SELECT + DATE(m.eventtime) as summarydate, + s.machinenumber, + m.method, + m.eventtype, + COUNT(*) as measurement_count, + SUM(m.oot) as oot_count, + ROUND(AVG(m.deviation), 6) as avg_deviation, + ROUND(MIN(m.deviation), 6) as min_deviation, + ROUND(MAX(m.deviation), 6) as max_deviation, + ROUND(AVG(m.actualval), 6) as avg_actualval + FROM udcmeasurements m + JOIN udcsessions s ON m.sessionid = s.sessionid + WHERE DATE(m.eventtime) = p_date + GROUP BY DATE(m.eventtime), s.machinenumber, m.method, m.eventtype + ON DUPLICATE KEY UPDATE + measurement_count = VALUES(measurement_count), + oot_count = VALUES(oot_count), + avg_deviation = VALUES(avg_deviation), + min_deviation = VALUES(min_deviation), + max_deviation = VALUES(max_deviation), + avg_actualval = VALUES(avg_actualval), + dateadded = CURRENT_TIMESTAMP; + + -- Summarize events for the date + INSERT INTO udcevents_daily + (summarydate, machinenumber, eventtype, event_count) + SELECT + DATE(e.eventtime) as summarydate, + s.machinenumber, + e.eventtype, + COUNT(*) as event_count + FROM udcevents e + JOIN udcsessions s ON e.sessionid = s.sessionid + WHERE DATE(e.eventtime) = p_date + GROUP BY DATE(e.eventtime), s.machinenumber, e.eventtype + ON DUPLICATE KEY UPDATE + event_count = VALUES(event_count), + dateadded = CURRENT_TIMESTAMP; + + -- Summarize tool data for the date + INSERT INTO udctooldata_daily + (summarydate, machinenumber, toolnumber, measurement_count, oot_count, avg_deviation, max_deviation) + SELECT + DATE(t.eventtime) as summarydate, + s.machinenumber, + t.toolnumber, + COUNT(*) as measurement_count, + SUM(t.oot) as oot_count, + ROUND(AVG(t.deviation), 6) as avg_deviation, + ROUND(MAX(ABS(t.deviation)), 6) as max_deviation + FROM udctooldata t + JOIN udcsessions s ON t.sessionid = s.sessionid + WHERE DATE(t.eventtime) = p_date + GROUP BY DATE(t.eventtime), s.machinenumber, t.toolnumber + ON DUPLICATE KEY UPDATE + measurement_count = VALUES(measurement_count), + oot_count = VALUES(oot_count), + avg_deviation = VALUES(avg_deviation), + max_deviation = VALUES(max_deviation), + dateadded = CURRENT_TIMESTAMP; +END // + +DELIMITER ; + +-- ============================================================================ +-- Stored Procedure: Purge raw data older than retention period +-- ============================================================================ +DELIMITER // + +DROP PROCEDURE IF EXISTS sp_udc_purge_old_data // +CREATE PROCEDURE sp_udc_purge_old_data(IN p_retention_days INT) +BEGIN + DECLARE v_cutoff_date DATE; + DECLARE v_measurements_deleted INT DEFAULT 0; + DECLARE v_events_deleted INT DEFAULT 0; + DECLARE v_tooldata_deleted INT DEFAULT 0; + + SET v_cutoff_date = DATE_SUB(CURDATE(), INTERVAL p_retention_days DAY); + + -- Delete old measurements + DELETE FROM udcmeasurements WHERE DATE(eventtime) < v_cutoff_date; + SET v_measurements_deleted = ROW_COUNT(); + + -- Delete old events + DELETE FROM udcevents WHERE DATE(eventtime) < v_cutoff_date; + SET v_events_deleted = ROW_COUNT(); + + -- Delete old tool data + DELETE FROM udctooldata WHERE DATE(eventtime) < v_cutoff_date; + SET v_tooldata_deleted = ROW_COUNT(); + + -- Return counts + SELECT v_measurements_deleted AS measurements_deleted, + v_events_deleted AS events_deleted, + v_tooldata_deleted AS tooldata_deleted, + v_cutoff_date AS cutoff_date; +END // + +DELIMITER ; + +-- ============================================================================ +-- Stored Procedure: Daily maintenance (summarize yesterday, purge old) +-- Run this daily via scheduled task or cron +-- ============================================================================ +DELIMITER // + +DROP PROCEDURE IF EXISTS sp_udc_daily_maintenance // +CREATE PROCEDURE sp_udc_daily_maintenance(IN p_retention_days INT) +BEGIN + DECLARE v_yesterday DATE; + SET v_yesterday = DATE_SUB(CURDATE(), INTERVAL 1 DAY); + + -- Summarize yesterday's data + CALL sp_udc_summarize_date(v_yesterday); + + -- Purge old data + CALL sp_udc_purge_old_data(p_retention_days); +END // + +DELIMITER ; + +-- ============================================================================ +-- Stored Procedure: Backfill summaries for historical data +-- Run once to summarize all existing data before enabling purge +-- ============================================================================ +DELIMITER // + +DROP PROCEDURE IF EXISTS sp_udc_backfill_summaries // +CREATE PROCEDURE sp_udc_backfill_summaries() +BEGIN + DECLARE v_min_date DATE; + DECLARE v_max_date DATE; + DECLARE v_current_date DATE; + DECLARE v_count INT DEFAULT 0; + + -- Get date range from measurements + SELECT MIN(DATE(eventtime)), MAX(DATE(eventtime)) + INTO v_min_date, v_max_date + FROM udcmeasurements; + + IF v_min_date IS NOT NULL THEN + SET v_current_date = v_min_date; + + WHILE v_current_date <= v_max_date DO + CALL sp_udc_summarize_date(v_current_date); + SET v_current_date = DATE_ADD(v_current_date, INTERVAL 1 DAY); + SET v_count = v_count + 1; + END WHILE; + END IF; + + SELECT v_count AS days_summarized, v_min_date AS start_date, v_max_date AS end_date; +END // + +DELIMITER ; + +-- ============================================================================ +-- Views for summary data +-- ============================================================================ + +-- Weekly measurement trends from daily summaries +CREATE OR REPLACE VIEW vwudcmeasurements_weekly AS +SELECT + YEARWEEK(summarydate, 1) as yearweek, + MIN(summarydate) as week_start, + machinenumber, + method, + SUM(measurement_count) as measurement_count, + SUM(oot_count) as oot_count, + ROUND(SUM(oot_count) * 100.0 / SUM(measurement_count), 2) as oot_rate, + ROUND(AVG(avg_deviation), 6) as avg_deviation +FROM udcmeasurements_daily +GROUP BY YEARWEEK(summarydate, 1), machinenumber, method; + +-- Monthly measurement trends from daily summaries +CREATE OR REPLACE VIEW vwudcmeasurements_monthly AS +SELECT + DATE_FORMAT(summarydate, '%Y-%m') as yearmonth, + machinenumber, + method, + SUM(measurement_count) as measurement_count, + SUM(oot_count) as oot_count, + ROUND(SUM(oot_count) * 100.0 / SUM(measurement_count), 2) as oot_rate, + ROUND(AVG(avg_deviation), 6) as avg_deviation +FROM udcmeasurements_daily +GROUP BY DATE_FORMAT(summarydate, '%Y-%m'), machinenumber, method; + +-- Weekly event trends from daily summaries +CREATE OR REPLACE VIEW vwudcevents_weekly AS +SELECT + YEARWEEK(summarydate, 1) as yearweek, + MIN(summarydate) as week_start, + machinenumber, + eventtype, + SUM(event_count) as event_count +FROM udcevents_daily +GROUP BY YEARWEEK(summarydate, 1), machinenumber, eventtype; + +-- ============================================================================ +-- Usage Instructions +-- ============================================================================ +-- +-- 1. FIRST TIME SETUP (run once): +-- CALL sp_udc_backfill_summaries(); +-- This summarizes all existing data into daily tables. +-- +-- 2. DAILY MAINTENANCE (schedule to run daily): +-- CALL sp_udc_daily_maintenance(90); +-- This summarizes yesterday and purges data older than 90 days. +-- +-- 3. MANUAL SUMMARIZE (for a specific date): +-- CALL sp_udc_summarize_date('2025-12-15'); +-- +-- 4. MANUAL PURGE (with custom retention): +-- CALL sp_udc_purge_old_data(30); -- Keep only 30 days +-- +-- ============================================================================ + +SELECT 'UDC retention tables and procedures created successfully' AS status;