Add UDC data retention and summarization system

- Creates daily summary tables: udcmeasurements_daily, udcevents_daily, udctooldata_daily
- Stored procedures:
  - sp_udc_backfill_summaries(): One-time backfill of historical data
  - sp_udc_summarize_date(date): Summarize a specific date
  - sp_udc_purge_old_data(days): Purge raw data older than X days
  - sp_udc_daily_maintenance(days): Daily summarize + purge
- Weekly/monthly views for trend analysis
- Keeps summaries forever, purges raw data after retention period

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
cproudlock
2025-12-16 09:18:02 -05:00
parent ed52086732
commit 0d0e589ea4

286
sql/udc_retention.sql Normal file
View File

@@ -0,0 +1,286 @@
-- ============================================================================
-- UDC Data Retention & Summarization
-- Keeps daily summaries forever, purges raw data after 90 days
-- Created: 2025-12-16
-- ============================================================================
-- Daily measurement summary (aggregated from udcmeasurements)
CREATE TABLE IF NOT EXISTS udcmeasurements_daily (
summaryid INT AUTO_INCREMENT PRIMARY KEY,
summarydate DATE NOT NULL,
machinenumber VARCHAR(20) NOT NULL,
method VARCHAR(20),
eventtype VARCHAR(20),
measurement_count INT DEFAULT 0,
oot_count INT DEFAULT 0,
avg_deviation DECIMAL(12,6),
min_deviation DECIMAL(12,6),
max_deviation DECIMAL(12,6),
avg_actualval DECIMAL(12,6),
dateadded DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE INDEX uk_daily_machine_method (summarydate, machinenumber, method, eventtype),
INDEX idx_summarydate (summarydate),
INDEX idx_machinenumber (machinenumber)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Daily event summary (aggregated from udcevents)
CREATE TABLE IF NOT EXISTS udcevents_daily (
summaryid INT AUTO_INCREMENT PRIMARY KEY,
summarydate DATE NOT NULL,
machinenumber VARCHAR(20) NOT NULL,
eventtype VARCHAR(30),
event_count INT DEFAULT 0,
dateadded DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE INDEX uk_daily_machine_event (summarydate, machinenumber, eventtype),
INDEX idx_summarydate (summarydate),
INDEX idx_machinenumber (machinenumber)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Daily tool data summary (aggregated from udctooldata)
CREATE TABLE IF NOT EXISTS udctooldata_daily (
summaryid INT AUTO_INCREMENT PRIMARY KEY,
summarydate DATE NOT NULL,
machinenumber VARCHAR(20) NOT NULL,
toolnumber INT,
measurement_count INT DEFAULT 0,
oot_count INT DEFAULT 0,
avg_deviation DECIMAL(12,6),
max_deviation DECIMAL(12,6),
dateadded DATETIME DEFAULT CURRENT_TIMESTAMP,
UNIQUE INDEX uk_daily_machine_tool (summarydate, machinenumber, toolnumber),
INDEX idx_summarydate (summarydate),
INDEX idx_machinenumber (machinenumber),
INDEX idx_toolnumber (toolnumber)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- ============================================================================
-- Stored Procedure: Summarize a specific date's data
-- ============================================================================
DELIMITER //
DROP PROCEDURE IF EXISTS sp_udc_summarize_date //
CREATE PROCEDURE sp_udc_summarize_date(IN p_date DATE)
BEGIN
-- Summarize measurements for the date
INSERT INTO udcmeasurements_daily
(summarydate, machinenumber, method, eventtype, measurement_count, oot_count,
avg_deviation, min_deviation, max_deviation, avg_actualval)
SELECT
DATE(m.eventtime) as summarydate,
s.machinenumber,
m.method,
m.eventtype,
COUNT(*) as measurement_count,
SUM(m.oot) as oot_count,
ROUND(AVG(m.deviation), 6) as avg_deviation,
ROUND(MIN(m.deviation), 6) as min_deviation,
ROUND(MAX(m.deviation), 6) as max_deviation,
ROUND(AVG(m.actualval), 6) as avg_actualval
FROM udcmeasurements m
JOIN udcsessions s ON m.sessionid = s.sessionid
WHERE DATE(m.eventtime) = p_date
GROUP BY DATE(m.eventtime), s.machinenumber, m.method, m.eventtype
ON DUPLICATE KEY UPDATE
measurement_count = VALUES(measurement_count),
oot_count = VALUES(oot_count),
avg_deviation = VALUES(avg_deviation),
min_deviation = VALUES(min_deviation),
max_deviation = VALUES(max_deviation),
avg_actualval = VALUES(avg_actualval),
dateadded = CURRENT_TIMESTAMP;
-- Summarize events for the date
INSERT INTO udcevents_daily
(summarydate, machinenumber, eventtype, event_count)
SELECT
DATE(e.eventtime) as summarydate,
s.machinenumber,
e.eventtype,
COUNT(*) as event_count
FROM udcevents e
JOIN udcsessions s ON e.sessionid = s.sessionid
WHERE DATE(e.eventtime) = p_date
GROUP BY DATE(e.eventtime), s.machinenumber, e.eventtype
ON DUPLICATE KEY UPDATE
event_count = VALUES(event_count),
dateadded = CURRENT_TIMESTAMP;
-- Summarize tool data for the date
INSERT INTO udctooldata_daily
(summarydate, machinenumber, toolnumber, measurement_count, oot_count, avg_deviation, max_deviation)
SELECT
DATE(t.eventtime) as summarydate,
s.machinenumber,
t.toolnumber,
COUNT(*) as measurement_count,
SUM(t.oot) as oot_count,
ROUND(AVG(t.deviation), 6) as avg_deviation,
ROUND(MAX(ABS(t.deviation)), 6) as max_deviation
FROM udctooldata t
JOIN udcsessions s ON t.sessionid = s.sessionid
WHERE DATE(t.eventtime) = p_date
GROUP BY DATE(t.eventtime), s.machinenumber, t.toolnumber
ON DUPLICATE KEY UPDATE
measurement_count = VALUES(measurement_count),
oot_count = VALUES(oot_count),
avg_deviation = VALUES(avg_deviation),
max_deviation = VALUES(max_deviation),
dateadded = CURRENT_TIMESTAMP;
END //
DELIMITER ;
-- ============================================================================
-- Stored Procedure: Purge raw data older than retention period
-- ============================================================================
DELIMITER //
DROP PROCEDURE IF EXISTS sp_udc_purge_old_data //
CREATE PROCEDURE sp_udc_purge_old_data(IN p_retention_days INT)
BEGIN
DECLARE v_cutoff_date DATE;
DECLARE v_measurements_deleted INT DEFAULT 0;
DECLARE v_events_deleted INT DEFAULT 0;
DECLARE v_tooldata_deleted INT DEFAULT 0;
SET v_cutoff_date = DATE_SUB(CURDATE(), INTERVAL p_retention_days DAY);
-- Delete old measurements
DELETE FROM udcmeasurements WHERE DATE(eventtime) < v_cutoff_date;
SET v_measurements_deleted = ROW_COUNT();
-- Delete old events
DELETE FROM udcevents WHERE DATE(eventtime) < v_cutoff_date;
SET v_events_deleted = ROW_COUNT();
-- Delete old tool data
DELETE FROM udctooldata WHERE DATE(eventtime) < v_cutoff_date;
SET v_tooldata_deleted = ROW_COUNT();
-- Return counts
SELECT v_measurements_deleted AS measurements_deleted,
v_events_deleted AS events_deleted,
v_tooldata_deleted AS tooldata_deleted,
v_cutoff_date AS cutoff_date;
END //
DELIMITER ;
-- ============================================================================
-- Stored Procedure: Daily maintenance (summarize yesterday, purge old)
-- Run this daily via scheduled task or cron
-- ============================================================================
DELIMITER //
DROP PROCEDURE IF EXISTS sp_udc_daily_maintenance //
CREATE PROCEDURE sp_udc_daily_maintenance(IN p_retention_days INT)
BEGIN
DECLARE v_yesterday DATE;
SET v_yesterday = DATE_SUB(CURDATE(), INTERVAL 1 DAY);
-- Summarize yesterday's data
CALL sp_udc_summarize_date(v_yesterday);
-- Purge old data
CALL sp_udc_purge_old_data(p_retention_days);
END //
DELIMITER ;
-- ============================================================================
-- Stored Procedure: Backfill summaries for historical data
-- Run once to summarize all existing data before enabling purge
-- ============================================================================
DELIMITER //
DROP PROCEDURE IF EXISTS sp_udc_backfill_summaries //
CREATE PROCEDURE sp_udc_backfill_summaries()
BEGIN
DECLARE v_min_date DATE;
DECLARE v_max_date DATE;
DECLARE v_current_date DATE;
DECLARE v_count INT DEFAULT 0;
-- Get date range from measurements
SELECT MIN(DATE(eventtime)), MAX(DATE(eventtime))
INTO v_min_date, v_max_date
FROM udcmeasurements;
IF v_min_date IS NOT NULL THEN
SET v_current_date = v_min_date;
WHILE v_current_date <= v_max_date DO
CALL sp_udc_summarize_date(v_current_date);
SET v_current_date = DATE_ADD(v_current_date, INTERVAL 1 DAY);
SET v_count = v_count + 1;
END WHILE;
END IF;
SELECT v_count AS days_summarized, v_min_date AS start_date, v_max_date AS end_date;
END //
DELIMITER ;
-- ============================================================================
-- Views for summary data
-- ============================================================================
-- Weekly measurement trends from daily summaries
CREATE OR REPLACE VIEW vwudcmeasurements_weekly AS
SELECT
YEARWEEK(summarydate, 1) as yearweek,
MIN(summarydate) as week_start,
machinenumber,
method,
SUM(measurement_count) as measurement_count,
SUM(oot_count) as oot_count,
ROUND(SUM(oot_count) * 100.0 / SUM(measurement_count), 2) as oot_rate,
ROUND(AVG(avg_deviation), 6) as avg_deviation
FROM udcmeasurements_daily
GROUP BY YEARWEEK(summarydate, 1), machinenumber, method;
-- Monthly measurement trends from daily summaries
CREATE OR REPLACE VIEW vwudcmeasurements_monthly AS
SELECT
DATE_FORMAT(summarydate, '%Y-%m') as yearmonth,
machinenumber,
method,
SUM(measurement_count) as measurement_count,
SUM(oot_count) as oot_count,
ROUND(SUM(oot_count) * 100.0 / SUM(measurement_count), 2) as oot_rate,
ROUND(AVG(avg_deviation), 6) as avg_deviation
FROM udcmeasurements_daily
GROUP BY DATE_FORMAT(summarydate, '%Y-%m'), machinenumber, method;
-- Weekly event trends from daily summaries
CREATE OR REPLACE VIEW vwudcevents_weekly AS
SELECT
YEARWEEK(summarydate, 1) as yearweek,
MIN(summarydate) as week_start,
machinenumber,
eventtype,
SUM(event_count) as event_count
FROM udcevents_daily
GROUP BY YEARWEEK(summarydate, 1), machinenumber, eventtype;
-- ============================================================================
-- Usage Instructions
-- ============================================================================
--
-- 1. FIRST TIME SETUP (run once):
-- CALL sp_udc_backfill_summaries();
-- This summarizes all existing data into daily tables.
--
-- 2. DAILY MAINTENANCE (schedule to run daily):
-- CALL sp_udc_daily_maintenance(90);
-- This summarizes yesterday and purges data older than 90 days.
--
-- 3. MANUAL SUMMARIZE (for a specific date):
-- CALL sp_udc_summarize_date('2025-12-15');
--
-- 4. MANUAL PURGE (with custom retention):
-- CALL sp_udc_purge_old_data(30); -- Keep only 30 days
--
-- ============================================================================
SELECT 'UDC retention tables and procedures created successfully' AS status;