#!/usr/bin/env python3
"""
UDC Log Parser

Parses UDC (Universal Data Collector) log files and imports data into ShopDB.

Usage:
    python udcparser.py [--dir /path/to/logs] [--file specific_file.log] [--workers N] [--force]
"""

import re
import os
import sys
import glob
import argparse
from datetime import datetime
from typing import Optional, Dict, Any
from concurrent.futures import ThreadPoolExecutor, as_completed

import mysql.connector
from mysql.connector import Error
from mysql.connector.pooling import MySQLConnectionPool

# Import config; fall back to platform-specific defaults if config.py is absent
try:
    from config import DB_CONFIG, UDC_LOG_PATH as LOG_DIRECTORY, BATCH_SIZE
except ImportError:
    import platform
    IS_WINDOWS = platform.system() == 'Windows'
    DB_CONFIG = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': 'rootpassword',
        'database': 'shopdb'
    }
    LOG_DIRECTORY = r'S:\SPC\UDC\LogFiles' if IS_WINDOWS else '/home/camp/projects/UDC/LogFiles'
    BATCH_SIZE = 1000

# Global connection pool (initialized in main or parse_directory)
_connection_pool = None


def init_connection_pool(db_config: dict, pool_size: int = 5) -> MySQLConnectionPool:
    """Initialize the global connection pool (no-op if already created)"""
    global _connection_pool
    if _connection_pool is None:
        _connection_pool = MySQLConnectionPool(
            pool_name="udc_pool",
            pool_size=pool_size,
            pool_reset_session=True,
            **db_config
        )
        print(f"Connection pool created (size={pool_size})")
    return _connection_pool


def get_connection_from_pool():
    """Get a connection from the pool"""
    if _connection_pool is None:
        raise RuntimeError("Connection pool not initialized. Call init_connection_pool() first.")
    return _connection_pool.get_connection()
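
# Illustrative pool usage (a sketch, not executed at import time): create the
# pool once before spawning workers, then each worker borrows a connection.
# Calling close() on a pooled connection returns it to the pool rather than
# closing the socket.
#
#     init_connection_pool(DB_CONFIG, pool_size=4)
#     conn = get_connection_from_pool()
#     try:
#         cur = conn.cursor()
#         cur.execute("SELECT 1")
#         cur.fetchall()
#     finally:
#         conn.close()  # returned to udc_pool for reuse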


# =============================================================================
# Regex Patterns
# =============================================================================

# Timestamp pattern: MM/DD/YYYY HH:MM:SS
RE_TIMESTAMP = re.compile(r'(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})')

# Log start/end
RE_START_LOG = re.compile(r'^Start Log : (\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})')
RE_END_LOG = re.compile(r'^End Log : (\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})')

# Headers loaded
RE_HEADERS = re.compile(r'Headers loaded \(Path 1\): Part=(\S*) Oper=(\S*) Serial=(\S*) ProgName=(\S*) Job=(\S*)')

# Badge number
RE_BADGE = re.compile(r'BadgeNumber = (\S+)')

# Program start/end
RE_START_PROGRAM = re.compile(r': Start of Program')
RE_END_PROGRAM = re.compile(r': End of Program')

# Data entry with timestamp
RE_DATA_ENTRY = re.compile(r'^DATA ENTRY : (\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2})')

# Process/Tool/Machine data
# Note: the log format has spaces after // delimiters (e.g. "// DIMID//"),
# so each // is followed by \s*
RE_PROCESSDATA = re.compile(
    r'\+\+\.PROCESSDATA//\s*METHOD//\s*(\w+)//\s*DIMID//\s*(\w+)//\s*DESCRIPTION//\s*(.+?)//\s*SEQ//\s*(\d+)//\s*MIN//\s*(.+?)//\s*MAX//\s*(.+?)//\s*ACTUAL//\s*(.+?)//\s*DEVIATION//\s*(.+?)//\s*OOT//\s*(\d)//\s*--'
)
RE_TOOLDATA = re.compile(
    r'\+\+\.TOOLDATA//\s*METHOD//\s*(\w+)//\s*DIMID//\s*(\w+)//\s*DESCRIPTION//\s*(.+?)//\s*SEQ//\s*(\d+)//\s*MIN//\s*(.+?)//\s*MAX//\s*(.+?)//\s*ACTUAL//\s*(.+?)//\s*DEVIATION//\s*(.+?)//\s*OOT//\s*(\d)//\s*--'
)
RE_MACHINEDATA = re.compile(
    r'\+\+\.MACHINEDATA//\s*METHOD//\s*(\w+)//\s*DIMID//\s*(\w+)//.*?MIN//\s*(.+?)//\s*MAX//\s*(.+?)//\s*ACTUAL//\s*(.+?)//\s*DEVIATION//\s*(.+?)//\s*OOT//\s*(\d)//\s*--'
)
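
# Illustrative (hypothetical) line RE_PROCESSDATA is built for -- shown
# wrapped here, but a single line in the log; all field values are invented:
#
#   ++.PROCESSDATA// METHOD// PROBE// DIMID// D12// DESCRIPTION// Bore Dia//
#   SEQ// 3// MIN// 9.995// MAX// 10.005// ACTUAL// 10.001// DEVIATION// 0.001// OOT// 0// --
#
# Captured groups, in order: method, dimid, description, seq, min, max,
# actual, deviation, oot. RE_TOOLDATA is identical but keyed on TOOLDATA,
# with SEQ holding the tool number.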

# Item crossing
RE_ITEM_CROSSING = re.compile(r'\+\+\.ITEM-CROSSING//\s*ITEMNO//\s*(.+?)//\s*DESCRIPTION//\s*(.+?)//\s*--')

# Manual request
RE_MANUAL_REQUEST = re.compile(r'\+\+\.MANUALREQUEST\d?//\s*DESCRIPTION//\s*(.+?)//\s*--')

# Message
RE_MESSAGE = re.compile(r'\+\+\.MESSAGE//')

# Start/End markers in data
RE_START = re.compile(r'\+\+\.START//\s*PROGNAME//\s*(.+?)//\s*SNUMBNO//\s*(.+?)//\s*--')
RE_END = re.compile(r'\+\+\.END//\s*--')

# Error patterns
RE_ERROR_MSG = re.compile(r'^ERROR\s*:\s*(?:Message:\s*)?(.+)', re.IGNORECASE)
RE_EXCEPTION = re.compile(r':\s*!Exception\s*:\s*(.+)')
RE_CANNOT = re.compile(r':\s*((?:cannot|could not|failed to|unable to).+)', re.IGNORECASE)

# Connection patterns
RE_SERIAL_OPEN = re.compile(r':\s*Serial Connection (\d+) Open')
RE_SERIAL_ACCEPT = re.compile(r':\s*Accepting RS232 Strings on (COM\d+)')
RE_SERIAL_CLOSE = re.compile(r':\s*Serial Connection (\d+) Close')
RE_COM_RAW = re.compile(r':\s*\[(COM\d+)\] RAW LINE:')


def parse_timestamp(ts_str: str) -> Optional[datetime]:
    """Parse MM/DD/YYYY HH:MM:SS to datetime"""
    try:
        return datetime.strptime(ts_str, '%m/%d/%Y %H:%M:%S')
    except (ValueError, TypeError):
        return None


def parse_numeric(val_str: str) -> Optional[float]:
    """Parse a numeric value, tolerating embedded spaces (e.g. "- 1.5")"""
    if not val_str:
        return None
    try:
        cleaned = val_str.strip().replace(' ', '')
        if cleaned == '' or cleaned == '-':
            return None
        return float(cleaned)
    except (ValueError, TypeError):
        return None
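
# parse_numeric behaviour sketch (values illustrative):
#   parse_numeric(' 10.001 ')  -> 10.001
#   parse_numeric('- 0.001')   -> -0.001   (embedded space removed)
#   parse_numeric('-')         -> None
#   parse_numeric('')          -> None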


def extract_machine_number(filename: str) -> Optional[str]:
    """Extract the machine number from a filename like UDC_Log_3110.log"""
    match = re.search(r'UDC_Log_(\d+)', filename)
    if match:
        return match.group(1)
    return None


class UDCParser:
    """Parser for UDC log files"""

    def __init__(self, db_config: dict = None, use_pool: bool = False):
        self.db_config = db_config
        self.use_pool = use_pool
        self.conn = None
        self.cursor = None
        self._owns_connection = True  # Track if we should close the connection

        # Current state
        self.current_session_id = None
        self.current_partrun_id = None
        self.current_timestamp = None
        self.current_badge = None
        self.current_headers = {}
        self.last_program_end = None
        self.pending_manual_request = None

        # Counters for the current part run
        self.measurement_count = 0
        self.manual_count = 0
        self.probe_count = 0
        self.oot_count = 0

        # Batch storage
        self.measurements_batch = []
        self.tooldata_batch = []
        self.events_batch = []
        self.errors_batch = []
        self.connections_batch = []

        # Track current machine number for errors/connections outside sessions
        self.current_machine_number = None

    def connect(self):
        """Connect to MySQL database (or get a connection from the pool)"""
        try:
            if self.use_pool:
                self.conn = get_connection_from_pool()
                self._owns_connection = True
            else:
                self.conn = mysql.connector.connect(**self.db_config)
                print(f"Connected to MySQL at {self.db_config['host']}")
            self.cursor = self.conn.cursor(dictionary=True)
            return True
        except Error as e:
            print(f"Error connecting to MySQL: {e}")
            return False

    def disconnect(self):
        """Disconnect from the database (returns pooled connections to the pool)"""
        if self.cursor:
            self.cursor.close()
            self.cursor = None
        if self.conn and self._owns_connection:
            self.conn.close()  # For pooled connections, this returns to the pool
            self.conn = None

    def is_file_imported(self, filename: str) -> bool:
        """Check if a log file has already been imported (DEPRECATED - use get_file_state instead)"""
        basename = os.path.basename(filename)
        self.cursor.execute(
            "SELECT sessionid FROM udcsessions WHERE logfilename = %s",
            (basename,)
        )
        return self.cursor.fetchone() is not None

    def get_file_state(self, filepath: str) -> Dict[str, Any]:
        """Get the last known state of a log file (offset, size)"""
        self.cursor.execute(
            "SELECT fileid, lastoffset, filesize FROM udcfilestate WHERE filepath = %s",
            (filepath,)
        )
        row = self.cursor.fetchone()
        if row:
            return {'fileid': row['fileid'], 'offset': row['lastoffset'], 'size': row['filesize']}
        return {'fileid': None, 'offset': 0, 'size': 0}

    def update_file_state(self, filepath: str, machine_number: str, offset: int, filesize: int):
        """Upsert the file state after parsing (assumes a UNIQUE key on filepath)"""
        self.cursor.execute(
            """INSERT INTO udcfilestate (filepath, machinenumber, lastoffset, lastparsed, filesize)
               VALUES (%s, %s, %s, NOW(), %s)
               ON DUPLICATE KEY UPDATE lastoffset = %s, lastparsed = NOW(), filesize = %s""",
            (filepath, machine_number, offset, filesize, offset, filesize)
        )
        self.conn.commit()
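
    # Incremental-parse contract (illustrative): after parsing a 10_000-byte
    # file, lastoffset and filesize are both stored as 10_000. On the next run
    # parse_file() seeks to that offset and reads only appended lines; if the
    # file has shrunk below the stored offset (rotation/truncation), it
    # re-parses from the beginning.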

    def create_session(self, filename: str, machine_number: str, start_time: datetime) -> int:
        """Create a new session record"""
        basename = os.path.basename(filename)
        self.cursor.execute(
            """INSERT INTO udcsessions (machinenumber, logfilename, sessionstart, dateadded)
               VALUES (%s, %s, %s, NOW())""",
            (machine_number, basename, start_time)
        )
        self.conn.commit()
        return self.cursor.lastrowid

    def update_session_end(self, session_id: int, end_time: datetime, record_count: int):
        """Update a session with its end time and record count"""
        self.cursor.execute(
            "UPDATE udcsessions SET sessionend = %s, recordcount = %s WHERE sessionid = %s",
            (end_time, record_count, session_id)
        )
        self.conn.commit()

    def create_part_run(self) -> int:
        """Create a new part run record"""
        changeover = None
        if self.last_program_end and self.current_timestamp:
            changeover = int((self.current_timestamp - self.last_program_end).total_seconds())

        self.cursor.execute(
            """INSERT INTO udcparts
               (sessionid, partnumber, opernumber, serialnumber, programname, jobnumber,
                badgenumber, programstart, changeover, dateadded)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())""",
            (
                self.current_session_id,
                self.current_headers.get('part'),
                self.current_headers.get('oper'),
                self.current_headers.get('serial'),
                self.current_headers.get('progname'),
                self.current_headers.get('job'),
                self.current_badge,
                self.current_timestamp,
                changeover
            )
        )
        self.conn.commit()

        # Reset counters
        self.measurement_count = 0
        self.manual_count = 0
        self.probe_count = 0
        self.oot_count = 0

        return self.cursor.lastrowid

    def end_part_run(self):
        """Finalize the current part run"""
        if not self.current_partrun_id:
            return

        # Calculate cycle time
        cycletime = None
        if self.current_timestamp:
            self.cursor.execute(
                "SELECT programstart FROM udcparts WHERE partrunid = %s",
                (self.current_partrun_id,)
            )
            row = self.cursor.fetchone()
            if row and row['programstart']:
                cycletime = int((self.current_timestamp - row['programstart']).total_seconds())

        # Update the part run with end time and counts
        self.cursor.execute(
            """UPDATE udcparts SET
                   programend = %s, cycletime = %s,
                   measurementcount = %s, manualcount = %s, probecount = %s, ootcount = %s
               WHERE partrunid = %s""",
            (
                self.current_timestamp, cycletime,
                self.measurement_count, self.manual_count, self.probe_count, self.oot_count,
                self.current_partrun_id
            )
        )
        self.conn.commit()

        self.last_program_end = self.current_timestamp
        self.current_partrun_id = None

    def add_measurement(self, event_type: str, method: str, dimid: str, description: str,
                        seq: int, minval: float, maxval: float, actualval: float,
                        deviation: float, oot: int):
        """Add a measurement record"""
        self.measurements_batch.append((
            self.current_partrun_id,
            self.current_session_id,
            self.current_timestamp,
            event_type,
            method,
            dimid,
            description[:255] if description else None,
            seq,
            minval,
            maxval,
            actualval,
            deviation,
            oot
        ))

        # Update counters
        self.measurement_count += 1
        if method and method.upper() == 'MANUAL':
            self.manual_count += 1
        elif method and method.upper() == 'PROBE':
            self.probe_count += 1
        if oot:
            self.oot_count += 1

        # A MANUAL measurement answers any pending manual request
        if self.pending_manual_request and method and method.upper() == 'MANUAL':
            self.complete_manual_request()

        # Flush batch if needed
        if len(self.measurements_batch) >= BATCH_SIZE:
            self.flush_measurements()

    def add_tooldata(self, method: str, dimid: str, description: str,
                     toolnumber: int, minval: float, maxval: float, actualval: float,
                     deviation: float, oot: int):
        """Add a tool data record"""
        self.tooldata_batch.append((
            self.current_partrun_id,
            self.current_session_id,
            self.current_timestamp,
            method,
            dimid,
            description[:255] if description else None,
            toolnumber,
            minval,
            maxval,
            actualval,
            deviation,
            oot
        ))

        # Flush batch if needed
        if len(self.tooldata_batch) >= BATCH_SIZE:
            self.flush_tooldata()

    def add_event(self, event_type: str, item_number: str = None, description: str = None):
        """Add an event record"""
        self.events_batch.append((
            self.current_partrun_id,
            self.current_session_id,
            self.current_timestamp,
            event_type,
            item_number,
            description
        ))

        if len(self.events_batch) >= BATCH_SIZE:
            self.flush_events()

    def start_manual_request(self, description: str):
        """Record the start of a manual request"""
        self.pending_manual_request = {
            'time': self.current_timestamp,
            'description': description,
            'partrunid': self.current_partrun_id
        }

    def complete_manual_request(self):
        """Complete a manual request with the response measurement"""
        if not self.pending_manual_request:
            return

        response_seconds = None
        if self.pending_manual_request['time'] and self.current_timestamp:
            response_seconds = int((self.current_timestamp - self.pending_manual_request['time']).total_seconds())

        self.cursor.execute(
            """INSERT INTO udcmanualrequests
               (partrunid, requesttime, responsetime, responseseconds, description)
               VALUES (%s, %s, %s, %s, %s)""",
            (
                self.pending_manual_request['partrunid'],
                self.pending_manual_request['time'],
                self.current_timestamp,
                response_seconds,
                self.pending_manual_request['description'][:255] if self.pending_manual_request['description'] else None
            )
        )
        self.conn.commit()  # direct insert (not batched), so commit immediately

        self.pending_manual_request = None

    def flush_measurements(self):
        """Flush the measurement batch to the database"""
        if not self.measurements_batch:
            return

        self.cursor.executemany(
            """INSERT INTO udcmeasurements
               (partrunid, sessionid, eventtime, eventtype, method, dimid, description,
                seqnumber, minval, maxval, actualval, deviation, oot)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            self.measurements_batch
        )
        self.conn.commit()
        self.measurements_batch = []

    def flush_tooldata(self):
        """Flush the tool data batch to the database"""
        if not self.tooldata_batch:
            return

        self.cursor.executemany(
            """INSERT INTO udctooldata
               (partrunid, sessionid, eventtime, method, dimid, description,
                toolnumber, minval, maxval, actualval, deviation, oot)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
            self.tooldata_batch
        )
        self.conn.commit()
        self.tooldata_batch = []

    def flush_events(self):
        """Flush the event batch to the database"""
        if not self.events_batch:
            return

        self.cursor.executemany(
            """INSERT INTO udcevents
               (partrunid, sessionid, eventtime, eventtype, itemnumber, description)
               VALUES (%s, %s, %s, %s, %s, %s)""",
            self.events_batch
        )
        self.conn.commit()
        self.events_batch = []

    def add_error(self, error_type: str, error_message: str, source_method: str = None):
        """Add an error record"""
        self.errors_batch.append((
            self.current_session_id,
            self.current_machine_number,
            self.current_timestamp,
            error_type[:50] if error_type else 'UNKNOWN',
            error_message[:500] if error_message else None,
            source_method[:100] if source_method else None
        ))

        if len(self.errors_batch) >= BATCH_SIZE:
            self.flush_errors()

    def add_connection(self, event_type: str, comport: str = None, details: str = None):
        """Add a connection event record"""
        self.connections_batch.append((
            self.current_session_id,
            self.current_machine_number,
            self.current_timestamp,
            event_type[:20] if event_type else 'UNKNOWN',
            comport[:10] if comport else None,
            details[:255] if details else None
        ))

        if len(self.connections_batch) >= BATCH_SIZE:
            self.flush_connections()

    def flush_errors(self):
        """Flush the error batch to the database"""
        if not self.errors_batch:
            return

        self.cursor.executemany(
            """INSERT INTO udcerrors
               (sessionid, machinenumber, eventtime, errortype, errormessage, sourcemethod)
               VALUES (%s, %s, %s, %s, %s, %s)""",
            self.errors_batch
        )
        self.conn.commit()
        self.errors_batch = []

    def flush_connections(self):
        """Flush the connection batch to the database"""
        if not self.connections_batch:
            return

        self.cursor.executemany(
            """INSERT INTO udcconnections
               (sessionid, machinenumber, eventtime, eventtype, comport, details)
               VALUES (%s, %s, %s, %s, %s, %s)""",
            self.connections_batch
        )
        self.conn.commit()
        self.connections_batch = []
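
    # Batching note (illustrative): with BATCH_SIZE = 1000, a file yielding
    # 2,500 measurements triggers two executemany() flushes of 1,000 rows
    # while parsing, plus one final flush of 500 rows at end-of-file.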

    def parse_file(self, filepath: str, force_full: bool = False) -> Dict[str, Any]:
        """Parse a single UDC log file incrementally from its last known position"""
        filename = os.path.basename(filepath)
        machine_number = extract_machine_number(filename)

        if not machine_number:
            return {'success': False, 'error': f'Could not extract machine number from {filename}'}

        # Get the current file size
        try:
            current_size = os.path.getsize(filepath)
        except OSError as e:
            return {'success': False, 'error': f'Cannot access file: {e}'}

        # Get the last known state
        file_state = self.get_file_state(filepath)
        start_offset = 0 if force_full else file_state['offset']

        # Check whether the file has new content
        if not force_full and current_size <= file_state['size'] and file_state['offset'] > 0:
            return {'success': False, 'error': f'No new content in {filename}', 'skipped': True}

        # If the file is smaller than the last offset, it was likely rotated/truncated - start fresh
        if current_size < start_offset:
            print(f"  File {filename} appears truncated, parsing from beginning...")
            start_offset = 0

        if start_offset > 0:
            print(f"Parsing {filename} (machine {machine_number}) from offset {start_offset}...")
        else:
            print(f"Parsing {filename} (machine {machine_number})...")

        # Reset state
        self.current_session_id = None
        self.current_partrun_id = None
        self.current_timestamp = None
        self.current_badge = None
        self.current_headers = {}
        self.last_program_end = None
        self.pending_manual_request = None
        self.measurements_batch = []
        self.tooldata_batch = []
        self.events_batch = []
        self.errors_batch = []
        self.connections_batch = []
        self.current_machine_number = machine_number

        record_count = 0
        session_start = None
        session_end = None

        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                # Seek to the last known position. The stored offset normally
                # sits on a line boundary; only skip ahead if the byte before
                # it is not a newline (i.e. we landed mid-line), otherwise we
                # would silently drop the first new line.
                if start_offset > 0:
                    f.seek(start_offset - 1)
                    if f.read(1) != '\n':
                        f.readline()  # Skip the partial line

                for line in f:
                    line = line.rstrip()

                    # Skip empty lines
                    if not line:
                        continue

                    # Check for log start
                    match = RE_START_LOG.search(line)
                    if match:
                        ts = parse_timestamp(match.group(1))
                        if ts and not session_start:
                            session_start = ts
                            self.current_timestamp = ts
                            self.current_session_id = self.create_session(filepath, machine_number, ts)
                        continue

                    # Check for log end
                    match = RE_END_LOG.search(line)
                    if match:
                        ts = parse_timestamp(match.group(1))
                        if ts:
                            session_end = ts
                            self.current_timestamp = ts
                        continue

                    # Check for a DATA ENTRY timestamp
                    match = RE_DATA_ENTRY.search(line)
                    if match:
                        ts = parse_timestamp(match.group(1))
                        if ts:
                            self.current_timestamp = ts
                        continue

                    # Check for any other timestamp in the line
                    match = RE_TIMESTAMP.search(line)
                    if match and ': ' in line:
                        ts = parse_timestamp(match.group(1))
                        if ts:
                            self.current_timestamp = ts

                    # Check for headers
                    match = RE_HEADERS.search(line)
                    if match:
                        self.current_headers = {
                            'part': match.group(1) if match.group(1) else None,
                            'oper': match.group(2) if match.group(2) else None,
                            'serial': match.group(3) if match.group(3) else None,
                            'progname': match.group(4) if match.group(4) else None,
                            'job': match.group(5) if match.group(5) else None
                        }
                        continue

                    # Check for badge
                    match = RE_BADGE.search(line)
                    if match:
                        self.current_badge = match.group(1)
                        continue

                    # Check for program start
                    if RE_START_PROGRAM.search(line) or RE_START.search(line):
                        if self.current_partrun_id:
                            self.end_part_run()
                        self.current_partrun_id = self.create_part_run()
                        record_count += 1
                        continue

                    # Check for program end
                    if RE_END_PROGRAM.search(line) or RE_END.search(line):
                        self.end_part_run()
                        record_count += 1
                        continue

                    # Check for PROCESSDATA
                    match = RE_PROCESSDATA.search(line)
                    if match:
                        self.add_measurement(
                            'PROCESSDATA',
                            match.group(1),                                # method
                            match.group(2),                                # dimid
                            match.group(3),                                # description
                            int(match.group(4)) if match.group(4) else 0,  # seq
                            parse_numeric(match.group(5)),                 # min
                            parse_numeric(match.group(6)),                 # max
                            parse_numeric(match.group(7)),                 # actual
                            parse_numeric(match.group(8)),                 # deviation
                            int(match.group(9)) if match.group(9) else 0   # oot
                        )
                        record_count += 1
                        continue

                    # Check for TOOLDATA
                    match = RE_TOOLDATA.search(line)
                    if match:
                        self.add_tooldata(
                            match.group(1),                                # method
                            match.group(2),                                # dimid
                            match.group(3),                                # description
                            int(match.group(4)) if match.group(4) else 0,  # toolnumber (SEQ)
                            parse_numeric(match.group(5)),                 # min
                            parse_numeric(match.group(6)),                 # max
                            parse_numeric(match.group(7)),                 # actual
                            parse_numeric(match.group(8)),                 # deviation
                            int(match.group(9)) if match.group(9) else 0   # oot
                        )
                        record_count += 1
                        continue

                    # Check for MACHINEDATA (simplified pattern)
                    match = RE_MACHINEDATA.search(line)
                    if match:
                        self.add_measurement(
                            'MACHINEDATA',
                            match.group(1),  # method
                            match.group(2),  # dimid
                            None,            # description
                            0,               # seq
                            parse_numeric(match.group(3)),
                            parse_numeric(match.group(4)),
                            parse_numeric(match.group(5)),
                            parse_numeric(match.group(6)),
                            int(match.group(7)) if match.group(7) else 0
                        )
                        record_count += 1
                        continue

                    # Check for item crossing
                    match = RE_ITEM_CROSSING.search(line)
                    if match:
                        self.add_event('ITEM-CROSSING', match.group(1), match.group(2))
                        record_count += 1
                        continue

                    # Check for manual request
                    match = RE_MANUAL_REQUEST.search(line)
                    if match:
                        self.start_manual_request(match.group(1))
                        self.add_event('MANUALREQUEST', None, match.group(1))
                        record_count += 1
                        continue

                    # Check for message
                    if RE_MESSAGE.search(line):
                        self.add_event('MESSAGE', None, line[:500])
                        record_count += 1
                        continue

                    # Check for errors
                    match = RE_ERROR_MSG.search(line)
                    if match:
                        self.add_error('ERROR', match.group(1))
                        record_count += 1
                        continue

                    match = RE_EXCEPTION.search(line)
                    if match:
                        self.add_error('EXCEPTION', match.group(1))
                        record_count += 1
                        continue

                    match = RE_CANNOT.search(line)
                    if match:
                        self.add_error('FAILURE', match.group(1))
                        record_count += 1
                        continue

                    # Check for connection events
                    match = RE_SERIAL_OPEN.search(line)
                    if match:
                        self.add_connection('OPEN', f'Serial{match.group(1)}', 'Serial connection opened')
                        record_count += 1
                        continue

                    match = RE_SERIAL_ACCEPT.search(line)
                    if match:
                        self.add_connection('LISTENING', match.group(1), f'Accepting RS232 strings on {match.group(1)}')
                        record_count += 1
                        continue

                    match = RE_SERIAL_CLOSE.search(line)
                    if match:
                        self.add_connection('CLOSE', f'Serial{match.group(1)}', 'Serial connection closed')
                        record_count += 1
                        continue

            # Flush remaining batches
            self.flush_measurements()
            self.flush_tooldata()
            self.flush_events()
            self.flush_errors()
            self.flush_connections()

            # Close any open part run
            if self.current_partrun_id:
                self.end_part_run()

            # Update the session end
            if self.current_session_id:
                self.update_session_end(self.current_session_id, session_end or self.current_timestamp, record_count)

            # Record the new position (the size measured before parsing)
            self.update_file_state(filepath, machine_number, current_size, current_size)

            print(f"  Parsed {record_count} records")
            return {'success': True, 'records': record_count, 'session_id': self.current_session_id, 'new_content': True}

        except Exception as e:
            print(f"  Error parsing file: {e}")
            return {'success': False, 'error': str(e)}
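
    # Illustrative single-file run (hypothetical path; assumes the shopdb
    # schema with the udc* tables already exists):
    #
    #     p = UDCParser(DB_CONFIG)
    #     if p.connect():
    #         print(p.parse_file('/home/camp/projects/UDC/LogFiles/UDC_Log_3110.log'))
    #         p.disconnect()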

    def parse_directory(self, directory: str, max_workers: int = 4, force_full: bool = False) -> Dict[str, Any]:
        """Parse all UDC log files in a directory, optionally in parallel"""
        results = {
            'total_files': 0,
            'imported': 0,
            'skipped': 0,
            'errors': 0,
            'total_records': 0
        }

        # Find all log files
        pattern = os.path.join(directory, 'UDC_Log_*.log')
        files = glob.glob(pattern)

        # Also check known subdirectories
        for subdir in ['3204_BAD', '7606_BAD', 'Dual Spindle']:
            subpath = os.path.join(directory, subdir)
            if os.path.isdir(subpath):
                files.extend(glob.glob(os.path.join(subpath, '**', 'UDC_Log_*.log'), recursive=True))

        results['total_files'] = len(files)
        print(f"Found {len(files)} log files")

        if max_workers <= 1:
            # Sequential processing
            for filepath in sorted(files):
                result = self.parse_file(filepath, force_full=force_full)

                if result['success']:
                    results['imported'] += 1
                    results['total_records'] += result.get('records', 0)
                elif result.get('skipped') or 'no new content' in result.get('error', '').lower():
                    results['skipped'] += 1
                elif 'already imported' in result.get('error', '').lower():
                    results['skipped'] += 1
                else:
                    results['errors'] += 1
                    print(f"  Error: {result.get('error')}")
        else:
            # Parallel processing with a connection pool;
            # pool size = workers + 1 buffer for safety, capped at 10
            pool_size = min(max_workers + 1, 10)
            init_connection_pool(self.db_config, pool_size=pool_size)

            print(f"Using {max_workers} parallel workers with connection pool...")
            completed = 0

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # Submit all files for processing (workers draw from the pool,
                # not individual connections)
                futures = {
                    executor.submit(parse_file_worker, filepath, None, True, 3, force_full): filepath
                    for filepath in sorted(files)
                }

                for future in as_completed(futures):
                    completed += 1
                    filepath = futures[future]

                    try:
                        result = future.result()

                        if result['success']:
                            results['imported'] += 1
                            results['total_records'] += result.get('records', 0)
                        elif result.get('skipped') or 'no new content' in result.get('error', '').lower():
                            results['skipped'] += 1
                        elif 'already imported' in result.get('error', '').lower():
                            results['skipped'] += 1
                        else:
                            results['errors'] += 1
                            print(f"  Error in {os.path.basename(filepath)}: {result.get('error')}")

                    except Exception as e:
                        results['errors'] += 1
                        print(f"  Worker error for {os.path.basename(filepath)}: {e}")

                    # Progress update
                    if completed % 10 == 0 or completed == len(files):
                        print(f"  Progress: {completed}/{len(files)} files processed...", end='\r')

            print()  # Newline after the progress line

        return results


def parse_file_worker(filepath: str, db_config: dict = None, use_pool: bool = True, max_retries: int = 3, force_full: bool = False) -> Dict[str, Any]:
    """Worker function for parallel file parsing. Uses the connection pool by default, with retry logic."""
    last_error = None

    for attempt in range(max_retries):
        parser = UDCParser(db_config=db_config, use_pool=use_pool)
        try:
            if not parser.connect():
                last_error = 'Failed to connect to database'
                continue

            # Verify the connection is alive before using it
            try:
                parser.conn.ping(reconnect=True, attempts=2, delay=1)
            except Exception:
                last_error = 'Connection ping failed'
                continue

            return parser.parse_file(filepath, force_full=force_full)

        except Error as e:
            last_error = str(e)
            # Retry only on connection-lost errors:
            # 2006 CR_SERVER_GONE_ERROR, 2013 CR_SERVER_LOST, 2055 CR_SERVER_LOST_EXTENDED
            if e.errno in (2006, 2013, 2055):
                continue
            return {'success': False, 'error': str(e)}
        finally:
            # disconnect() is idempotent and returns pooled connections to the pool
            parser.disconnect()

    return {'success': False, 'error': f'Failed after {max_retries} attempts: {last_error}'}
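
# Illustrative direct call of the worker (assumes init_connection_pool() has
# already been run, as parse_directory() does before submitting jobs):
#
#     init_connection_pool(DB_CONFIG, pool_size=2)
#     result = parse_file_worker(os.path.join(LOG_DIRECTORY, 'UDC_Log_3110.log'))
#     print(result)  # e.g. {'success': True, 'records': 42, ...} (values illustrative)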


def main():
    parser = argparse.ArgumentParser(description='Parse UDC log files and import to database')
    parser.add_argument('--dir', default=LOG_DIRECTORY, help='Directory containing log files')
    parser.add_argument('--file', help='Parse a specific file instead of a directory')
    parser.add_argument('--force', action='store_true', help='Force a full re-parse, ignoring the previous position')
    parser.add_argument('--workers', type=int, default=1,
                        help='Number of parallel workers (default: 1, sequential; increase only if the server supports it)')
    parser.add_argument('--host', default=DB_CONFIG['host'], help='MySQL host')
    parser.add_argument('--port', type=int, default=DB_CONFIG['port'], help='MySQL port')
    parser.add_argument('--user', default=DB_CONFIG['user'], help='MySQL user')
    parser.add_argument('--password', default=DB_CONFIG['password'], help='MySQL password')
    parser.add_argument('--database', default=DB_CONFIG['database'], help='MySQL database')

    args = parser.parse_args()

    # Build config from args
    db_config = {
        'host': args.host,
        'port': args.port,
        'user': args.user,
        'password': args.password,
        'database': args.database
    }

    # Create the parser and connect
    udc_parser = UDCParser(db_config)
    if not udc_parser.connect():
        sys.exit(1)

    try:
        if args.file:
            result = udc_parser.parse_file(args.file, force_full=args.force)
            if result['success']:
                print(f"\nSuccessfully imported {result.get('records', 0)} records")
            elif result.get('skipped'):
                print("\nNo new content to parse")
            else:
                print(f"\nFailed: {result.get('error')}")
                sys.exit(1)
        else:
            results = udc_parser.parse_directory(args.dir, max_workers=args.workers, force_full=args.force)
            print(f"\n{'='*50}")
            print("Import Summary:")
            print(f"  Total files found: {results['total_files']}")
            print(f"  Files imported: {results['imported']}")
            print(f"  Files skipped: {results['skipped']}")
            print(f"  Files with errors: {results['errors']}")
            print(f"  Total records: {results['total_records']}")
    finally:
        udc_parser.disconnect()


if __name__ == '__main__':
    main()