#!/usr/bin/env python3
"""
Historical VM Performance Report

Pull performance stats from vCenter for the past month to identify patterns.
"""

import argparse
import configparser
import csv
import ssl
import sys
from datetime import datetime, timedelta

try:
    from pyVim.connect import SmartConnect, Disconnect
    from pyVmomi import vim
except ImportError:
    print("Error: pyvmomi is required. Install with: pip install pyvmomi")
    sys.exit(1)


def connect_vcenter(server, username, password, port=443):
    """Connect to vCenter, skipping certificate verification."""
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    try:
        si = SmartConnect(host=server, user=username, pwd=password,
                          port=port, sslContext=context)
        return si
    except Exception as e:
        print(f"Error connecting: {e}")
        sys.exit(1)


def get_historical_intervals(perf_manager):
    """Return the historical intervals this vCenter supports, keyed by sampling period."""
    intervals = {}
    for interval in perf_manager.historicalInterval:
        intervals[interval.samplingPeriod] = {
            'name': interval.name,
            'length': interval.length,
            'level': interval.level,
        }
    return intervals


def get_counter_ids(perf_manager, metrics_needed):
    """Map metric names like 'cpu.usage.average' to performance counter IDs."""
    metric_ids = {m: None for m in metrics_needed}
    for counter in perf_manager.perfCounter:
        full_name = f"{counter.groupInfo.key}.{counter.nameInfo.key}.{counter.rollupType}"
        if full_name in metric_ids:
            metric_ids[full_name] = counter.key
    return metric_ids


def get_vm_by_name(content, vm_name):
    """Find a VM by name (case-insensitive)."""
    container = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )
    target_vm = None
    for vm in container.view:
        if vm.name.lower() == vm_name.lower():
            target_vm = vm
            break
    container.Destroy()
    return target_vm


def get_historical_perf(si, entity, metric_ids, days=30):
    """Get historical performance data for an entity."""
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    # Pick a historical interval appropriate to the window. Default vCenter
    # historical intervals: 300 (5 min), 1800 (30 min), 7200 (2 hr),
    # 86400 (daily). Shorter intervals give more detail but are retained
    # for less time.
    if days <= 1:
        interval_id = 300    # 5-minute samples for the last day
    elif days <= 7:
        interval_id = 1800   # 30-minute samples for the last week
    else:
        interval_id = 7200   # 2-hour samples for longer periods

    end_time = datetime.now()
    start_time = end_time - timedelta(days=days)

    # Build metric ID objects; instance="" requests the aggregate instance.
    metric_id_objs = []
    for counter_id in metric_ids.values():
        if counter_id:
            metric_id_objs.append(vim.PerformanceManager.MetricId(
                counterId=counter_id, instance=""
            ))

    if not metric_id_objs:
        print("No valid metrics found")
        return []

    query_spec = vim.PerformanceManager.QuerySpec(
        entity=entity,
        metricId=metric_id_objs,
        intervalId=interval_id,
        startTime=start_time,
        endTime=end_time,
    )

    try:
        results = perf_manager.QueryPerf(querySpec=[query_spec])
    except Exception as e:
        print(f"Error querying performance: {e}")
        return []

    # Parse results into a flat time series: one dict per sample timestamp.
    id_to_name = {cid: name for name, cid in metric_ids.items() if cid}
    data = []
    if results:
        for result in results:
            for i, sample_info in enumerate(result.sampleInfo):
                sample = {
                    'timestamp': sample_info.timestamp,
                    'interval': sample_info.interval,
                }
                for val in result.value:
                    name = id_to_name.get(val.id.counterId)
                    if name and i < len(val.value):
                        sample[name] = val.value[i]
                data.append(sample)

    return data
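
# The interval IDs above assume the default vCenter historical interval
# configuration. If an interval has been disabled or retuned, QueryPerf may
# return nothing for that window. A quick sanity check (a sketch using the
# get_historical_intervals() helper defined earlier; hostname and credentials
# are placeholders):
#
#   si = connect_vcenter("vcenter.example.com", "readonly@vsphere.local", "secret")
#   pm = si.RetrieveContent().perfManager
#   for period, info in sorted(get_historical_intervals(pm).items()):
#       print(f"{period}s samples, retained {info['length']}s, level {info['level']}")
#   Disconnect(si)
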
def analyze_vm_history(si, vm_name, days=30):
    """Analyze historical performance for a VM."""
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    vm = get_vm_by_name(content, vm_name)
    if not vm:
        print(f"VM '{vm_name}' not found")
        return [], {}

    print(f"\nAnalyzing historical performance for: {vm_name}")
    print(f"Period: Last {days} days")
    print("-" * 60)

    metrics = [
        'cpu.usage.average',
        'cpu.ready.summation',
        'mem.usage.average',
        'disk.read.average',
        'disk.write.average',
        'disk.totalReadLatency.average',
        'disk.totalWriteLatency.average',
        'disk.maxTotalLatency.latest',
        'net.received.average',
        'net.transmitted.average',
    ]

    metric_ids = get_counter_ids(perf_manager, metrics)
    data = get_historical_perf(si, vm, metric_ids, days)

    if not data:
        print("No historical data available")
        return [], {}

    print(f"Retrieved {len(data)} samples")

    # Calculate statistics. vSphere reports -1 for samples with no data,
    # so drop negative values to keep min/avg honest.
    stats = {}
    for metric in metrics:
        values = [d[metric] for d in data if metric in d and d[metric] >= 0]
        if values:
            stats[metric] = {
                'min': min(values),
                'max': max(values),
                'avg': sum(values) / len(values),
                'samples': len(values),
            }

    # Display results
    print("\n" + "=" * 60)
    print("PERFORMANCE STATISTICS")
    print("=" * 60)

    # cpu.usage.average and mem.usage.average are reported in hundredths
    # of a percent, hence the division by 100 below.
    if 'cpu.usage.average' in stats:
        s = stats['cpu.usage.average']
        print("\nCPU Usage:")
        print(f"  Average: {s['avg']/100:.1f}%")
        print(f"  Maximum: {s['max']/100:.1f}%")
        if s['max']/100 > 80:
            print(f"  ⚠️ CPU reached {s['max']/100:.1f}% - potential bottleneck")

    if 'mem.usage.average' in stats:
        s = stats['mem.usage.average']
        print("\nMemory Usage:")
        print(f"  Average: {s['avg']/100:.1f}%")
        print(f"  Maximum: {s['max']/100:.1f}%")

    if 'disk.read.average' in stats and 'disk.write.average' in stats:
        r = stats['disk.read.average']
        w = stats['disk.write.average']
        print("\nDisk I/O (KB/s):")
        print(f"  Read  - Avg: {r['avg']:.0f}, Max: {r['max']:.0f} ({r['max']/1024:.1f} MB/s)")
        print(f"  Write - Avg: {w['avg']:.0f}, Max: {w['max']:.0f} ({w['max']/1024:.1f} MB/s)")

    if 'disk.totalReadLatency.average' in stats and 'disk.totalWriteLatency.average' in stats:
        rl = stats['disk.totalReadLatency.average']
        wl = stats['disk.totalWriteLatency.average']
        print("\nDisk Latency (ms):")
        print(f"  Read  - Avg: {rl['avg']:.1f}, Max: {rl['max']:.0f}")
        print(f"  Write - Avg: {wl['avg']:.1f}, Max: {wl['max']:.0f}")
        if rl['max'] > 20 or wl['max'] > 20:
            print("  ⚠️ High disk latency detected - storage may be the bottleneck")

    if 'disk.maxTotalLatency.latest' in stats:
        s = stats['disk.maxTotalLatency.latest']
        print("\nPeak Disk Latency:")
        print(f"  Average Peak: {s['avg']:.1f} ms")
        print(f"  Maximum Peak: {s['max']:.0f} ms")
        if s['max'] > 50:
            print(f"  ⚠️ SEVERE: Peak latency reached {s['max']:.0f} ms!")

    if 'net.received.average' in stats and 'net.transmitted.average' in stats:
        rx = stats['net.received.average']
        tx = stats['net.transmitted.average']
        print("\nNetwork I/O (KB/s):")
        print(f"  RX - Avg: {rx['avg']:.0f}, Max: {rx['max']:.0f} ({rx['max']/1024:.1f} MB/s)")
        print(f"  TX - Avg: {tx['avg']:.0f}, Max: {tx['max']:.0f} ({tx['max']/1024:.1f} MB/s)")

    # Summary
    print("\n" + "=" * 60)
    print("BOTTLENECK ANALYSIS")
    print("=" * 60)

    issues = []
    if 'cpu.usage.average' in stats and stats['cpu.usage.average']['max']/100 > 80:
        issues.append(f"CPU spiked to {stats['cpu.usage.average']['max']/100:.0f}%")

    if 'disk.maxTotalLatency.latest' in stats:
        max_lat = stats['disk.maxTotalLatency.latest']['max']
        if max_lat > 50:
            issues.append(f"Disk latency peaked at {max_lat:.0f}ms (severe)")
        elif max_lat > 20:
            issues.append(f"Disk latency peaked at {max_lat:.0f}ms (moderate)")

    if issues:
        print("\nPotential issues detected:")
        for issue in issues:
            print(f"  ⚠️ {issue}")
    else:
        print("\n✓ No major VMware-side bottlenecks detected in historical data")
        print("  If backups are still slow, the issue is likely:")
        print("  - DATTO agent/MercuryFTP performance")
        print("  - DATTO appliance storage/CPU")
        print("  - Network between guest and DATTO (not VMware layer)")

    return data, stats
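
# Example (a sketch, not part of the CLI flow): calling the analysis from an
# interactive session. The server, credentials, and VM name are placeholders.
#
#   si = connect_vcenter("vcenter.example.com", "readonly@vsphere.local", "secret")
#   data, stats = analyze_vm_history(si, "backup-target-vm", days=7)
#   if stats.get('cpu.ready.summation'):
#       # ready time is milliseconds summed per sample interval
#       print(stats['cpu.ready.summation']['max'])
#   Disconnect(si)
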
def export_to_csv(data, filename, vm_name):
    """Export historical data to CSV."""
    if not data:
        return
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        # Get the union of keys across all samples so every column is covered
        keys = set()
        for d in data:
            keys.update(d.keys())
        fieldnames = sorted(keys)
        writer.writerow(['vm_name'] + fieldnames)
        for d in data:
            writer.writerow([vm_name] + [d.get(k, '') for k in fieldnames])
    print(f"\nData exported to: {filename}")


def main():
    parser = argparse.ArgumentParser(description='Historical VM performance analysis')
    parser.add_argument('--config', '-c', help='Config file path')
    parser.add_argument('--server', '-s', help='vCenter server')
    parser.add_argument('--username', '-u', help='Username')
    parser.add_argument('--password', '-p', help='Password')
    parser.add_argument('--vm', '-v', required=True, help='VM name to analyze')
    parser.add_argument('--days', '-d', type=int, default=30,
                        help='Number of days to analyze (default: 30)')
    parser.add_argument('--export', '-e', help='Export data to CSV file')
    args = parser.parse_args()

    server = args.server
    username = args.username
    password = args.password

    # Command-line values take precedence; the config file fills in gaps
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        if 'vcenter' in config:
            server = server or config.get('vcenter', 'server', fallback=None)
            username = username or config.get('vcenter', 'username', fallback=None)
            password = password or config.get('vcenter', 'password', fallback=None)

    if not all([server, username, password]):
        print("Error: server, username, and password required")
        sys.exit(1)

    print(f"Connecting to {server}...")
    si = connect_vcenter(server, username, password)
    try:
        data, stats = analyze_vm_history(si, args.vm, args.days)
        if args.export and data:
            export_to_csv(data, args.export, args.vm)
    finally:
        Disconnect(si)


if __name__ == '__main__':
    main()
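
# Example usage (all values, including the script filename, are placeholders):
#
#   ./vm_perf_history.py -s vcenter.example.com -u readonly@vsphere.local \
#       -p secret --vm backup-target-vm --days 30 --export history.csv
#
# Or keep credentials out of shell history with a --config file, in the INI
# form configparser expects:
#
#   [vcenter]
#   server = vcenter.example.com
#   username = readonly@vsphere.local
#   password = secret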