#!/usr/bin/env python3
"""
Real-time VM Performance Monitor
Run this during a backup to identify bottlenecks (CPU, disk, network).
"""

import argparse
import configparser
import ssl
import sys
import time
from datetime import datetime

try:
    from pyVim.connect import SmartConnect, Disconnect
    from pyVmomi import vim
except ImportError:
    print("Error: pyvmomi is required. Install with: pip install pyvmomi")
    sys.exit(1)


# Sampling interval (seconds) requested from the performance manager; also
# used to normalise the cpu.ready summation counter to a per-second figure.
REALTIME_INTERVAL_SECONDS = 20

# Thresholds above which a value is flagged as a potential bottleneck.
CPU_WARN_PCT = 80
LATENCY_WARN_MS = 20


def _hundredths_to_pct(value):
    """Convert a counter reported in hundredths of a percent to percent."""
    return round(value / 100, 1)


def _ready_ms_per_second(value):
    """Convert a per-interval ready-time summation (ms) to ms per second."""
    return round(value / REALTIME_INTERVAL_SECONDS, 1)


def _kb_to_mb(value):
    """Convert a KB/s rate to MB/s, rounded to one decimal."""
    return round(value / 1024, 1)


def _identity(value):
    """Return the raw counter value unchanged."""
    return value


# counter name -> (result key, converter) for the detailed single-VM view.
_VM_DETAIL_FIELDS = {
    'cpu.usage.average': ('cpu_pct', _hundredths_to_pct),
    'cpu.ready.summation': ('cpu_ready_ms', _ready_ms_per_second),
    'mem.usage.average': ('mem_pct', _hundredths_to_pct),
    'disk.read.average': ('disk_read_kbps', _identity),
    'disk.write.average': ('disk_write_kbps', _identity),
    'disk.totalReadLatency.average': ('disk_read_lat_ms', _identity),
    'disk.totalWriteLatency.average': ('disk_write_lat_ms', _identity),
    'disk.maxTotalLatency.latest': ('disk_max_lat_ms', _identity),
    'net.received.average': ('net_rx_kbps', _identity),
    'net.transmitted.average': ('net_tx_kbps', _identity),
}

# counter name -> (result key, converter) for the all-VMs summary view.
# Network counters are handled separately because RX and TX are summed.
_VM_SUMMARY_FIELDS = {
    'cpu.usage.average': ('cpu_pct', _hundredths_to_pct),
    'mem.usage.average': ('mem_pct', _hundredths_to_pct),
    'disk.read.average': ('disk_read_mbps', _kb_to_mb),
    'disk.write.average': ('disk_write_mbps', _kb_to_mb),
    'disk.maxTotalLatency.latest': ('disk_lat_ms', _identity),
}

_NET_COUNTERS = ('net.received.average', 'net.transmitted.average')


def connect_vcenter(server, username, password, port=443):
    """Connect to vCenter.

    Returns the service instance on success. On any connection error the
    error is printed and the process exits with status 1.
    """
    # NOTE(security): certificate and hostname verification are disabled, so
    # the connection is open to man-in-the-middle attacks. Kept because
    # vCenter installs commonly use self-signed certificates.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    try:
        return SmartConnect(host=server, user=username, pwd=password,
                            port=port, sslContext=context)
    except Exception as e:
        print(f"Error connecting: {e}")
        sys.exit(1)


def get_counter_ids(perf_manager):
    """Get performance counter IDs.

    Returns a dict mapping '<group>.<name>.<rollup>' counter names to their
    numeric keys. Counters that the server does not expose stay ``None``.
    """
    metric_ids = {
        'cpu.usage.average': None,
        'cpu.ready.summation': None,
        'disk.read.average': None,
        'disk.write.average': None,
        'disk.totalReadLatency.average': None,
        'disk.totalWriteLatency.average': None,
        'disk.maxTotalLatency.latest': None,
        'net.received.average': None,
        'net.transmitted.average': None,
        'mem.usage.average': None,
    }

    for counter in perf_manager.perfCounter:
        full_name = f"{counter.groupInfo.key}.{counter.nameInfo.key}.{counter.rollupType}"
        if full_name in metric_ids:
            metric_ids[full_name] = counter.key

    return metric_ids


def _query_latest_samples(perf_manager, entity, metric_ids):
    """Query the most recent sample of every known counter for one entity.

    Returns a dict mapping counter name to its latest value; counters for
    which the server returned no series are absent from the dict.
    """
    # Reverse map for O(1) lookup of returned series. ``is not None`` rather
    # than truthiness so that a counter key of 0 would not be dropped.
    name_by_counter = {
        counter_id: name
        for name, counter_id in metric_ids.items()
        if counter_id is not None
    }

    metric_id_objs = [
        # instance="" requests the aggregate across all device instances.
        vim.PerformanceManager.MetricId(counterId=counter_id, instance="")
        for counter_id in name_by_counter
    ]

    query_spec = vim.PerformanceManager.QuerySpec(
        entity=entity,
        metricId=metric_id_objs,
        intervalId=REALTIME_INTERVAL_SECONDS,
        maxSample=1
    )

    results = perf_manager.QueryPerf(querySpec=[query_spec])

    samples = {}
    for result in results or []:
        for series in result.value:
            name = name_by_counter.get(series.id.counterId)
            if name is None:
                continue
            samples[name] = series.value[0] if series.value else 0
    return samples


def get_vm_perf(si, vm_name, metric_ids):
    """Get performance stats for a specific VM.

    The VM is looked up by case-insensitive name. Returns a dict of metrics,
    or ``None`` (after printing a message) when the VM does not exist or is
    not powered on.
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    container = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )

    # try/finally so the server-side view is released even if a property
    # read fails while searching.
    try:
        wanted = vm_name.lower()
        target_vm = None
        for vm in container.view:
            if vm.name.lower() == wanted:
                target_vm = vm
                break
    finally:
        container.Destroy()

    if not target_vm:
        print(f"VM '{vm_name}' not found")
        return None

    if target_vm.runtime.powerState != vim.VirtualMachinePowerState.poweredOn:
        print(f"VM '{vm_name}' is not powered on")
        return None

    perf_data = {
        'cpu_pct': 0,
        'cpu_ready_ms': 0,
        'mem_pct': 0,
        'disk_read_kbps': 0,
        'disk_write_kbps': 0,
        'disk_read_lat_ms': 0,
        'disk_write_lat_ms': 0,
        'disk_max_lat_ms': 0,
        'net_rx_kbps': 0,
        'net_tx_kbps': 0,
    }

    samples = _query_latest_samples(perf_manager, target_vm, metric_ids)
    for name, value in samples.items():
        field, convert = _VM_DETAIL_FIELDS[name]
        perf_data[field] = convert(value)

    return perf_data


def get_all_vms_perf(si, metric_ids):
    """Get performance stats for all powered-on VMs.

    Returns a list of per-VM dicts sorted by disk write throughput, highest
    first. VMs whose stats cannot be read are skipped with a warning on
    stderr instead of aborting the whole summary.
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    container = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )

    all_perf = []

    try:
        for vm in container.view:
            try:
                if vm.runtime.powerState != vim.VirtualMachinePowerState.poweredOn:
                    continue

                perf_data = {
                    'name': vm.name,
                    'cpu_pct': 0,
                    'mem_pct': 0,
                    'disk_read_mbps': 0,
                    'disk_write_mbps': 0,
                    'disk_lat_ms': 0,
                    'net_mbps': 0,
                }

                samples = _query_latest_samples(perf_manager, vm, metric_ids)
                for name, (field, convert) in _VM_SUMMARY_FIELDS.items():
                    if name in samples:
                        perf_data[field] = convert(samples[name])

                # Combined network throughput = received + transmitted.
                for name in _NET_COUNTERS:
                    if name in samples:
                        perf_data['net_mbps'] += _kb_to_mb(samples[name])

                all_perf.append(perf_data)
            except Exception as e:
                # Best effort: a VM that vanished or has no stats must not
                # abort the scan, but the failure should be visible.
                print(f"Warning: skipping a VM whose stats could not be read: {e}",
                      file=sys.stderr)
    finally:
        container.Destroy()

    return sorted(all_perf, key=lambda x: x['disk_write_mbps'], reverse=True)


def format_bar(value, max_val, width=20):
    """Create ASCII progress bar.

    The bar is always exactly ``width`` characters: values above ``max_val``
    fill it completely, and negative values or a non-positive ``max_val``
    leave it empty.
    """
    filled = int((value / max_val) * width) if max_val > 0 else 0
    # Clamp both ends; a negative fill would otherwise lengthen the bar.
    filled = max(0, min(filled, width))
    return '█' * filled + '░' * (width - filled)


def monitor_vm(si, vm_name, interval=5):
    """Monitor a specific VM in real-time.

    Prints one status line every ``interval`` seconds until Ctrl+C is
    pressed or the VM can no longer be queried.
    """
    content = si.RetrieveContent()
    metric_ids = get_counter_ids(content.perfManager)

    print(f"\nMonitoring VM: {vm_name}")
    print("Press Ctrl+C to stop\n")
    print("-" * 100)

    try:
        while True:
            perf = get_vm_perf(si, vm_name, metric_ids)
            if perf is None:
                break

            timestamp = datetime.now().strftime('%H:%M:%S')

            # Determine bottleneck indicators
            cpu_warn = "⚠️ " if perf['cpu_pct'] > CPU_WARN_PCT else ""
            lat_warn = "⚠️ " if perf['disk_max_lat_ms'] > LATENCY_WARN_MS else ""

            print(f"\r{timestamp} | "
                  f"CPU: {cpu_warn}{perf['cpu_pct']:5.1f}% | "
                  f"Mem: {perf['mem_pct']:5.1f}% | "
                  f"Disk R: {perf['disk_read_kbps']:6} KB/s | "
                  f"Disk W: {perf['disk_write_kbps']:6} KB/s | "
                  f"Lat: {lat_warn}{perf['disk_max_lat_ms']:3}ms | "
                  f"Net RX: {perf['net_rx_kbps']:6} KB/s | "
                  f"Net TX: {perf['net_tx_kbps']:6} KB/s", end='', flush=True)

            time.sleep(interval)
            print()  # New line for next update

    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")


def show_all_vms(si):
    """Show performance summary for all VMs."""
    content = si.RetrieveContent()
    metric_ids = get_counter_ids(content.perfManager)

    print("\nCollecting VM performance data...")
    all_perf = get_all_vms_perf(si, metric_ids)

    print("\n" + "=" * 100)
    print(f"{'VM Name':<35} {'CPU%':>6} {'Mem%':>6} {'DiskR':>8} {'DiskW':>8} {'Lat':>6} {'Net':>8}")
    print(f"{'':<35} {'':>6} {'':>6} {'(MB/s)':>8} {'(MB/s)':>8} {'(ms)':>6} {'(MB/s)':>8}")
    print("=" * 100)

    for vm in all_perf:
        # Highlight high values
        cpu_mark = "*" if vm['cpu_pct'] > CPU_WARN_PCT else " "
        lat_mark = "*" if vm['disk_lat_ms'] > LATENCY_WARN_MS else " "

        print(f"{vm['name']:<35} {vm['cpu_pct']:>5.1f}{cpu_mark} {vm['mem_pct']:>6.1f} "
              f"{vm['disk_read_mbps']:>8.1f} {vm['disk_write_mbps']:>8.1f} "
              f"{vm['disk_lat_ms']:>5}{lat_mark} {vm['net_mbps']:>8.1f}")

    print("=" * 100)
    print("* = potential bottleneck (CPU > 80% or Latency > 20ms)")


def main():
    """Parse arguments, connect to vCenter and run the requested view.

    Credentials come from the command line, falling back to the [vcenter]
    section of the optional config file. Exits with status 1 when any of
    server, username or password is missing.
    """
    parser = argparse.ArgumentParser(description='Real-time VM performance monitor')
    parser.add_argument('--config', '-c', help='Config file path')
    parser.add_argument('--server', '-s', help='vCenter server')
    parser.add_argument('--username', '-u', help='Username')
    # NOTE(security): a password given on the command line is visible in the
    # process list and shell history; prefer the config file.
    parser.add_argument('--password', '-p', help='Password')
    parser.add_argument('--vm', '-v', help='VM name to monitor (omit for all VMs summary)')
    parser.add_argument('--interval', '-i', type=int, default=5,
                        help='Polling interval in seconds (default: 5)')
    parser.add_argument('--watch', '-w', action='store_true',
                        help='Continuous monitoring mode')

    args = parser.parse_args()

    # time.sleep() raises ValueError on negative values; fail early instead.
    if args.interval < 0:
        parser.error("--interval must not be negative")

    server = args.server
    username = args.username
    password = args.password

    if args.config:
        config = configparser.ConfigParser()
        # read() silently skips unreadable files and returns those it parsed.
        if not config.read(args.config):
            print(f"Warning: could not read config file '{args.config}'",
                  file=sys.stderr)
        if 'vcenter' in config:
            server = server or config.get('vcenter', 'server', fallback=None)
            username = username or config.get('vcenter', 'username', fallback=None)
            password = password or config.get('vcenter', 'password', fallback=None)

    if not all([server, username, password]):
        print("Error: server, username, and password required")
        sys.exit(1)

    print(f"Connecting to {server}...")
    si = connect_vcenter(server, username, password)

    try:
        if args.vm:
            if args.watch:
                monitor_vm(si, args.vm, args.interval)
            else:
                content = si.RetrieveContent()
                metric_ids = get_counter_ids(content.perfManager)
                perf = get_vm_perf(si, args.vm, metric_ids)
                if perf:
                    print(f"\nPerformance for {args.vm}:")
                    print(f"  CPU Usage:      {perf['cpu_pct']}%")
                    print(f"  CPU Ready:      {perf['cpu_ready_ms']} ms")
                    print(f"  Memory Usage:   {perf['mem_pct']}%")
                    print(f"  Disk Read:      {perf['disk_read_kbps']} KB/s ({perf['disk_read_kbps']/1024:.1f} MB/s)")
                    print(f"  Disk Write:     {perf['disk_write_kbps']} KB/s ({perf['disk_write_kbps']/1024:.1f} MB/s)")
                    print(f"  Disk Read Lat:  {perf['disk_read_lat_ms']} ms")
                    print(f"  Disk Write Lat: {perf['disk_write_lat_ms']} ms")
                    print(f"  Disk Max Lat:   {perf['disk_max_lat_ms']} ms")
                    print(f"  Network RX:     {perf['net_rx_kbps']} KB/s ({perf['net_rx_kbps']/1024:.1f} MB/s)")
                    print(f"  Network TX:     {perf['net_tx_kbps']} KB/s ({perf['net_tx_kbps']/1024:.1f} MB/s)")

                    # Analysis
                    print("\n  Analysis:")
                    high_cpu = perf['cpu_pct'] > CPU_WARN_PCT
                    high_latency = perf['disk_max_lat_ms'] > LATENCY_WARN_MS
                    if high_cpu:
                        print("    ⚠️  HIGH CPU - VM may be CPU bottlenecked")
                    if high_latency:
                        print("    ⚠️  HIGH DISK LATENCY - Storage may be bottleneck")
                    if not high_latency and not high_cpu:
                        print("    ✓ No obvious VMware-side bottlenecks detected")
        else:
            if args.watch:
                try:
                    while True:
                        print("\033[2J\033[H")  # Clear screen
                        show_all_vms(si)
                        print(f"\nRefreshing every {args.interval} seconds... (Ctrl+C to stop)")
                        time.sleep(args.interval)
                except KeyboardInterrupt:
                    print("\nStopped.")
            else:
                show_all_vms(si)
    finally:
        Disconnect(si)


if __name__ == '__main__':
    main()