Initial commit
This commit is contained in:
373
perf_monitor.py
Normal file
373
perf_monitor.py
Normal file
@@ -0,0 +1,373 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Real-time VM Performance Monitor
|
||||
Run this during a backup to identify bottlenecks (CPU, disk, network).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import configparser
|
||||
import ssl
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from pyVim.connect import SmartConnect, Disconnect
|
||||
from pyVmomi import vim
|
||||
except ImportError:
|
||||
print("Error: pyvmomi is required. Install with: pip install pyvmomi")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def connect_vcenter(server, username, password, port=443):
    """Open a session to vCenter and return the service instance.

    TLS certificate and hostname verification are both disabled, so
    self-signed vCenter certificates are accepted. If the connection attempt
    raises, the error is printed and the process exits with status 1.
    """
    # check_hostname has to be cleared before verify_mode may be CERT_NONE.
    tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    tls_context.check_hostname = False
    tls_context.verify_mode = ssl.CERT_NONE

    try:
        service_instance = SmartConnect(
            host=server,
            user=username,
            pwd=password,
            port=port,
            sslContext=tls_context,
        )
    except Exception as e:
        print(f"Error connecting: {e}")
        sys.exit(1)

    return service_instance
|
||||
|
||||
|
||||
def get_counter_ids(perf_manager):
    """Resolve the counters this tool reports to their numeric counter keys.

    Walks ``perf_manager.perfCounter`` and builds each counter's dotted name
    as ``<group>.<name>.<rollup>``. Returns a dict keyed by the dotted names
    of interest; a counter that is not present on the server keeps the value
    ``None``.
    """
    wanted_counters = (
        'cpu.usage.average',
        'cpu.ready.summation',
        'disk.read.average',
        'disk.write.average',
        'disk.totalReadLatency.average',
        'disk.totalWriteLatency.average',
        'disk.maxTotalLatency.latest',
        'net.received.average',
        'net.transmitted.average',
        'mem.usage.average',
    )
    # Every wanted counter starts out unresolved.
    resolved = dict.fromkeys(wanted_counters)

    for info in perf_manager.perfCounter:
        group = info.groupInfo.key
        short_name = info.nameInfo.key
        rollup = info.rollupType
        dotted = f"{group}.{short_name}.{rollup}"
        if dotted not in resolved:
            continue
        resolved[dotted] = info.key

    return resolved
|
||||
|
||||
|
||||
def get_vm_perf(si, vm_name, metric_ids):
    """Get the most recent performance sample for a specific VM.

    Args:
        si: Connected vSphere service instance.
        vm_name: VM name to look up; compared case-insensitively.
        metric_ids: Mapping of dotted counter name (for example
            ``'cpu.usage.average'``) to counter key. Entries whose value is
            ``None`` (counter not available) are left out of the query.

    Returns:
        A dict with the keys ``cpu_pct``, ``cpu_ready_ms``, ``mem_pct``,
        ``disk_read_kbps``, ``disk_write_kbps``, ``disk_read_lat_ms``,
        ``disk_write_lat_ms``, ``disk_max_lat_ms``, ``net_rx_kbps`` and
        ``net_tx_kbps``. A metric with no sample stays 0. Returns ``None``
        (after printing a message) when the VM does not exist or is not
        powered on.
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    container = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )
    try:
        wanted = vm_name.lower()
        target_vm = next(
            (vm for vm in container.view if vm.name.lower() == wanted),
            None,
        )
    finally:
        # Release the view even if listing or reading a VM name fails.
        container.Destroy()

    if target_vm is None:
        print(f"VM '{vm_name}' not found")
        return None

    if target_vm.runtime.powerState != vim.VirtualMachinePowerState.poweredOn:
        print(f"VM '{vm_name}' is not powered on")
        return None

    # Reverse map (counter key -> dotted name). Compare against None so that
    # a counter whose key happens to be 0 is not mistaken for "unavailable".
    names_by_counter = {
        counter_id: name
        for name, counter_id in metric_ids.items()
        if counter_id is not None
    }

    # instance="" requests the aggregate series; intervalId=20 with
    # maxSample=1 asks for the single latest 20-second sample.
    query_spec = vim.PerformanceManager.QuerySpec(
        entity=target_vm,
        metricId=[
            vim.PerformanceManager.MetricId(counterId=counter_id, instance="")
            for counter_id in names_by_counter
        ],
        intervalId=20,
        maxSample=1,
    )
    results = perf_manager.QueryPerf(querySpec=[query_spec])

    # dotted counter name -> (output key, divisor); divisor None = keep raw.
    conversions = {
        # Usage counters are divided by 100 to yield a percentage
        # (vSphere reports them in hundredths of a percent).
        'cpu.usage.average': ('cpu_pct', 100),
        # Ready time is summed over the 20-second sample; dividing by 20
        # gives ready time per second of the sample window.
        'cpu.ready.summation': ('cpu_ready_ms', 20),
        'mem.usage.average': ('mem_pct', 100),
        'disk.read.average': ('disk_read_kbps', None),
        'disk.write.average': ('disk_write_kbps', None),
        'disk.totalReadLatency.average': ('disk_read_lat_ms', None),
        'disk.totalWriteLatency.average': ('disk_write_lat_ms', None),
        'disk.maxTotalLatency.latest': ('disk_max_lat_ms', None),
        'net.received.average': ('net_rx_kbps', None),
        'net.transmitted.average': ('net_tx_kbps', None),
    }
    perf_data = {field: 0 for field, _ in conversions.values()}

    for result in results or ():
        for series in result.value:
            name = names_by_counter.get(series.id.counterId)
            if name not in conversions:
                continue
            sample = series.value[0] if series.value else 0
            field, divisor = conversions[name]
            if divisor is None:
                perf_data[field] = sample
            else:
                perf_data[field] = round(sample / divisor, 1)

    return perf_data
|
||||
|
||||
|
||||
def get_all_vms_perf(si, metric_ids):
    """Get the most recent performance sample for all powered-on VMs.

    Args:
        si: Connected vSphere service instance.
        metric_ids: Mapping of dotted counter name (for example
            ``'cpu.usage.average'``) to counter key. Entries whose value is
            ``None`` (counter not available) are left out of the query.

    Returns:
        A list of dicts, one per VM that could be queried, sorted by
        ``disk_write_mbps`` in descending order. Each dict has the keys
        ``name``, ``cpu_pct``, ``mem_pct``, ``disk_read_mbps``,
        ``disk_write_mbps``, ``disk_lat_ms`` and ``net_mbps`` (receive plus
        transmit). A VM whose query fails is skipped with a warning on
        stderr instead of being dropped silently.
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    # Reverse map (counter key -> dotted name). Compare against None so that
    # a counter whose key happens to be 0 is not mistaken for "unavailable".
    names_by_counter = {
        counter_id: name
        for name, counter_id in metric_ids.items()
        if counter_id is not None
    }
    # The metric list is identical for every VM, so build it once.
    metric_id_objs = [
        vim.PerformanceManager.MetricId(counterId=counter_id, instance="")
        for counter_id in names_by_counter
    ]

    container = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )

    all_perf = []
    try:
        for vm in container.view:
            # Fallback label for the warning in case even the name lookup
            # fails (for example a VM removed while we iterate).
            label = str(vm)
            try:
                if vm.runtime.powerState != vim.VirtualMachinePowerState.poweredOn:
                    continue
                label = vm.name

                query_spec = vim.PerformanceManager.QuerySpec(
                    entity=vm,
                    metricId=metric_id_objs,
                    intervalId=20,
                    maxSample=1,
                )
                results = perf_manager.QueryPerf(querySpec=[query_spec])

                # dotted counter name -> latest raw sample
                latest = {}
                for result in results or ():
                    for series in result.value:
                        name = names_by_counter.get(series.id.counterId)
                        if name is not None:
                            latest[name] = series.value[0] if series.value else 0

                # Add receive and transmit before converting so the total is
                # rounded once rather than summing two rounded figures.
                net_kbps = (
                    latest.get('net.received.average', 0)
                    + latest.get('net.transmitted.average', 0)
                )

                all_perf.append({
                    'name': label,
                    'cpu_pct': round(latest.get('cpu.usage.average', 0) / 100, 1),
                    'mem_pct': round(latest.get('mem.usage.average', 0) / 100, 1),
                    'disk_read_mbps': round(latest.get('disk.read.average', 0) / 1024, 1),
                    'disk_write_mbps': round(latest.get('disk.write.average', 0) / 1024, 1),
                    'disk_lat_ms': latest.get('disk.maxTotalLatency.latest', 0),
                    'net_mbps': round(net_kbps / 1024, 1),
                })
            except Exception as exc:
                # Best effort: one unreadable VM must not abort the summary,
                # but the operator should see that a row is missing.
                print(f"Warning: skipping {label}: {exc}", file=sys.stderr)
    finally:
        # Release the view even if iteration itself fails.
        container.Destroy()

    return sorted(all_perf, key=lambda x: x['disk_write_mbps'], reverse=True)
|
||||
|
||||
|
||||
def format_bar(value, max_val, width=20):
    """Create an ASCII progress bar that is always ``width`` characters long.

    Args:
        value: Current reading.
        max_val: Reading that corresponds to a completely filled bar. A
            value of zero or less produces an empty bar.
        width: Total number of characters in the bar.

    Returns:
        A string of filled blocks followed by shaded blocks. Readings above
        ``max_val`` give a full bar and negative readings give an empty one.
    """
    if max_val > 0:
        filled = int((value / max_val) * width)
    else:
        filled = 0
    # Clamp at both ends: without the lower bound a negative reading makes
    # the shaded part longer than ``width``.
    filled = max(0, min(filled, width))
    return '█' * filled + '░' * (width - filled)
|
||||
|
||||
|
||||
def monitor_vm(si, vm_name, interval=5):
    """Poll one VM and print a status line per sample until interrupted.

    A new sample is taken every *interval* seconds. The loop ends when
    get_vm_perf() returns nothing (VM not found or not powered on) or when
    the user presses Ctrl+C.
    """
    metric_ids = get_counter_ids(si.RetrieveContent().perfManager)

    print(f"\nMonitoring VM: {vm_name}")
    print("Press Ctrl+C to stop\n")
    print("-" * 100)

    try:
        while True:
            perf = get_vm_perf(si, vm_name, metric_ids)
            if not perf:
                break

            timestamp = datetime.now().strftime('%H:%M:%S')

            # Flag readings above the bottleneck thresholds.
            cpu_warn = ""
            if perf['cpu_pct'] > 80:
                cpu_warn = "⚠️ "
            lat_warn = ""
            if perf['disk_max_lat_ms'] > 20:
                lat_warn = "⚠️ "

            status_line = (
                f"\r{timestamp} | "
                f"CPU: {cpu_warn}{perf['cpu_pct']:5.1f}% | "
                f"Mem: {perf['mem_pct']:5.1f}% | "
                f"Disk R: {perf['disk_read_kbps']:6} KB/s | "
                f"Disk W: {perf['disk_write_kbps']:6} KB/s | "
                f"Lat: {lat_warn}{perf['disk_max_lat_ms']:3}ms | "
                f"Net RX: {perf['net_rx_kbps']:6} KB/s | "
                f"Net TX: {perf['net_tx_kbps']:6} KB/s"
            )
            print(status_line, end='', flush=True)

            time.sleep(interval)
            # Terminate the line so the next sample starts on a fresh one.
            print()

    except KeyboardInterrupt:
        print("\n\nMonitoring stopped.")
|
||||
|
||||
|
||||
def show_all_vms(si):
    """Print a performance summary table for the VMs get_all_vms_perf() returns.

    Rows appear in the order returned by get_all_vms_perf(). CPU above 80%
    and disk latency above 20 ms are marked with an asterisk.
    """
    metric_ids = get_counter_ids(si.RetrieveContent().perfManager)

    print("\nCollecting VM performance data...")
    rows = get_all_vms_perf(si, metric_ids)

    rule = "=" * 100
    print("\n" + rule)
    print(f"{'VM Name':<35} {'CPU%':>6} {'Mem%':>6} {'DiskR':>8} {'DiskW':>8} {'Lat':>6} {'Net':>8}")
    print(f"{'':<35} {'':>6} {'':>6} {'(MB/s)':>8} {'(MB/s)':>8} {'(ms)':>6} {'(MB/s)':>8}")
    print(rule)

    for vm in rows:
        # The marker takes the last character of its column.
        if vm['cpu_pct'] > 80:
            cpu_mark = "*"
        else:
            cpu_mark = " "
        if vm['disk_lat_ms'] > 20:
            lat_mark = "*"
        else:
            lat_mark = " "

        row_text = (
            f"{vm['name']:<35} {vm['cpu_pct']:>5.1f}{cpu_mark} {vm['mem_pct']:>6.1f} "
            f"{vm['disk_read_mbps']:>8.1f} {vm['disk_write_mbps']:>8.1f} "
            f"{vm['disk_lat_ms']:>5}{lat_mark} {vm['net_mbps']:>8.1f}"
        )
        print(row_text)

    print(rule)
    print("* = potential bottleneck (CPU > 80% or Latency > 20ms)")
|
||||
|
||||
|
||||
def _print_vm_report(vm_name, perf):
    """Print the one-shot detail report and bottleneck analysis for one VM."""
    print(f"\nPerformance for {vm_name}:")
    print(f" CPU Usage: {perf['cpu_pct']}%")
    print(f" CPU Ready: {perf['cpu_ready_ms']} ms")
    print(f" Memory Usage: {perf['mem_pct']}%")
    print(f" Disk Read: {perf['disk_read_kbps']} KB/s ({perf['disk_read_kbps']/1024:.1f} MB/s)")
    print(f" Disk Write: {perf['disk_write_kbps']} KB/s ({perf['disk_write_kbps']/1024:.1f} MB/s)")
    print(f" Disk Read Lat: {perf['disk_read_lat_ms']} ms")
    print(f" Disk Write Lat: {perf['disk_write_lat_ms']} ms")
    print(f" Disk Max Lat: {perf['disk_max_lat_ms']} ms")
    print(f" Network RX: {perf['net_rx_kbps']} KB/s ({perf['net_rx_kbps']/1024:.1f} MB/s)")
    print(f" Network TX: {perf['net_tx_kbps']} KB/s ({perf['net_tx_kbps']/1024:.1f} MB/s)")

    # Analysis
    print("\n Analysis:")
    if perf['cpu_pct'] > 80:
        print(" ⚠️ HIGH CPU - VM may be CPU bottlenecked")
    if perf['disk_max_lat_ms'] > 20:
        print(" ⚠️ HIGH DISK LATENCY - Storage may be bottleneck")
    if perf['disk_max_lat_ms'] <= 20 and perf['cpu_pct'] <= 80:
        print(" ✓ No obvious VMware-side bottlenecks detected")


def main():
    """Command-line entry point.

    Connection settings come from the command line and, for any value not
    given there, from the ``[vcenter]`` section of the ``--config`` file.
    Depending on ``--vm`` and ``--watch`` this prints a one-shot report for
    one VM, monitors one VM continuously, prints a summary of all VMs, or
    refreshes that summary continuously. The vCenter session is always
    disconnected on the way out.

    Exits with status 1 when credentials are incomplete or the config file
    is malformed, and with an argparse usage error when ``--watch`` is
    combined with an interval below one second.
    """
    parser = argparse.ArgumentParser(description='Real-time VM performance monitor')
    parser.add_argument('--config', '-c', help='Config file path')
    parser.add_argument('--server', '-s', help='vCenter server')
    parser.add_argument('--username', '-u', help='Username')
    parser.add_argument('--password', '-p', help='Password')
    parser.add_argument('--vm', '-v', help='VM name to monitor (omit for all VMs summary)')
    parser.add_argument('--interval', '-i', type=int, default=5, help='Polling interval in seconds (default: 5)')
    parser.add_argument('--watch', '-w', action='store_true', help='Continuous monitoring mode')

    args = parser.parse_args()

    # The interval only matters in watch mode: zero would poll vCenter in a
    # tight loop and a negative value makes time.sleep() raise.
    if args.watch and args.interval < 1:
        parser.error("--interval must be at least 1 second")

    server = args.server
    username = args.username
    password = args.password

    if args.config:
        config = configparser.ConfigParser()
        try:
            # read() skips files it cannot open and returns only the ones it
            # actually parsed.
            loaded = config.read(args.config)
            if 'vcenter' in config:
                server = server or config.get('vcenter', 'server', fallback=None)
                username = username or config.get('vcenter', 'username', fallback=None)
                password = password or config.get('vcenter', 'password', fallback=None)
        except configparser.Error as err:
            print(f"Error: invalid config file '{args.config}': {err}")
            sys.exit(1)
        if not loaded:
            # Only a warning: the command line may still supply everything.
            print(f"Warning: config file '{args.config}' could not be read",
                  file=sys.stderr)

    if not all([server, username, password]):
        print("Error: server, username, and password required")
        sys.exit(1)

    print(f"Connecting to {server}...")
    si = connect_vcenter(server, username, password)

    try:
        if args.vm and args.watch:
            monitor_vm(si, args.vm, args.interval)
        elif args.vm:
            content = si.RetrieveContent()
            metric_ids = get_counter_ids(content.perfManager)
            perf = get_vm_perf(si, args.vm, metric_ids)
            if perf:
                _print_vm_report(args.vm, perf)
        elif args.watch:
            try:
                while True:
                    print("\033[2J\033[H")  # Clear screen
                    show_all_vms(si)
                    print(f"\nRefreshing every {args.interval} seconds... (Ctrl+C to stop)")
                    time.sleep(args.interval)
            except KeyboardInterrupt:
                print("\nStopped.")
        else:
            show_all_vms(si)
    finally:
        Disconnect(si)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user