347 lines
11 KiB
Python
347 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Historical VM Performance Report
|
|
Pull performance stats from vCenter for the past month to identify patterns.
|
|
"""
|
|
|
|
import argparse
|
|
import configparser
|
|
import csv
|
|
import ssl
|
|
import sys
|
|
from datetime import datetime, timedelta
|
|
|
|
try:
|
|
from pyVim.connect import SmartConnect, Disconnect
|
|
from pyVmomi import vim
|
|
except ImportError:
|
|
print("Error: pyvmomi is required. Install with: pip install pyvmomi")
|
|
sys.exit(1)
|
|
|
|
|
|
def connect_vcenter(server, username, password, port=443):
    """Open a vCenter session and return the ServiceInstance.

    Certificate verification is intentionally disabled (self-signed
    lab vCenters are the common case for this tool). Exits the process
    with status 1 if the connection cannot be established.
    """
    tls_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    tls_ctx.check_hostname = False
    tls_ctx.verify_mode = ssl.CERT_NONE

    try:
        return SmartConnect(
            host=server,
            user=username,
            pwd=password,
            port=port,
            sslContext=tls_ctx,
        )
    except Exception as e:
        print(f"Error connecting: {e}")
        sys.exit(1)
|
|
|
|
|
|
def get_historical_intervals(perf_manager):
    """Return {sampling_period_seconds: {'name', 'length', 'level'}} for
    every historical rollup interval the PerformanceManager advertises."""
    return {
        iv.samplingPeriod: {
            'name': iv.name,
            'length': iv.length,
            'level': iv.level,
        }
        for iv in perf_manager.historicalInterval
    }
|
|
|
|
|
|
def get_counter_ids(perf_manager, metrics_needed):
    """Resolve 'group.name.rollup' metric strings to perf counter keys.

    Returns a dict keyed by each requested metric name; the value is the
    counter's key when vCenter knows the metric, otherwise None.
    """
    wanted = set(metrics_needed)
    found = {}
    for ctr in perf_manager.perfCounter:
        label = f"{ctr.groupInfo.key}.{ctr.nameInfo.key}.{ctr.rollupType}"
        if label in wanted:
            found[label] = ctr.key
    return {name: found.get(name) for name in metrics_needed}
|
|
|
|
|
|
def get_vm_by_name(content, vm_name):
    """Locate a VM by name (case-insensitive); return None when absent.

    Walks every VirtualMachine in the inventory via a container view and
    destroys the view before returning.
    """
    view = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )
    wanted = vm_name.lower()
    match = next(
        (candidate for candidate in view.view
         if candidate.name.lower() == wanted),
        None,
    )
    view.Destroy()
    return match
|
|
|
|
|
|
def get_historical_perf(si, entity, metric_ids, days=30):
    """Query vCenter's historical performance rollups for one entity.

    Args:
        si: ServiceInstance from SmartConnect.
        entity: managed object to query (e.g. a VirtualMachine).
        metric_ids: mapping of 'group.name.rollup' -> counter key (or
            None), as produced by get_counter_ids(); None entries are
            skipped.
        days: size of the lookback window. Chooses the finest rollup
            interval vCenter still retains for that window.

    Returns:
        A list of samples, each a dict with 'timestamp', 'interval' and
        one entry per resolved metric name. Empty list when no metrics
        resolved or the query failed.
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    # Historical rollup intervals available in vCenter:
    # 300s (5min), 1800s (30min), 7200s (2hr), 86400s (daily).
    if days <= 1:
        interval_id = 300    # 5-minute samples for last day
    elif days <= 7:
        interval_id = 1800   # 30-minute samples for last week
    else:
        interval_id = 7200   # 2-hour samples for longer periods

    end_time = datetime.now()
    start_time = end_time - timedelta(days=days)

    # Build the counterId -> metric-name reverse map once, instead of
    # linearly scanning metric_ids for every value of every sample in the
    # parsing loop below.
    name_by_counter = {cid: name for name, cid in metric_ids.items() if cid}

    metric_id_objs = [
        vim.PerformanceManager.MetricId(counterId=cid, instance="")
        for cid in name_by_counter
    ]

    if not metric_id_objs:
        print("No valid metrics found")
        return []

    query_spec = vim.PerformanceManager.QuerySpec(
        entity=entity,
        metricId=metric_id_objs,
        intervalId=interval_id,
        startTime=start_time,
        endTime=end_time,
    )

    try:
        results = perf_manager.QueryPerf(querySpec=[query_spec])
    except Exception as e:
        print(f"Error querying performance: {e}")
        return []

    # Flatten the QueryPerf result into one dict per timestamp.
    data = []
    if results:
        for result in results:
            for i, sample_info in enumerate(result.sampleInfo):
                sample = {
                    'timestamp': sample_info.timestamp,
                    'interval': sample_info.interval,
                }
                for val in result.value:
                    name = name_by_counter.get(val.id.counterId)
                    if name is not None and i < len(val.value):
                        sample[name] = val.value[i]
                data.append(sample)

    return data
|
|
|
|
|
|
def analyze_vm_history(si, vm_name, days=30):
    """Analyze and report historical performance for a single VM.

    Prints a performance-statistics report and a bottleneck analysis to
    stdout.

    Returns:
        (data, stats): data is the raw sample list from
        get_historical_perf(); stats maps metric name ->
        {'min', 'max', 'avg', 'samples'}. Returns ([], {}) when the VM is
        not found or no history is available, so callers can always
        unpack the result (the previous bare ``return`` made
        ``data, stats = analyze_vm_history(...)`` raise TypeError).
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    vm = get_vm_by_name(content, vm_name)
    if not vm:
        print(f"VM '{vm_name}' not found")
        return [], {}

    print(f"\nAnalyzing historical performance for: {vm_name}")
    print(f"Period: Last {days} days")
    print("-" * 60)

    metrics = [
        'cpu.usage.average',
        'cpu.ready.summation',
        'mem.usage.average',
        'disk.read.average',
        'disk.write.average',
        'disk.totalReadLatency.average',
        'disk.totalWriteLatency.average',
        'disk.maxTotalLatency.latest',
        'net.received.average',
        'net.transmitted.average',
    ]

    metric_ids = get_counter_ids(perf_manager, metrics)
    data = get_historical_perf(si, vm, metric_ids, days)

    if not data:
        print("No historical data available")
        return [], {}

    print(f"Retrieved {len(data)} samples")

    # Min/max/avg per metric, over only the samples that reported it.
    stats = {}
    for metric in metrics:
        values = [d[metric] for d in data if metric in d]
        if values:
            stats[metric] = {
                'min': min(values),
                'max': max(values),
                'avg': sum(values) / len(values),
                'samples': len(values),
            }

    # Display results
    print("\n" + "=" * 60)
    print("PERFORMANCE STATISTICS")
    print("=" * 60)

    # cpu.usage.average / mem.usage.average are reported in hundredths of
    # a percent, hence the /100 conversions below.
    if 'cpu.usage.average' in stats:
        s = stats['cpu.usage.average']
        print(f"\nCPU Usage:")
        print(f"  Average: {s['avg']/100:.1f}%")
        print(f"  Maximum: {s['max']/100:.1f}%")
        if s['max'] / 100 > 80:
            print(f"  ⚠️ CPU reached {s['max']/100:.1f}% - potential bottleneck")

    if 'mem.usage.average' in stats:
        s = stats['mem.usage.average']
        print(f"\nMemory Usage:")
        print(f"  Average: {s['avg']/100:.1f}%")
        print(f"  Maximum: {s['max']/100:.1f}%")

    if 'disk.read.average' in stats and 'disk.write.average' in stats:
        r = stats['disk.read.average']
        w = stats['disk.write.average']
        print(f"\nDisk I/O (KB/s):")
        print(f"  Read  - Avg: {r['avg']:.0f}, Max: {r['max']:.0f} ({r['max']/1024:.1f} MB/s)")
        print(f"  Write - Avg: {w['avg']:.0f}, Max: {w['max']:.0f} ({w['max']/1024:.1f} MB/s)")

    if 'disk.totalReadLatency.average' in stats and 'disk.totalWriteLatency.average' in stats:
        rl = stats['disk.totalReadLatency.average']
        wl = stats['disk.totalWriteLatency.average']
        print(f"\nDisk Latency (ms):")
        print(f"  Read  - Avg: {rl['avg']:.1f}, Max: {rl['max']:.0f}")
        print(f"  Write - Avg: {wl['avg']:.1f}, Max: {wl['max']:.0f}")
        if rl['max'] > 20 or wl['max'] > 20:
            print(f"  ⚠️ High disk latency detected - storage may be bottleneck")

    if 'disk.maxTotalLatency.latest' in stats:
        s = stats['disk.maxTotalLatency.latest']
        print(f"\nPeak Disk Latency:")
        print(f"  Average Peak: {s['avg']:.1f} ms")
        print(f"  Maximum Peak: {s['max']:.0f} ms")
        if s['max'] > 50:
            print(f"  ⚠️ SEVERE: Peak latency reached {s['max']} ms!")

    if 'net.received.average' in stats and 'net.transmitted.average' in stats:
        rx = stats['net.received.average']
        tx = stats['net.transmitted.average']
        print(f"\nNetwork I/O (KB/s):")
        print(f"  RX - Avg: {rx['avg']:.0f}, Max: {rx['max']:.0f} ({rx['max']/1024:.1f} MB/s)")
        print(f"  TX - Avg: {tx['avg']:.0f}, Max: {tx['max']:.0f} ({tx['max']/1024:.1f} MB/s)")

    # Summary
    print("\n" + "=" * 60)
    print("BOTTLENECK ANALYSIS")
    print("=" * 60)

    issues = []

    if 'cpu.usage.average' in stats and stats['cpu.usage.average']['max'] / 100 > 80:
        issues.append(f"CPU spiked to {stats['cpu.usage.average']['max']/100:.0f}%")

    if 'disk.maxTotalLatency.latest' in stats:
        max_lat = stats['disk.maxTotalLatency.latest']['max']
        avg_lat = stats['disk.maxTotalLatency.latest']['avg']
        if max_lat > 50:
            issues.append(f"Disk latency peaked at {max_lat:.0f}ms (severe)")
        elif max_lat > 20:
            issues.append(f"Disk latency peaked at {max_lat:.0f}ms (moderate)")

    if issues:
        print("\nPotential issues detected:")
        for issue in issues:
            print(f"  ⚠️ {issue}")
    else:
        print("\n✓ No major VMware-side bottlenecks detected in historical data")
        print("  If backups are still slow, the issue is likely:")
        print("  - DATTO agent/MercuryFTP performance")
        print("  - DATTO appliance storage/CPU")
        print("  - Network between guest and DATTO (not VMware layer)")

    return data, stats
|
|
|
|
|
|
def export_to_csv(data, filename, vm_name):
    """Export historical samples to a CSV file.

    The header row is 'vm_name' followed by the sorted union of every key
    seen across all samples; samples missing a key get an empty cell.
    Does nothing when data is empty.
    """
    if not data:
        return

    # Samples can carry different metric sets, so collect the union of
    # keys first to get a stable column layout.
    keys = set()
    for sample in data:
        keys.update(sample.keys())
    keys = sorted(keys)

    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['vm_name'] + keys)
        for sample in data:
            writer.writerow([vm_name] + [sample.get(k, '') for k in keys])

    # Bug fix: this previously printed the literal text "(unknown)"
    # (an f-string with no placeholder) instead of the output path.
    print(f"\nData exported to: {filename}")
|
|
|
|
|
|
def main():
    """CLI entry point: parse args, connect to vCenter, analyze, export.

    Credentials come from CLI flags first; a --config INI file's
    [vcenter] section fills in anything not supplied on the command line.
    """
    parser = argparse.ArgumentParser(description='Historical VM performance analysis')
    parser.add_argument('--config', '-c', help='Config file path')
    parser.add_argument('--server', '-s', help='vCenter server')
    parser.add_argument('--username', '-u', help='Username')
    parser.add_argument('--password', '-p', help='Password')
    parser.add_argument('--vm', '-v', required=True, help='VM name to analyze')
    parser.add_argument('--days', '-d', type=int, default=30, help='Number of days to analyze (default: 30)')
    parser.add_argument('--export', '-e', help='Export data to CSV file')

    args = parser.parse_args()

    server = args.server
    username = args.username
    password = args.password

    # CLI flags take precedence; config only supplies missing values.
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        if 'vcenter' in config:
            server = server or config.get('vcenter', 'server', fallback=None)
            username = username or config.get('vcenter', 'username', fallback=None)
            password = password or config.get('vcenter', 'password', fallback=None)

    if not all([server, username, password]):
        print("Error: server, username, and password required")
        sys.exit(1)

    print(f"Connecting to {server}...")
    si = connect_vcenter(server, username, password)

    try:
        # Bug fix: analyze_vm_history() may return None on its early-exit
        # paths (VM not found / no data); unpacking that directly raised
        # TypeError. Guard before unpacking.
        result = analyze_vm_history(si, args.vm, args.days)
        if result:
            data, _stats = result
            if args.export and data:
                export_to_csv(data, args.export, args.vm)
    finally:
        Disconnect(si)
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
|