Initial commit
This commit is contained in:
346
perf_history.py
Normal file
346
perf_history.py
Normal file
@@ -0,0 +1,346 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Historical VM Performance Report
|
||||
Pull performance stats from vCenter for the past month to identify patterns.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import configparser
|
||||
import csv
|
||||
import ssl
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
from pyVim.connect import SmartConnect, Disconnect
|
||||
from pyVmomi import vim
|
||||
except ImportError:
|
||||
print("Error: pyvmomi is required. Install with: pip install pyvmomi")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def connect_vcenter(server, username, password, port=443):
    """Open a vCenter session and return the ServiceInstance.

    Certificate verification is disabled (typical for lab/self-signed
    vCenter certs). Exits the process with status 1 if the connection
    cannot be established.
    """
    ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ssl_ctx.check_hostname = False
    ssl_ctx.verify_mode = ssl.CERT_NONE

    try:
        return SmartConnect(
            host=server,
            user=username,
            pwd=password,
            port=port,
            sslContext=ssl_ctx,
        )
    except Exception as e:
        print(f"Error connecting: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
def get_historical_intervals(perf_manager):
    """Return a mapping of sampling period (seconds) -> interval metadata.

    Each value carries the interval's display name, retention length, and
    statistics level as reported by the PerformanceManager.
    """
    return {
        iv.samplingPeriod: {
            'name': iv.name,
            'length': iv.length,
            'level': iv.level,
        }
        for iv in perf_manager.historicalInterval
    }
|
||||
|
||||
|
||||
def get_counter_ids(perf_manager, metrics_needed):
    """Resolve 'group.name.rollup' metric names to perf counter IDs.

    Returns a dict keyed by each requested metric name; metrics that have
    no matching counter on this vCenter stay mapped to None.
    """
    resolved = dict.fromkeys(metrics_needed)
    for counter in perf_manager.perfCounter:
        dotted = f"{counter.groupInfo.key}.{counter.nameInfo.key}.{counter.rollupType}"
        if dotted in resolved:
            resolved[dotted] = counter.key
    return resolved
|
||||
|
||||
|
||||
def get_vm_by_name(content, vm_name):
    """Return the first VM whose name matches vm_name (case-insensitive).

    Walks a recursive container view over the whole inventory; returns
    None when no VM matches. The view is destroyed before returning.
    """
    view = content.viewManager.CreateContainerView(
        content.rootFolder, [vim.VirtualMachine], True
    )

    wanted = vm_name.lower()
    match = next((vm for vm in view.view if vm.name.lower() == wanted), None)

    view.Destroy()
    return match
|
||||
|
||||
|
||||
def get_historical_perf(si, entity, metric_ids, days=30):
    """Query historical performance samples for a managed entity.

    Args:
        si: Connected ServiceInstance.
        entity: Managed object (e.g. a VirtualMachine) to query.
        metric_ids: Mapping of metric name -> counter ID; None entries are
            skipped (unresolved counters).
        days: Look-back window; also selects the roll-up interval.

    Returns:
        A list of dicts, one per sample, each with 'timestamp', 'interval',
        and one key per metric that had a value at that sample. Returns an
        empty list when no metrics resolved or the query fails.
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    # vCenter historical roll-up intervals: 300 (5min), 1800 (30min),
    # 7200 (2hr), 86400 (daily). Coarser intervals for longer windows keep
    # the result set manageable.
    if days <= 1:
        interval_id = 300  # 5-minute samples for last day
    elif days <= 7:
        interval_id = 1800  # 30-minute samples for last week
    else:
        interval_id = 7200  # 2-hour samples for longer periods

    # NOTE(review): naive local time is passed to vCenter — assumes client
    # and server clocks/timezones are close enough; confirm if they differ.
    end_time = datetime.now()
    start_time = end_time - timedelta(days=days)

    # Build MetricId objects only for metrics that resolved to a counter ID.
    metric_id_objs = [
        vim.PerformanceManager.MetricId(counterId=counter_id, instance="")
        for counter_id in metric_ids.values()
        if counter_id
    ]

    if not metric_id_objs:
        print("No valid metrics found")
        return []

    query_spec = vim.PerformanceManager.QuerySpec(
        entity=entity,
        metricId=metric_id_objs,
        intervalId=interval_id,
        startTime=start_time,
        endTime=end_time,
    )

    try:
        results = perf_manager.QueryPerf(querySpec=[query_spec])
    except Exception as e:
        print(f"Error querying performance: {e}")
        return []

    # Reverse map for O(1) counter-ID -> metric-name lookup. The original
    # rescanned metric_ids for every value series of every sample, and its
    # 'value' local could carry a stale value forward when a series was
    # shorter than sampleInfo.
    id_to_name = {cid: name for name, cid in metric_ids.items() if cid}

    # Flatten the query result into one dict per sample timestamp.
    data = []
    for result in results or []:
        for i, sample_info in enumerate(result.sampleInfo):
            sample = {
                'timestamp': sample_info.timestamp,
                'interval': sample_info.interval,
            }
            for val in result.value:
                # Guard the index: a series may have fewer points than
                # sampleInfo; only record values that actually exist.
                name = id_to_name.get(val.id.counterId)
                if name is not None and i < len(val.value):
                    sample[name] = val.value[i]
            data.append(sample)

    return data
|
||||
|
||||
|
||||
def analyze_vm_history(si, vm_name, days=30):
    """Analyze historical performance for a VM and print a report.

    Args:
        si: Connected ServiceInstance.
        vm_name: VM display name to look up (case-insensitive).
        days: Look-back window for the historical query.

    Returns:
        (data, stats): the raw sample list and the per-metric summary.
        Returns ([], {}) when the VM is not found or no data is available.
        (Previously these paths returned None, which crashed the caller's
        tuple unpacking with a TypeError.)
    """
    content = si.RetrieveContent()
    perf_manager = content.perfManager

    vm = get_vm_by_name(content, vm_name)
    if not vm:
        print(f"VM '{vm_name}' not found")
        return [], {}

    print(f"\nAnalyzing historical performance for: {vm_name}")
    print(f"Period: Last {days} days")
    print("-" * 60)

    metrics = [
        'cpu.usage.average',
        'cpu.ready.summation',
        'mem.usage.average',
        'disk.read.average',
        'disk.write.average',
        'disk.totalReadLatency.average',
        'disk.totalWriteLatency.average',
        'disk.maxTotalLatency.latest',
        'net.received.average',
        'net.transmitted.average',
    ]

    metric_ids = get_counter_ids(perf_manager, metrics)
    data = get_historical_perf(si, vm, metric_ids, days)

    if not data:
        print("No historical data available")
        return [], {}

    print(f"Retrieved {len(data)} samples")

    # Per-metric min/max/avg over the samples that carried that metric
    # (not every sample includes every counter).
    stats = {}
    for metric in metrics:
        values = [d[metric] for d in data if metric in d]
        if values:
            stats[metric] = {
                'min': min(values),
                'max': max(values),
                'avg': sum(values) / len(values),
                'samples': len(values),
            }

    # Display results
    print("\n" + "=" * 60)
    print("PERFORMANCE STATISTICS")
    print("=" * 60)

    # cpu/mem usage counters are reported in hundredths of a percent,
    # hence the /100 conversions below.
    if 'cpu.usage.average' in stats:
        s = stats['cpu.usage.average']
        print(f"\nCPU Usage:")
        print(f" Average: {s['avg']/100:.1f}%")
        print(f" Maximum: {s['max']/100:.1f}%")
        if s['max']/100 > 80:
            print(f" ⚠️ CPU reached {s['max']/100:.1f}% - potential bottleneck")

    if 'mem.usage.average' in stats:
        s = stats['mem.usage.average']
        print(f"\nMemory Usage:")
        print(f" Average: {s['avg']/100:.1f}%")
        print(f" Maximum: {s['max']/100:.1f}%")

    if 'disk.read.average' in stats and 'disk.write.average' in stats:
        r = stats['disk.read.average']
        w = stats['disk.write.average']
        print(f"\nDisk I/O (KB/s):")
        print(f" Read - Avg: {r['avg']:.0f}, Max: {r['max']:.0f} ({r['max']/1024:.1f} MB/s)")
        print(f" Write - Avg: {w['avg']:.0f}, Max: {w['max']:.0f} ({w['max']/1024:.1f} MB/s)")

    if 'disk.totalReadLatency.average' in stats and 'disk.totalWriteLatency.average' in stats:
        rl = stats['disk.totalReadLatency.average']
        wl = stats['disk.totalWriteLatency.average']
        print(f"\nDisk Latency (ms):")
        print(f" Read - Avg: {rl['avg']:.1f}, Max: {rl['max']:.0f}")
        print(f" Write - Avg: {wl['avg']:.1f}, Max: {wl['max']:.0f}")
        if rl['max'] > 20 or wl['max'] > 20:
            print(f" ⚠️ High disk latency detected - storage may be bottleneck")

    if 'disk.maxTotalLatency.latest' in stats:
        s = stats['disk.maxTotalLatency.latest']
        print(f"\nPeak Disk Latency:")
        print(f" Average Peak: {s['avg']:.1f} ms")
        print(f" Maximum Peak: {s['max']:.0f} ms")
        if s['max'] > 50:
            print(f" ⚠️ SEVERE: Peak latency reached {s['max']} ms!")

    if 'net.received.average' in stats and 'net.transmitted.average' in stats:
        rx = stats['net.received.average']
        tx = stats['net.transmitted.average']
        print(f"\nNetwork I/O (KB/s):")
        print(f" RX - Avg: {rx['avg']:.0f}, Max: {rx['max']:.0f} ({rx['max']/1024:.1f} MB/s)")
        print(f" TX - Avg: {tx['avg']:.0f}, Max: {tx['max']:.0f} ({tx['max']/1024:.1f} MB/s)")

    # Summary
    print("\n" + "=" * 60)
    print("BOTTLENECK ANALYSIS")
    print("=" * 60)

    issues = []

    if 'cpu.usage.average' in stats and stats['cpu.usage.average']['max']/100 > 80:
        issues.append(f"CPU spiked to {stats['cpu.usage.average']['max']/100:.0f}%")

    if 'disk.maxTotalLatency.latest' in stats:
        max_lat = stats['disk.maxTotalLatency.latest']['max']
        if max_lat > 50:
            issues.append(f"Disk latency peaked at {max_lat:.0f}ms (severe)")
        elif max_lat > 20:
            issues.append(f"Disk latency peaked at {max_lat:.0f}ms (moderate)")

    if issues:
        print("\nPotential issues detected:")
        for issue in issues:
            print(f" ⚠️ {issue}")
    else:
        print("\n✓ No major VMware-side bottlenecks detected in historical data")
        print(" If backups are still slow, the issue is likely:")
        print(" - DATTO agent/MercuryFTP performance")
        print(" - DATTO appliance storage/CPU")
        print(" - Network between guest and DATTO (not VMware layer)")

    return data, stats
|
||||
|
||||
|
||||
def export_to_csv(data, filename, vm_name):
    """Export historical sample dicts to a CSV file.

    Writes one row per sample. The header is the sorted union of all keys
    seen across samples, prefixed with a 'vm_name' column; metrics missing
    from a given sample are left blank. No file is written when data is
    empty.
    """
    if not data:
        return

    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)

        # Union of keys across all samples — not every sample carries
        # every metric.
        keys = set()
        for d in data:
            keys.update(d.keys())
        keys = sorted(keys)

        writer.writerow(['vm_name'] + keys)

        for d in data:
            row = [vm_name] + [d.get(k, '') for k in keys]
            writer.writerow(row)

    # Bug fix: the message used to print the literal "(unknown)" instead of
    # the actual output path.
    print(f"\nData exported to: {filename}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, connect to vCenter, analyze, export.

    Credentials come from flags first, then from an optional INI config
    file's [vcenter] section. Always disconnects the session on exit.
    """
    parser = argparse.ArgumentParser(description='Historical VM performance analysis')
    parser.add_argument('--config', '-c', help='Config file path')
    parser.add_argument('--server', '-s', help='vCenter server')
    parser.add_argument('--username', '-u', help='Username')
    parser.add_argument('--password', '-p', help='Password')
    parser.add_argument('--vm', '-v', required=True, help='VM name to analyze')
    parser.add_argument('--days', '-d', type=int, default=30, help='Number of days to analyze (default: 30)')
    parser.add_argument('--export', '-e', help='Export data to CSV file')

    args = parser.parse_args()

    server = args.server
    username = args.username
    password = args.password

    # CLI flags take precedence; the config file only fills in what is
    # still missing.
    if args.config:
        config = configparser.ConfigParser()
        config.read(args.config)
        if 'vcenter' in config:
            server = server or config.get('vcenter', 'server', fallback=None)
            username = username or config.get('vcenter', 'username', fallback=None)
            password = password or config.get('vcenter', 'password', fallback=None)

    if not all([server, username, password]):
        print("Error: server, username, and password required")
        sys.exit(1)

    print(f"Connecting to {server}...")
    si = connect_vcenter(server, username, password)

    try:
        # Bug fix: analyze_vm_history may return None (VM not found / no
        # data); guard before unpacking so that case doesn't raise
        # TypeError instead of exiting cleanly.
        result = analyze_vm_history(si, args.vm, args.days)
        if result:
            data, stats = result
            if args.export and data:
                export_to_csv(data, args.export, args.vm)
    finally:
        Disconnect(si)
|
||||
|
||||
|
||||
# Script entry point when run directly (not on import).
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user