Package Products :: Package ZenUtils :: Module ZenCheckRRD
[hide private]
[frames] | no frames]

Source Code for Module Products.ZenUtils.ZenCheckRRD

  1  #!/usr/bin/env python 
  2  ############################################################################# 
  3  # This program is part of Zenoss Core, an open source monitoring platform. 
  4  # Copyright (C) 2010, Zenoss Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify it 
  7  # under the terms of the GNU General Public License version 2 or (at your 
  8  # option) any later version as published by the Free Software Foundation. 
  9  # 
 10  # For complete information please visit: http://www.zenoss.com/oss/ 
 11  ############################################################################# 
 12   
 13  import os 
 14  import re 
 15  import sys 
 16  from pickle import dump, load 
 17  from subprocess import Popen, PIPE 
 18  import logging 
 19  log = logging.getLogger('zen.checkRRD') 
 20   
 21  import Globals 
 22  import transaction 
 23  from Products.ZenUtils.Utils import zenPath 
 24  from Products.ZenUtils.ZenScriptBase import ZenScriptBase 
 25   
 26  CACHE_FILE = zenPath('var', 'zencheckrrd.cache') 
 27  rrdMatch = re.compile('DEF:[^=]+=([^:]+)').match 
 28   
class collectorStats:
    """Per-collector bookkeeping for one RRD check run.

    Holds the collector's identity, the counters that are incremented while
    results are written, and the path sets that are compared to classify
    each RRD file.
    """

    def __init__(self, id, hostname):
        """Create an empty record for the collector `id` on `hostname`."""
        # Identity of the collector this record describes.
        self.id, self.hostname = id, hostname

        # Running totals; every counter starts at zero.
        self.expectedComponents = self.stale = self.missing = self.orphan = 0

        # Path sets; each attribute owns a distinct, initially empty set.
        self.expectedFiles, self.staleFiles, self.allFiles = (
            set(), set(), set())
40
class ZenCheckRRD(ZenScriptBase):
    """Command-line check of on-disk RRD files against the Zenoss model.

    For each performance collector the script compares the RRD paths the
    model expects (taken from graph definitions, or from every data point
    when --all is given) with the *.rrd files found under the collector's
    perf/Devices directory.  One "<status>:<collector>:<path>" line is
    written to the --file output for every missing, stale or (with --all)
    orphaned file, and a summary table is printed to stdout.
    """

    def __init__(self):
        """Initialise the script base with a ZODB connection."""
        ZenScriptBase.__init__(self, connect=True)

    def buildOptions(self):
        """Register this script's command-line options on self.parser."""
        ZenScriptBase.buildOptions(self)
        self.parser.add_option('--age', dest='age',
            type='int', default=1,
            help="Number of days old to consider fresh (default=1)")
        self.parser.add_option('--all', dest='all',
            action="store_true", default=False,
            help="Check all data points. Not just ones used in graphs")
        self.parser.add_option('--pathcache', dest='pathcache',
            action="store_true", default=False,
            help="Cache the full list of RRD file paths in the model")
        self.parser.add_option('--devicesonly', dest='devicesonly',
            action="store_true", default=False,
            help="Only check for device files. Not components")
        self.parser.add_option('--collector', dest='collector',
            help="Name of specific collector to check (optional)")
        self.parser.add_option('--file', dest='file',
            help="Output filename")

    def run(self):
        """Run the check, write per-file results and print the summary.

        Returns None in all cases.  The method logs a critical message and
        returns early when --file is missing, when the output file cannot be
        opened, or when --collector names an unknown collector.
        """
        if not self.options.file:
            log.critical("You must specify the output file.")
            return

        try:
            outfile = open(self.options.file, 'w')
        except IOError as ex:
            log.critical("Unable to open %s for writing: %s",
                         self.options.file, ex)
            return

        # try/finally guarantees the output file is closed on the early
        # return below and on any exception raised while checking.
        try:
            if self.options.all:
                log.info("Starting check for missing, stale or orphaned RRD files")
                log.info("Results based on all RRD files defined by data points")
            else:
                log.info("Starting check for missing or stale RRD files")
                log.info("Results based on all RRD files used in graphs")

            collectors = self._selectCollectors()
            if collectors is None:
                return
            self._getExpectedFiles(collectors)

            for collector in collectors:
                if not collector.expectedFiles:
                    log.debug("No expected files found for collector %s",
                              collector.id)
                    continue

                # NOTE(review): if the collector cannot be reached,
                # _getCollectorFiles leaves allFiles empty and every expected
                # file is then reported as missing -- confirm that is wanted.
                self._getCollectorFiles(collector)
                self._writeCollectorResults(collector, outfile)
        finally:
            outfile.close()

        self.report(collectors)

    def _selectCollectors(self):
        """Return a collectorStats for each collector that should be checked.

        With --collector only that collector is returned; otherwise every
        "PerformanceConf" object under dmd.Monitors.Performance is used.
        Returns None (after logging) when the named collector is not found.
        """
        if self.options.collector:
            try:
                monitors = [self.dmd.Monitors.Performance._getOb(
                    self.options.collector)]
            except AttributeError:
                log.critical("No collector named %s could be found. Exiting",
                             self.options.collector)
                return None
        else:
            monitors = self.dmd.Monitors.Performance.objectValues(
                spec="PerformanceConf")

        # Collectors without a hostname attribute fall back to their id.
        return [collectorStats(m.id, getattr(m, 'hostname', m.id))
                for m in monitors]

    def _writeCollectorResults(self, collector, outfile):
        """Classify one collector's files, writing a line per problem file.

        Updates collector.orphan / .stale / .missing as lines are written.
        """
        if self.options.all:
            for path in sorted(collector.allFiles - collector.expectedFiles):
                outfile.write("orphaned:%s:%s\n" % (collector.id, path))
                collector.orphan += 1

        # Despite its name, staleFiles holds the recently modified (fresh)
        # files, so this difference is "expected but not fresh".
        for path in sorted(collector.expectedFiles - collector.staleFiles):
            if path in collector.allFiles:
                outfile.write("stale:%s:%s\n" % (collector.id, path))
                collector.stale += 1
            else:
                outfile.write("missing:%s:%s\n" % (collector.id, path))
                collector.missing += 1

    def report(self, collectors):
        """Print a per-collector summary table followed by a totals row.

        @param collectors: collectorStats objects already filled in by run().
        """
        totalExpectedRRDs = sum(len(x.expectedFiles) for x in collectors)
        totalAllRRDs = sum(len(x.allFiles) for x in collectors)
        totalMissingRRDs = sum(x.missing for x in collectors)
        totalStaleRRDs = sum(x.stale for x in collectors)

        # NOTE(review): the column spacing inside these literals looks
        # collapsed in the copy this was restored from (rows are narrower
        # than delimLen) -- confirm the exact padding against upstream.
        header = ("\n"
                  "On-disk Expected Missing Stale\n"
                  "Collector RRDs RRDs RRDs RRDs")
        delimLen = 65
        if self.options.all:
            header += " Orphans"
            delimLen = 75
        print(header)
        print('-' * delimLen)

        byName = dict((c.id, c) for c in collectors)
        for name in sorted(byName):
            collector = byName[name]
            line = "%-30s %6s %6s %6s %6s" % (
                name, len(collector.allFiles), len(collector.expectedFiles),
                collector.missing, collector.stale)
            if self.options.all:
                line += " %6s" % collector.orphan
            print(line)

        print('-' * delimLen)
        trailer = "%-30s %6s %6s %6s %6s" % (
            'Total', totalAllRRDs, totalExpectedRRDs,
            totalMissingRRDs, totalStaleRRDs)
        if self.options.all:
            trailer += " %6s" % sum(x.orphan for x in collectors)
        print(trailer)

    def _getExpectedFiles(self, collectors):
        """Fill each collector's expectedFiles from the model or the cache.

        With --pathcache and an existing CACHE_FILE the path list is read
        from the cache; otherwise it is rebuilt from the model and the cache
        is rewritten.  The cache is not keyed by --all, so a cache built
        with different options must be removed by hand.

        @param collectors: collectorStats objects to update in place.
        """
        if self.options.pathcache and os.path.isfile(CACHE_FILE):
            log.debug("Reading list of expected RRD files from cache...")
            rrdFiles, componentRrdFiles = self._loadPathCache()
            if self.options.devicesonly:
                componentRrdFiles = set()
        else:
            rrdFiles = set()
            componentRrdFiles = set()

            log.debug("Building list of expected device RRD files..")
            for device in self.dmd.Devices.getSubDevicesGen():
                if not device.monitorDevice():
                    continue
                rrdFiles.update(self._getRRDPaths(device))
                device._p_deactivate()

            if not self.options.devicesonly:
                log.debug("Building list of expected component RRD files..")
                for component in self._getAllMonitoredComponents():
                    componentRrdFiles.update(self._getRRDPaths(component))
                    component._p_deactivate()

            # Dump the cache in case we want to use it next time.
            self._savePathCache(rrdFiles, componentRrdFiles)

        byName = dict((c.id, c) for c in collectors)
        for collectorName, path in rrdFiles:
            collector = byName.get(collectorName)
            if collector:
                collector.expectedFiles.add(path)

        for collectorName, path in componentRrdFiles:
            collector = byName.get(collectorName)
            if collector:
                collector.expectedComponents += 1
                collector.expectedFiles.add(path)

    def _loadPathCache(self):
        """Return (devicePaths, componentPaths) read from CACHE_FILE.

        A cache holding a single set is accepted as device paths only.
        """
        # SECURITY: pickle can execute arbitrary code while loading; this
        # file must only ever be one written by _savePathCache.
        with open(CACHE_FILE, 'rb') as f:
            cached = load(f)
        if isinstance(cached, tuple) and len(cached) == 2:
            return set(cached[0]), set(cached[1])
        return set(cached), set()

    def _savePathCache(self, rrdFiles, componentRrdFiles):
        """Write both path sets to CACHE_FILE; failure is only a warning."""
        try:
            with open(CACHE_FILE, 'wb') as f:
                dump((rrdFiles, componentRrdFiles), f)
        except (IOError, OSError) as ex:
            # The cache is an optimisation; the check itself can proceed.
            log.warning("Unable to write RRD path cache %s: %s",
                        CACHE_FILE, ex)

    def _getRRDPaths(self, ob):
        """Return the set of (collector id, RRD path) pairs expected for ob.

        With --all there is one path per data point of every template bound
        to ob; otherwise the paths are those referenced by DEF: entries in
        the object's graph commands.  Returns an empty set when ob has no
        performance server.
        """
        ob_rrds = set()
        path = ob.fullRRDPath()
        perfServer = ob.getPerformanceServer()
        if not perfServer:
            return ob_rrds

        if self.options.all:
            for t in ob.getRRDTemplates():
                for ds in t.datasources():
                    for dp in ds.datapoints():
                        ob_rrds.add((perfServer.id,
                            os.path.join(path, "%s_%s.rrd" % (ds.id, dp.id))))
        else:
            for t in ob.getRRDTemplates():
                for g in t.graphDefs():
                    for cmd in g.getGraphCmds(ob, path):
                        match = rrdMatch(cmd)
                        if match:
                            ob_rrds.add((perfServer.id, match.group(1)))

        transaction.abort()
        return ob_rrds

    def _getAllMonitoredComponents(self):
        """Yield each monitored component whose snmpIgnore() is false."""
        brains = self.dmd.Devices.componentSearch({'monitored': True})
        for component in (b.getObject() for b in brains):
            if not component.snmpIgnore():
                yield component

    def _getCollectorFiles(self, collector):
        """Populate collector.allFiles and collector.staleFiles via find(1).

        allFiles receives every *.rrd path under perf/Devices; staleFiles
        receives the subset matched by "-mtime -<age>", i.e. the recently
        modified files.  The commands run locally when the collector's
        hostname is 'localhost' and over ssh otherwise.  If port 22 on a
        remote collector cannot be reached, a warning is logged and both
        sets are left unchanged.
        """
        def parseOutput(output):
            # One path per non-empty output line.
            return set(l.strip() for l in output.split('\n') if l)

        log.debug("Checking collector %s (%s) for RRD files",
                  collector.id, collector.hostname)

        # The pattern is quoted so neither the local nor the remote shell
        # expands it against *.rrd files in its working directory.
        allCmd = "find %s -name '*.rrd'" % zenPath('perf', 'Devices')
        staleCmd = "%s -mtime -%s" % (allCmd, self.options.age)

        if collector.hostname == 'localhost':
            allOutput = Popen(allCmd,
                shell=True, stdout=PIPE).communicate()[0]
            staleOutput = Popen(staleCmd,
                shell=True, stdout=PIPE).communicate()[0]
        else:
            # Quick check to see if we can SSH to the collector.
            p1 = Popen(["echo", "0"], stdout=PIPE)
            p2 = Popen(["nc", "-w", "4", collector.hostname, "22"],
                       stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
            # Drop our copy of the pipe so nc is its only reader.
            p1.stdout.close()
            # communicate() drains both pipes and reaps nc.
            p2.communicate()
            p1.wait()

            if p2.returncode != 0:
                log.warning("Unable to SSH to collector %s (%s)",
                            collector.id, collector.hostname)
                return

            allOutput = Popen(["ssh", collector.hostname, allCmd],
                stdout=PIPE).communicate()[0]
            staleOutput = Popen(["ssh", collector.hostname, staleCmd],
                stdout=PIPE).communicate()[0]

        collector.allFiles = parseOutput(allOutput)
        collector.staleFiles = parseOutput(staleOutput)


if __name__ == '__main__':
    # Script entry point: construct the checker and run it once.
    ZenCheckRRD().run()