1
2
3
4
5
6
7
8
9
10
11
12
13 import os
14 import re
15 import sys
16 from pickle import dump, load
17 from subprocess import Popen, PIPE
18 import logging
19 log = logging.getLogger('zen.checkRRD')
20
21 import Globals
22 import transaction
23 from Products.ZenUtils.Utils import zenPath
24 from Products.ZenUtils.ZenScriptBase import ZenScriptBase
25
# Path whose existence is tested before a cached list of expected RRD paths
# is used (see the ``--pathcache`` option).
CACHE_FILE = zenPath('var', 'zencheckrrd.cache')

# Bound ``match`` for strings that start with ``DEF:<name>=<value>``; group 1
# is the text between the first '=' and the next ':'.
# NOTE(review): presumably applied to RRDtool graph DEF statements to pull
# out the RRD file name -- the caller is not visible here, confirm.
rrdMatch = re.compile(r'DEF:[^=]+=([^:]+)').match
28
class collectorStats:
    """Per-collector tallies and file sets filled in while RRDs are checked.

    NOTE(review): the ``class``/``def`` header lines are absent from this
    listing; the name and the ``(id, hostname)`` signature are taken from
    the ``collectorStats(x.id, getattr(x, 'hostname', x.id))`` call site.
    Confirm the base class against the real file.
    """

    def __init__(self, id, hostname):
        """Start with zeroed counters and empty path sets.

        id       -- collector identifier, used as the key in reports
        hostname -- host on which the collector's RRD files are looked up
        """
        self.id = id
        self.hostname = hostname

        # Counters.
        self.expectedComponents = 0   # component RRD paths expected here
        self.stale = 0                # expected files present but not fresh
        self.missing = 0              # expected files not present at all
        self.orphan = 0               # files present but not expected

        # Path sets; each instance gets its own set objects.
        self.expectedFiles = set()    # paths the model says should exist
        # Despite the name, this holds the files reported by
        # ``find ... -mtime -<age>``, i.e. the recently modified ones.
        self.staleFiles = set()
        self.allFiles = set()         # every *.rrd path found for the host
40
44
45
def buildOptions(self):
    """Register this script's command-line options on ``self.parser``.

    The options inherited from ``ZenScriptBase`` are registered first, then:

    --age          days within which a file counts as fresh (int, default 1)
    --all          check every data point, not only those used in graphs
    --pathcache    cache the full list of RRD file paths in the model
    --devicesonly  check device files only, skipping components
    --collector    name of a single collector to check (optional)
    --file         output filename

    NOTE(review): the ``def`` line is absent from this listing; the
    signature is taken from the ``ZenScriptBase.buildOptions(self)`` call.
    """
    ZenScriptBase.buildOptions(self)
    addOption = self.parser.add_option

    addOption('--age', dest='age',
              type='int', default=1,
              help="Number of days old to consider fresh (default=1)")
    addOption('--all', dest='all',
              action="store_true", default=False,
              help="Check all data points. Not just ones used in graphs")
    addOption('--pathcache', dest='pathcache',
              action="store_true", default=False,
              help="Cache the full list of RRD file paths in the model")
    addOption('--devicesonly', dest='devicesonly',
              action="store_true", default=False,
              help="Only check for device files. Not components")
    addOption('--collector', dest='collector',
              help="Name of specific collector to check (optional)")
    addOption('--file', dest='file',
              help="Output filename")
64
65
def run(self):
    """Check the selected collectors' RRD files and write the findings.

    One line per problem file is written to the file named by ``--file``
    in the form ``<state>:<collector id>:<path>``, where state is
    ``orphaned`` (only with ``--all``), ``stale`` or ``missing``.  The
    summary table is then printed through ``self.report``.

    Returns None early, after logging a critical message, when no output
    file was given, the output file cannot be opened, or the collector
    named by ``--collector`` does not exist.  The output file is closed on
    every path out of the method once it has been opened.

    NOTE(review): the ``def`` line is absent from this listing; the
    signature is taken from the ``zrc.run()`` call.
    """
    if not self.options.file:
        log.critical("You must specify the output file.")
        return

    try:
        outfile = open(self.options.file, 'w')
    except IOError:
        # sys.exc_info() instead of "except IOError, ex" so the clause
        # parses regardless of the interpreter version.
        log.critical("Unable to open %s for writing: %s",
                     self.options.file, sys.exc_info()[1])
        return

    # Everything that can return or raise after the open sits inside the
    # try so the handle is never left open.
    try:
        if self.options.all:
            log.info("Starting check for missing, stale or orphaned RRD files")
            log.info("Results based on all RRD files defined by data points")
        else:
            log.info("Starting check for missing or stale RRD files")
            log.info("Results based on all RRD files used in graphs")

        if self.options.collector:
            try:
                monitors = [self.dmd.Monitors.Performance._getOb(
                    self.options.collector)]
            except AttributeError:
                log.critical("No collector named %s could be found. Exiting",
                             self.options.collector)
                return
        else:
            monitors = self.dmd.Monitors.Performance.objectValues(
                spec="PerformanceConf")

        # A monitor without a hostname attribute is addressed by its id.
        collectors = [collectorStats(m.id, getattr(m, 'hostname', m.id))
                      for m in monitors]
        self._getExpectedFiles(collectors)

        for collector in collectors:
            if not collector.expectedFiles:
                collector.expected = 0
                log.debug("No expected files found for collector %s",
                          collector.id)
                continue

            # NOTE(review): if the collector cannot be reached its file sets
            # stay empty, so every expected path is written as "missing".
            self._getCollectorFiles(collector)

            if self.options.all:
                for path in sorted(collector.allFiles - collector.expectedFiles):
                    outfile.write("orphaned:%s:%s\n" % (collector.id, path))
                    collector.orphan += 1

            # staleFiles holds the recently modified files, so the
            # difference is every expected path that is not fresh.
            for path in sorted(collector.expectedFiles - collector.staleFiles):
                if path in collector.allFiles:
                    outfile.write("stale:%s:%s\n" % (collector.id, path))
                    collector.stale += 1
                else:
                    outfile.write("missing:%s:%s\n" % (collector.id, path))
                    collector.missing += 1
    finally:
        outfile.close()

    self.report(collectors)
124
125
def report(self, collectors):
    """Print the per-collector summary table to standard output.

    collectors -- iterable of objects providing ``id``, ``allFiles``,
                  ``expectedFiles``, ``missing``, ``stale`` and ``orphan``.

    Rows are ordered by collector id and followed by a ``Total`` row.  The
    orphan column is included only when ``self.options.all`` is set.
    """
    showOrphans = self.options.all
    rowFormat = "%-30s %6s %6s %6s %6s"

    # NOTE(review): the header text is reproduced exactly as it appears in
    # this listing; any padding that aligned it with the row columns may
    # have been lost -- confirm against the intended layout.
    header = """
On-disk Expected Missing Stale
Collector RRDs RRDs RRDs RRDs"""
    delimLen = 65
    if showOrphans:
        header += " Orphans"
        delimLen = 75
    rule = '-' * delimLen

    # print(<one string>) behaves the same as a statement and as a function.
    print(header)
    print(rule)

    # Keyed by id, so a repeated id collapses to its last entry (as before).
    byId = dict((c.id, c) for c in collectors)
    for name in sorted(byId):
        collector = byId[name]
        line = rowFormat % (
            name, len(collector.allFiles), len(collector.expectedFiles),
            collector.missing, collector.stale)
        if showOrphans:
            line += " %6s" % collector.orphan
        print(line)

    print(rule)
    trailer = rowFormat % (
        'Total',
        sum(len(c.allFiles) for c in collectors),
        sum(len(c.expectedFiles) for c in collectors),
        sum(c.missing for c in collectors),
        sum(c.stale for c in collectors))
    if showOrphans:
        trailer += " %6s" % sum(c.orphan for c in collectors)
    print(trailer)
161
162
def _getExpectedFiles(self, collectors):
    """Fill in the RRD paths the model expects on each collector.

    collectors -- objects providing ``id``, ``expectedFiles`` (a set) and
                  ``expectedComponents`` (an int); both are updated in place.

    Device paths are always gathered; component paths are gathered unless
    ``--devicesonly`` is set.  Each gathered item is a
    ``(collector name, path)`` pair; pairs naming a collector that is not
    in *collectors* are ignored.

    With ``--pathcache`` an existing ``CACHE_FILE`` is read instead of
    walking the model.  The cache holds the pickled tuple
    ``(device pairs, component pairs or None)``; a bare set, which is what
    this script used to dump, is accepted as device pairs only.  Whatever
    the cache lacks is rebuilt from the model and the cache is rewritten.

    The cache is a pickle, so ``CACHE_FILE`` must only ever be a trusted,
    locally written file.

    NOTE(review): the ``def`` line is absent from this listing; the
    signature is taken from ``self._getExpectedFiles(collectors)``.
    """
    wantComponents = not self.options.devicesonly
    deviceFiles = None
    componentFiles = None

    if self.options.pathcache and os.path.isfile(CACHE_FILE):
        log.debug("Reading list of expected RRD files from cache...")
        # Read the file whose existence was just tested.
        cacheIn = open(CACHE_FILE, 'rb')
        try:
            cached = load(cacheIn)
        finally:
            cacheIn.close()
        if isinstance(cached, tuple) and len(cached) == 2:
            deviceFiles, componentFiles = cached
        else:
            deviceFiles = cached

    rebuilt = False

    if deviceFiles is None:
        log.debug("Building list of expected device RRD files..")
        deviceFiles = set()
        for device in self.dmd.Devices.getSubDevicesGen():
            if not device.monitorDevice():
                continue
            deviceFiles.update(self._getRRDPaths(device))
            device._p_deactivate()
        rebuilt = True

    if wantComponents and componentFiles is None:
        log.debug("Building list of expected component RRD files..")
        componentFiles = set()
        for component in self._getAllMonitoredComponents():
            componentFiles.update(self._getRRDPaths(component))
            component._p_deactivate()
        rebuilt = True

    if rebuilt:
        # Component paths are stored too, so a later cache hit does not
        # silently lose them.  A failed write only costs the cache.
        try:
            cacheOut = open(CACHE_FILE, 'wb')
        except IOError:
            log.warn("Unable to write RRD path cache %s", CACHE_FILE)
        else:
            try:
                dump((deviceFiles, componentFiles), cacheOut)
            finally:
                cacheOut.close()

    if not wantComponents:
        # Cached component paths are ignored under --devicesonly.
        componentFiles = set()

    byId = dict((c.id, c) for c in collectors)

    for collectorName, path in deviceFiles:
        collector = byId.get(collectorName)
        if collector is not None:
            collector.expectedFiles.add(path)

    for collectorName, path in componentFiles:
        collector = byId.get(collectorName)
        if collector is not None:
            collector.expectedComponents += 1
            collector.expectedFiles.add(path)
201
202
223
224
230
231
def _getCollectorFiles(self, collector):
    """Record which RRD files exist for a collector and which are fresh.

    ``find`` is run under ``zenPath('perf', 'Devices')`` -- directly when
    ``collector.hostname`` is ``'localhost'``, otherwise through ``ssh`` --
    and the results are stored on *collector*:

    allFiles   -- every ``*.rrd`` path found
    staleFiles -- the paths matched by ``-mtime -<age>``; despite the
                  attribute name these are the recently modified files

    For a remote host, port 22 is probed with ``nc`` first.  If the probe
    fails a warning is logged and the method returns without touching
    ``allFiles`` or ``staleFiles``.

    NOTE(review): the ``def`` line is absent from this listing; the
    signature is taken from ``self._getCollectorFiles(collector)``.
    """
    def parseOutput(output):
        """Return the set of stripped, non-blank lines in *output*."""
        stripped = (line.strip() for line in output.split('\n'))
        # Filter after stripping so whitespace-only lines do not end up
        # in the set as ''.
        return set(line for line in stripped if line)

    log.debug("Checking collector %s (%s) for RRD files",
              collector.id, collector.hostname)

    perfRoot = zenPath('perf', 'Devices')

    if collector.hostname == 'localhost':
        # Argument list with no shell, so nothing can expand the *.rrd
        # pattern before find sees it.
        findArgs = ["find", perfRoot, "-name", "*.rrd"]
        freshArgs = findArgs + ["-mtime", "-%s" % self.options.age]
        allOutput = Popen(findArgs, stdout=PIPE).communicate()[0]
        staleOutput = Popen(freshArgs, stdout=PIPE).communicate()[0]
    else:
        # Cheap reachability probe of the SSH port before the real work.
        p1 = Popen(["echo", "0"], stdout=PIPE)
        p2 = Popen(["nc", "-w", "4", collector.hostname, "22"],
                   stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        p1.stdout.close()   # p2 holds the read end now
        p2.communicate()    # drain and close the pipes, reap nc
        p1.wait()

        if p2.returncode != 0:
            log.warn("Unable to SSH to collector %s (%s)",
                     collector.id, collector.hostname)
            return

        # The remote shell parses this string, so the pattern is quoted.
        allCmd = "find %s -name '*.rrd'" % perfRoot
        staleCmd = "%s -mtime -%s" % (allCmd, self.options.age)
        allOutput = Popen(["ssh", collector.hostname, allCmd],
                          stdout=PIPE).communicate()[0]
        staleOutput = Popen(["ssh", collector.hostname, staleCmd],
                            stdout=PIPE).communicate()[0]

    collector.allFiles = parseOutput(allOutput)
    collector.staleFiles = parseOutput(staleOutput)
268
269
# Script entry point: build the checker and run it.  Nothing is executed
# when the module is merely imported.
if __name__ == '__main__':
    zrc = ZenCheckRRD()
    zrc.run()
273