Package Products :: Package ZenEvents :: Module Availability
[hide private]
[frames | no frames]

Source Code for Module Products.ZenEvents.Availability

  1  ########################################################################### 
  2  # 
  3  # This program is part of Zenoss Core, an open source monitoring platform. 
  4  # Copyright (C) 2007, Zenoss Inc. 
  5  # 
  6  # This program is free software; you can redistribute it and/or modify it 
  7  # under the terms of the GNU General Public License version 2 or (at your 
  8  # option) any later version as published by the Free Software Foundation. 
  9  # 
 10  # For complete information please visit: http://www.zenoss.com/oss/ 
 11  # 
 12  ########################################################################### 
 13   
 14  import time 
 15  from collections import defaultdict 
 16  from itertools import takewhile, chain 
 17   
 18  from Globals import InitializeClass 
 19  from Products.ZenModel.DeviceClass import DeviceClass 
 20  from Products.ZenModel.DeviceGroup import DeviceGroup 
 21  from Products.ZenModel.Location import Location 
 22  from Products.ZenModel.System import System 
 23  from Products.ZenUtils import Map 
 24  from Products.ZenEvents.ZenEventClasses import Status_Ping, Status_Snmp 
 25  from Products.ZenEvents.ZenEventClasses import Status_OSProcess 
 26  from Products.Zuul import getFacade 
 27  from Products.Zuul.interfaces import ICatalogTool 
 28  from Products.AdvancedQuery import Eq, Or, Not 
 29  from zenoss.protocols.protobufs.zep_pb2 import (SEVERITY_CRITICAL, SEVERITY_ERROR, 
 30                                                  SEVERITY_WARNING, SEVERITY_INFO, 
 31                                                  SEVERITY_DEBUG, SEVERITY_CLEAR) 
 32  from zenoss.protocols.protobufs.zep_pb2 import (STATUS_NEW, STATUS_ACKNOWLEDGED, 
 33                                                  STATUS_SUPPRESSED, STATUS_CLOSED, 
 34                                                  STATUS_CLEARED, STATUS_DROPPED, 
 35                                                  STATUS_AGED) 
 36   
 37  ALL_EVENT_STATUSES = set([STATUS_NEW, STATUS_ACKNOWLEDGED, 
 38                          STATUS_SUPPRESSED, STATUS_CLOSED, 
 39                          STATUS_CLEARED, STATUS_DROPPED, 
 40                          STATUS_AGED]) 
 41  CLOSED_EVENT_STATUSES = set([STATUS_CLOSED, STATUS_CLEARED, 
 42                               STATUS_DROPPED, STATUS_AGED]) 
 43  OPEN_EVENT_STATUSES = ALL_EVENT_STATUSES - CLOSED_EVENT_STATUSES 
def _severityGreaterThanOrEqual(sev):
    """Return the list of severities at or above ``sev``.

    Severity priority is spelled out explicitly (most severe first) rather
    than derived from the numeric values of the constants.  The result holds
    every severity that precedes ``sev`` in that ordering, followed by
    ``sev`` itself.  If ``sev`` is not one of the known severities, every
    known severity is returned with ``sev`` appended at the end.
    """
    most_severe_first = (
        SEVERITY_CRITICAL,
        SEVERITY_ERROR,
        SEVERITY_WARNING,
        SEVERITY_INFO,
        SEVERITY_DEBUG,
        SEVERITY_CLEAR,
    )
    selected = []
    for candidate in most_severe_first:
        if candidate == sev:
            break
        selected.append(candidate)
    selected.append(sev)
    return selected
56
def _lookupUuid(catalog, cls, identifier):
    """Return the uuid of the first catalogued object matching ``identifier``.

    ``catalog`` is searched through ICatalogTool for objects of type ``cls``
    whose ``id`` or ``name`` equals ``identifier``.  Returns None when the
    search reports no results.
    """
    matches_identifier = Or(Eq('id', identifier), Eq('name', identifier))
    found = ICatalogTool(catalog).search(cls, query=matches_identifier)
    if not found.total:
        return None
    first_hit = next(found.results)
    return first_hit.uuid
from AccessControl import ClassSecurityInfo

# Granularity, in seconds, used both as the second argument of the timed
# result cache below and as the step that _round() snaps report dates to.
CACHE_TIME = 60.0

# Module-wide cache of report results, keyed by Report.tuple().
# NOTE(review): presumably Map.Timed expires entries after CACHE_TIME seconds
# and Map.Locked serialises access -- confirm against Products.ZenUtils.Map.
_cache = Map.Locked(Map.Timed({}, CACHE_TIME))
def _round(value):
    """Snap ``value`` down to a whole multiple of CACHE_TIME.

    None is passed through unchanged so that "not specified" dates stay
    unspecified.
    """
    return None if value is None else (value // CACHE_TIME) * CACHE_TIME
74
75 -def _findComponent(device, name):
76 for c in device.getMonitoredComponents(): 77 if c.name() == name: 78 return c 79 return None
80
class Availability(object):
    """Simple record for holding availability information.

    One instance describes one (device, component) pair of a report:

    device       -- device identifier (string)
    component    -- component name (string; '' when the row is device-level)
    systems      -- system names string for the device
    availability -- fraction of the report window without downtime,
                    never negative
    """
    security = ClassSecurityInfo()
    security.setDefaultAccess('allow')

    @staticmethod
    def getDefaultAvailabilityStart(dmd):
        """Default report start (seconds since the epoch): now minus
        dmd.ZenEventManager.defaultAvailabilityDays days."""
        return time.time() - dmd.ZenEventManager.defaultAvailabilityDays*24*60*60

    @staticmethod
    def getDefaultAvailabilityEnd():
        """Default report end (seconds since the epoch): now."""
        return time.time()

    def __init__(self, device, component, downtime, total, systems=''):
        """Compute availability from ``downtime`` out of ``total``.

        ``downtime`` and ``total`` must be in the same time unit; only their
        ratio is kept.
        """
        self.device = device
        self.systems = systems
        self.component = component

        # Guard against endDate being equal to or less than startDate.
        if total <= 0:
            self.availability = 0 if downtime else 1
        else:
            self.availability = max(0, 1 - (float(downtime) / total))

    def floatStr(self):
        """Return the availability as a percentage string, e.g. '99.500%'."""
        return '%2.3f%%' % (self.availability * 100)

    def __str__(self):
        return self.floatStr()

    def __repr__(self):
        return '[%s %s %s]' % (self.device, self.component, self.floatStr())

    def __float__(self):
        # Fraction of the window (not a percentage).
        return float(self.availability)

    def __int__(self):
        # Whole-number percentage, truncated.
        return int(self.availability * 100)

    def __cmp__(self, other):
        """Order by availability, then device, then component.

        ``component`` is a plain string attribute, so it is compared as a
        value; calling it would raise TypeError.
        """
        return cmp((self.availability, self.device, self.component),
                   (other.availability, other.device, other.component))

    def getDevice(self, dmd):
        """Return the device object for this record, looked up by identifier."""
        return dmd.Devices.findDevice(self.device)

    def getComponent(self, dmd):
        """Return the matching monitored component object, or None when the
        record has no device/component or nothing matches."""
        if self.device and self.component:
            device = self.getDevice(dmd)
            if device:
                return _findComponent(device, self.component)
        return None


# NOTE(review): the listing's embedded numbering jumps from 133 to 139 just
# before this call, so part of the class may be absent from this view --
# check against the original module.
InitializeClass(Availability)
class Report(object):
    """Determine availability by counting the amount of time down.

    A Report captures the filter criteria for one availability query; run()
    evaluates it against ZEP event summaries.  Instances hash and compare by
    tuple(), which is also what query() uses as its cache key.
    """

    def __init__(self,
                 startDate = None,
                 endDate = None,
                 eventClass=Status_Ping,
                 severity=5,
                 device=None,
                 component='',
                 prodState=1000,
                 manager=None,
                 agent=None,
                 DeviceClass=None,
                 Location=None,
                 System=None,
                 DeviceGroup=None,
                 DevicePriority=None,
                 monitor=None):
        """Store the report criteria.

        startDate, endDate -- report window in seconds since the epoch,
            snapped down to a multiple of CACHE_TIME; None selects the
            defaults provided by Availability at run() time.
        eventClass -- event class whose events count as downtime.
        severity -- minimum severity of events to count.
        device, component -- optional device / component identifiers.
        prodState, DevicePriority -- optional integer device filters.
        DeviceClass, Location, System, DeviceGroup -- optional organizer
            identifiers used to restrict the report.
        agent, monitor -- optional event agent / monitor filters.
        manager -- stored and part of the cache key; not used by run().
        """
        self.startDate = _round(startDate)
        self.endDate = _round(endDate)
        self.eventClass = eventClass
        self.severity = int(severity) if severity is not None else None
        self.device = device
        self.component = component
        self.prodState = int(prodState) if prodState is not None else None
        self.manager = manager
        self.agent = agent
        self.DeviceClass = DeviceClass
        self.Location = Location
        self.System = System
        self.DeviceGroup = DeviceGroup
        self.DevicePriority = int(DevicePriority) if DevicePriority is not None else None
        self.monitor = monitor

    def tuple(self):
        """Return every criterion as one tuple (identity of the report)."""
        return (
            self.startDate, self.endDate, self.eventClass, self.severity,
            self.device, self.component, self.prodState, self.manager,
            self.agent, self.DeviceClass, self.Location, self.System,
            self.DeviceGroup, self.DevicePriority, self.monitor)

    def __hash__(self):
        return hash(self.tuple())

    def __cmp__(self, other):
        return cmp(self.tuple(), other.tuple())

    def run(self, dmd):
        """Run the report, returning an Availability object for each device
        (or for each device/component pair when a component filter is set)."""
        # Note: we don't handle overlapping "down" events, so down
        # time could get double-counted.
        __pychecker__='no-local'
        now = time.time()
        zep = getFacade("zep", dmd)
        endDate = self.endDate or Availability.getDefaultAvailabilityEnd()
        endDate = min(endDate, now)
        startDate = self.startDate
        if not startDate:
            startDate = Availability.getDefaultAvailabilityStart(dmd)

        # convert start and end date to integer milliseconds for defining filters
        startDate = int(startDate*1000)
        endDate = int(endDate*1000)
        total_report_window = endDate - startDate

        create_filter_args = {
            'operator' : zep.AND,
            'severity' : _severityGreaterThanOrEqual(self.severity),
            'event_class' : self.eventClass +
                            ('/' if not self.eventClass.endswith('/') else '')
            }
        if self.device:
            create_filter_args['element_identifier'] = '"%s"' % self.device
        if self.component:
            create_filter_args['element_sub_identifier'] = '"%s"' % self.component
        if self.agent:
            create_filter_args['agent'] = self.agent
        if self.monitor is not None:
            create_filter_args['monitor'] = self.monitor

        # add filters on details
        filter_details = {}
        if self.DevicePriority is not None:
            filter_details['zenoss.device.priority'] = "%d:" % self.DevicePriority
        if self.prodState:
            filter_details['zenoss.device.production_state'] = "%d:" % self.prodState
        if filter_details:
            create_filter_args['details'] = filter_details

        # add filters on tagged values
        tag_uuids = []
        if self.DeviceClass:
            tag_uuids.append(_lookupUuid(dmd.Devices, DeviceClass, self.DeviceClass))
        if self.Location:
            tag_uuids.append(_lookupUuid(dmd.Locations, Location, self.Location))
        if self.System is not None:
            tag_uuids.append(_lookupUuid(dmd.Systems, System, self.System))
        if self.DeviceGroup is not None:
            tag_uuids.append(_lookupUuid(dmd.Groups, DeviceGroup, self.DeviceGroup))
        # drop organizers whose uuid lookup found nothing
        tag_uuids = [uuid for uuid in tag_uuids if uuid]
        if tag_uuids:
            create_filter_args['tags'] = tag_uuids

        # query zep for matching event summaries
        # 1. get all open events that:
        #    - first_seen < endDate
        #    (only need to check active events)
        # 2. get all closed events that:
        #    - first_seen < endDate
        #    - status_change > startDate
        #    (must get both active and archived events)

        # 1. get open events
        create_filter_args['first_seen'] = (0,endDate)
        create_filter_args['status'] = OPEN_EVENT_STATUSES
        event_filter = zep.createEventFilter(**create_filter_args)
        open_events = zep.getEventSummariesGenerator(event_filter)

        # 2. get closed events
        create_filter_args['status_change'] = (startDate+1,)
        create_filter_args['status'] = CLOSED_EVENT_STATUSES
        event_filter = zep.createEventFilter(**create_filter_args)
        closed_events = zep.getEventSummariesGenerator(event_filter)
        # must also get events from archive
        closed_events_from_archive = zep.getEventSummariesGenerator(event_filter, archive=True)

        # walk events, tallying up downtime
        accumulator = defaultdict(int)
        for evtsumm in chain(open_events, closed_events, closed_events_from_archive):

            first = evtsumm['first_seen_time']
            # if event is still open, downtime persists til end of report window
            if evtsumm['status'] not in CLOSED_EVENT_STATUSES:
                last = endDate
            else:
                last = evtsumm['status_change_time']

            # discard any events that have no elapsed time
            if first == last:
                continue

            # clip first and last within report time window
            first = max(first, startDate)
            last = min(last, endDate)

            evt = evtsumm['occurrence'][0]
            evt_actor = evt['actor']
            device = evt_actor.get('element_identifier')
            component = evt_actor.get('element_sub_identifier')

            # Only treat component specially if a component filter was specified.
            if self.component:
                accumKey = (device, component)
            else:
                accumKey = (device, '')

            accumulator[accumKey] += (last-first)

        if self.device:
            deviceList = []
            device = dmd.Devices.findDevice(self.device)
            if device:
                deviceList = [device]
                # make sure the requested device shows up even without events
                accumulator[(self.device, self.component)] += 0
        else:
            deviceList = []
            if (not self.DeviceClass and not self.Location and
                not self.System and not self.DeviceGroup):
                deviceList = dmd.Devices.getSubDevices()
            else:
                allDevices = dict((dev.id,dev) for dev in dmd.Devices.getSubDevices())
                allDeviceIds = set(allDevices.keys())

                def getOrgSubDevices(cat, orgId, allIds=allDeviceIds):
                    # ids of the devices under organizer orgId; every device
                    # id when no organizer was given or it cannot be found
                    if orgId:
                        try:
                            org = cat.getOrganizer(orgId)
                        except KeyError:
                            pass
                        else:
                            return set(d.id for d in org.getSubDevices())
                    return allIds
                deviceClassDevices = getOrgSubDevices(dmd.Devices, self.DeviceClass)
                locationDevices = getOrgSubDevices(dmd.Locations, self.Location)
                systemDevices = getOrgSubDevices(dmd.Systems, self.System)
                deviceGroupDevices = getOrgSubDevices(dmd.Groups, self.DeviceGroup)

                # Intersect all of the organizers.
                deviceList.extend(allDevices[deviceId]
                                  for deviceId in (deviceClassDevices & locationDevices &
                                                   systemDevices & deviceGroupDevices))

            if not self.component:
                # Seed a zero-downtime entry for every device selected by the
                # report.  Only the filtered deviceList is used, so devices
                # outside the requested organizers are not reported.
                for dev in deviceList:
                    accumulator[(dev.id, '')] += 0

        # walk accumulator, generate report results
        deviceLookup = dict((dev.id, dev) for dev in deviceList)
        result = []
        lastdevid = None
        sysname = ''
        for (devid, compid), downtime in sorted(accumulator.items()):
            if devid != lastdevid:
                dev = deviceLookup.get(devid, None)
                if dev:
                    sysname = dev.getSystemNamesString()
                else:
                    sysname = ''
                lastdevid = devid
            result.append(Availability(devid, compid, downtime, total_report_window, sysname))

        # add in the devices that have the component, but no events - assume this means no downtime
        if self.component:
            downtime = 0
            for dev in deviceList:
                sysname = dev.getSystemNamesString()
                for comp in dev.getMonitoredComponents():
                    compName = comp.name()
                    if self.component not in compName:
                        continue
                    # pairs that already produced a row above must not get a
                    # second, contradictory 100% row
                    if (dev.id, compName) in accumulator:
                        continue
                    result.append(Availability(dev.id, compName, downtime, total_report_window, sysname))
        return result
365
def query(dmd, *args, **kwargs):
    """Return the availability results for the given report criteria.

    Positional and keyword arguments are handed to Report unchanged.  Results
    are looked up in the module-level cache under the report's tuple(); on a
    miss the report is run against ``dmd`` and the result is stored before
    being returned.
    """
    report = Report(*args, **kwargs)
    cache_key = report.tuple()
    try:
        cached = _cache[cache_key]
    except KeyError:
        cached = report.run(dmd)
        _cache[cache_key] = cached
    return cached
if __name__ == '__main__':
    # Ad-hoc smoke test; it needs a Zenoss instance reachable through
    # ZCmdBase and a device named 'gate.zenoss.loc'.
    import pprint
    from Products.ZenUtils.ZCmdBase import ZCmdBase

    thirty_days = 60*60*24*30
    report = Report(time.time() - thirty_days)
    start = time.time() - thirty_days

    # SNMP status, all components (alternative: report.component = 'snmp')
    report.component = None
    report.eventClass = Status_Snmp
    report.severity = 3
    cmd = ZCmdBase()
    pprint.pprint(report.run(cmd.dmd))

    # Two identical queries: the second must come back from the cache as the
    # very same object.
    first = query(cmd.dmd, start, device='gate.zenoss.loc', eventClass=Status_Ping)
    assert 0 <= float(first[0]) <= 1.
    second = query(cmd.dmd, start, device='gate.zenoss.loc', eventClass=Status_Ping)
    assert first == second
    assert id(first) == id(second)
    pprint.pprint(report.run(cmd.dmd))

    # OS process status for a single component name
    report.component = 'httpd'
    report.eventClass = Status_OSProcess
    report.severity = 4
    pprint.pprint(report.run(cmd.dmd))

    # Ping status for a single device
    report.device = 'gate.zenoss.loc'
    report.component = ''
    report.eventClass = Status_Ping
    report.severity = 4
    pprint.pprint(report.run(cmd.dmd))