diff --git a/msg/Metric.msg b/msg/Metric.msg index 08f38888fbc42add8b521e2a666e4c7f21fa00a6..b48d41da4e59744b4078d67fe86bcca27e572af9 100644 --- a/msg/Metric.msg +++ b/msg/Metric.msg @@ -1,5 +1,7 @@ -string component # A metric is affiliated with a roboter component. -string label # Label of the metric. -string value # Value of the metric. -string unit # Unit of the metric. -float32 critical # Critical level of the metric. \ No newline at end of file +string domain # The domain the metric + # is affiliated with. +string label # Label of the metric. +string value # Value of the metric. +string unit # Unit of the metric. +float32 error # Error level of the + # metric. \ No newline at end of file diff --git a/msg/State.msg b/msg/State.msg deleted file mode 100644 index 0d2c235a8af317a531ffa9be244984e49e194967..0000000000000000000000000000000000000000 --- a/msg/State.msg +++ /dev/null @@ -1,3 +0,0 @@ -string component # Name of the component, whose - # critical level is monitored. -float32 critical # Critical level of the family. \ No newline at end of file diff --git a/msg/States.msg b/msg/States.msg deleted file mode 100644 index d6705831806ee8d3908995353186638fe8aa46c5..0000000000000000000000000000000000000000 --- a/msg/States.msg +++ /dev/null @@ -1 +0,0 @@ -State[] states # Array of states. diff --git a/nodes/monitor_distance b/nodes/monitor_distance deleted file mode 100755 index 7eee550064c5e35b31349021740a0eac2d17a4b3..0000000000000000000000000000000000000000 --- a/nodes/monitor_distance +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env pyhton - -import rospy -from monitoring.monitor import Monitor -from sensor_msgs.msg import LaserScan - - -def scan_callback(msg): - global range_ahead - range_ahead = msg.ranges[int(len(msg.ranges) / 2)] - - -if __name__ == '__main__': - try: - range_ahead = 10 - scan_sub = rospy.Subscriber("scan", LaserScan, scan_callback) - rospy.init_node("monitor_distance") - monitor = Monitor(1, 1) - - rate = rospy.Rate(10) - while not rospy.is_shutdown(): - print(range_ahead) - if range_ahead > 0.8: - critical = 0 - else: - if range_ahead > 0.35: - critical = 0.5 - else: - critical = 1.0 - - monitor.update_metric("scan", "distance", range_ahead, "unit", critical) - rate.sleep() - except rospy.ROSInterruptException: - pass diff --git a/nodes/monitor_distance.py b/nodes/monitor_distance.py new file mode 100755 index 0000000000000000000000000000000000000000..63e2da420ad58fc38b49b1e70f315b383b231a34 --- /dev/null +++ b/nodes/monitor_distance.py @@ -0,0 +1,35 @@ +#!/usr/bin/env pyhton + +import rospy +from sensor_msgs.msg import LaserScan +from monitoring.monitor import Monitor + + +def calculate_distance(msg): + global distance + distance = msg.ranges[int(len(msg.ranges) / 2)] + + +if __name__ == '__main__': + try: + sub = rospy.Subscriber("scan", LaserScan, calculate_distance) + rospy.init_node("monitor_distance") + + monitor = Monitor() + + rate = rospy.Rate(10) + while not rospy.is_shutdown(): + + if distance > 0.8: + error = 0 + else: + if distance > 0.35: + error = 0.5 + else: + error = 1.0 + + monitor.update_metric("kinect", "distance", distance, "meter", error) + rate.sleep() + + except rospy.ROSInterruptException: + pass diff --git a/src/monitoring/monitor.py b/src/monitoring/monitor.py index 307b26a2f0fbe5f5ab5d0c47651f5412ddfc51f7..5d4f21688ef88746af8fbacc85d25ad963281325 100755 --- a/src/monitoring/monitor.py +++ b/src/monitoring/monitor.py @@ -2,21 +2,36 @@ import rospy import socket -from monitoring.msg import Monitoring + from monitoring.msg import Metric +from monitoring.msg import Monitoring class Monitor: - def __init__(self, mode=1, freq=1.0): + """ + Each metric should have its own monitor. + A monitor has four modes: + + - Mode 1: Publish the last received value. + - Mode 2: Publish the lowest received value. + - Mode 3: Publish the highest received value. + - Mode 4: Publish the average received value + over five or more seconds. + + Furthermore, a monitor publishes messages with a + certain frequency. + """ + def __init__(self, mon_mode=1, mon_freq=1.0): self.log = {} # Logging dictionary. Is used to accomplish different publishing modes. - self.pub = rospy.Publisher('monitoring', Monitoring, queue_size=1) - self.mode = mode + self.pub = rospy.Publisher('/monitoring', Monitoring, queue_size=1) + self.mon_mode = mon_mode - if not freq > 0.0: - rospy.logwarn("Frequency must be greater then 0! Using 1 as frequency!") - freq = 1.0 + if not mon_freq > 0.0: + rospy.logwarn("The frequency at which a monitor publishes messages must be greater then 0! " + "Using 1 as frequency!") + mon_freq = 1.0 - self.timer = rospy.Timer(rospy.Duration(1.0 / freq), self.publish_monitoring) + self.timer = rospy.Timer(rospy.Duration(1.0 / mon_freq), self.publish_monitoring) self.monitoring = Monitoring() self.monitoring.origin = socket.gethostname() + rospy.get_name() @@ -25,14 +40,14 @@ class Monitor: def publish_monitoring(self, event): self.pub.publish(self.monitoring) - def update_metric(self, component, label, value, unit, critical): + def update_metric(self, domain, label, value, unit, error): def set_metric(): metric = Metric() - metric.component = str(component) + metric.domain = str(domain) metric.label = str(label) metric.value = str(value) metric.unit = str(unit) - metric.critical = critical + metric.error = error self.monitoring.metric = metric @@ -41,17 +56,17 @@ class Monitor: if " " in label: # A label cannot contain whitespace! rospy.logwarn("Whitespaces are not allowed in metric labels!") - elif critical > 1 or critical < 0: - rospy.logwarn("Critical level must be between 1 and 0!") + elif error > 1 or error < 0: + rospy.logwarn("Error level must be between 1 and 0!") else: if label in self.log: # Mode 1: Set the last obtained value. - if self.mode == 1: + if self.mon_mode == 1: set_metric() # Mode 2: Remember and set the minimum value, # obtained after mode selection. - elif self.mode == 2: + elif self.mon_mode == 2: if value < self.log[label]['val']: self.log[label]['val'] = value else: @@ -61,7 +76,7 @@ class Monitor: # Mode 3: Remember and set the maximum value, # obtained after mode selection. - elif self.mode == 3: + elif self.mon_mode == 3: if value > self.log[label]['val']: self.log[label]['val'] = value else: @@ -71,7 +86,7 @@ class Monitor: # Mode 4: Set the average obtained value over five # or more seconds. - elif self.mode == 4: + elif self.mon_mode == 4: duration = rospy.get_rostime() - self.log[label]['dur'] if duration < rospy.Duration(5): self.log[label]['num'] += 1 diff --git a/src/monitoring/watchdog.py b/src/monitoring/watchdog.py index 2d8080fd39c9e3d142c853e56aae188853dcb44b..7ac2a86a9c85ca9ba9738d792c325d37a608b878 100755 --- a/src/monitoring/watchdog.py +++ b/src/monitoring/watchdog.py @@ -1,74 +1,142 @@ #!/usr/bin/env python import rospy + from monitoring.msg import Monitoring -from monitoring.msg import State -from monitoring.msg import States class Watchdog: - def __init__(self, mode=1, freq=1.0): - self.mode = mode - self.pub = rospy.Publisher('states', States, queue_size=1) - self.component_states = {} - self.observing = {} + def __init__(self): + self.sub = rospy.Subscriber("/monitoring", Monitoring, self.update_log) + + # Stores the most current values of each metric. + self.log = {} + + # Strategies that specify how the critical level of each metric should be aggregated in a domain. + # It is possible to specify a different aggregation strategy for every single domain. + self.aggregation_strategy_metrics = rospy.get_param("/aggregation_strategy_metrics") + # Dictionary that stores the aggregated critical levels of the metrics of a domain. + self.aggregated_metrics = {} + + # Strategy that specifies how the critical levels of all domains should be aggregated. + self.aggregation_strategy_domains = rospy.get_param("/aggregation_strategy_domains") + # Variable that stores the aggregated critical level of all domains. + self.aggregated_domains = None - if not freq > 0.0: - rospy.logwarn("Frequency must be greater then 0! Using 1 as frequency!") - freq = 1.0 + # Frequency at which the metrics in the log are aggregated. + wdg_freq = rospy.get_param("/wdg_freq", 1.0) - self.timer = rospy.Timer(rospy.Duration(1.0 / freq), self.update_states) + if not wdg_freq > 0.0: + rospy.logwarn( + "Watchdog: The frequency at which the metrics of the robot are aggregated must be greater then 0! " + "Using 1 as frequency!") + wdg_freq = 1.0 - rospy.Subscriber("monitoring", Monitoring, self.logging) + self.timer = rospy.Timer(rospy.Duration(1.0 / wdg_freq), self.update_condition) # TODO: Think about efficiency! - def logging(self, monitoring): + def update_log(self, monitoring): origin = monitoring.origin metric = monitoring.metric - component = metric.component + domain = metric.domain label = metric.label value = metric.value unit = metric.unit - critical = metric.critical + error = metric.error - if component in self.observing: - if origin in self.observing[component]: - self.observing[component][origin].update({label: {"value": value, "unit": unit, "critical": critical}}) + if domain in self.log: + if origin in self.log[domain]: + self.log[domain][origin].update({label: {"value": value, "unit": unit, "error": error}}) else: - self.observing[component][origin] = {} - self.observing[component][origin][label] = {"value": value, "unit": unit, "critical": critical} + self.log[domain][origin] = {} + self.log[domain][origin][label] = {"value": value, "unit": unit, "error": error} else: - self.observing[component] = {} - self.observing[component][origin] = {} - self.observing[component][origin][label] = {"value": value, "unit": unit, "critical": critical} + self.log[domain] = {} + self.log[domain][origin] = {} + self.log[domain][origin][label] = {"value": value, "unit": unit, "error": error} + + def update_condition(self, event): + self.aggregate_metrics() + self.aggregate_domains() # TODO: Add more modes! - def update_states(self, event): - for component in self.observing: - # Mode 1: Take the average critical level for a component - if self.mode == 1: - aggregation = 0 - number = 0 - - for origin in self.observing[component]: - for label in self.observing[component][origin]: - number = number + 1 - aggregation = aggregation + self.observing[component][origin][label]["critical"] - - quotient = aggregation / number - self.component_states.update({component: quotient}) - - self.publish_states() - - # TODO: Update names! - def publish_states(self): - topic = States() - for component in self.component_states: - state = State() - state.component = component - state.critical = self.component_states[component] - - topic.states.append(state) - - self.pub.publish(topic) + def aggregate_metrics(self): + for domain in self.log: + if domain in self.aggregation_strategy_metrics: + + # Strategy 1: Take the highest error level of any metric in a domain. + if self.aggregation_strategy_metrics[domain] == 1: + highest_error = 0.0 + + for origin in self.log[domain]: + for label in self.log[domain][origin]: + local_error = self.log[domain][origin][label]["error"] + if local_error > highest_error: + highest_error = local_error + + self.aggregated_metrics.update({domain: highest_error}) + + # Strategy 2: Take the lowest error level of any metric in a domain. + if self.aggregation_strategy_metrics[domain] == 2: + lowest_error = 1.0 + + for origin in self.log[domain]: + for label in self.log[domain][origin]: + local_error = self.log[domain][origin][label]["error"] + if local_error < lowest_error: + lowest_error = local_error + + self.aggregated_metrics.update({domain: lowest_error}) + + else: + rospy.logwarn("Watchdog: The specified method for aggregating metrics is not recognised!") + + # Default strategy: Take the average error level of every metric in a domain. + else: + count = 0.001 + aggregated_error = 0.0 + + for origin in self.log[domain]: + for label in self.log[domain][origin]: + count = count + 1 + aggregated_error = aggregated_error + self.log[domain][origin][label]["error"] + + quotient = aggregated_error / count + self.aggregated_metrics.update({domain: quotient}) + + # TODO: Add more modes! + def aggregate_domains(self): + # Strategy 1: Take the highest error level of any domain. + if self.aggregation_strategy_domains == 1: + highest_error = 0.0 + + for domain in self.aggregated_metrics: + local_error = self.aggregated_metrics[domain] + if local_error > highest_error: + highest_error = local_error + + self.aggregated_domains = highest_error + + # Strategy 2: Take the lowes error level of any domain. + elif self.aggregation_strategy_domains == 2: + lowest_error = 1.0 + + for domain in self.aggregated_metrics: + local_error = self.aggregated_metrics[domain] + if local_error < lowest_error: + lowest_error = local_error + + self.aggregated_domains = lowest_error + + # Default strategy: Take the average error level of every domain. + else: + count = 0.001 + aggregated_error = 0.0 + + for domain in self.aggregated_metrics: + count = count + 1 + aggregated_error = aggregated_error + self.aggregated_metrics[domain] + + quotient = aggregated_error / count + self.aggregated_domains = quotient