# Author: Andrea Santomauro
# Email: 20013777@studenti.uniupo.it
# Università degli Studi del Piemonte Orientale
#
# The aim of this library is to provide a Resource Monitor for the VMs in the CHI environment.
# The main function is 'monitor()' in the 'ResourcesMonitor' class, which works as follows:
#   - It takes as input a metric to monitor (e.g. load@load, the metric about CPU usage), the threshold,
#     the number of measures, a value k and other params about the instance to monitor (the id) and the
#     new instance to spawn, if needed.
#   - When it's called, it looks at the last 'measures_number' measures; if at least k of the measurements
#     are higher than the specified threshold, a new VM is spawned (with the params specified as input)
#     and a floating IP is assigned to it (if there's a fip available, otherwise a new fip will be
#     requested and then associated).
#   - e.g. monitor('load@load', XXX, 0.5, 5, 3): if the load is bigger than 0.5 at least 3 times in the
#     last 5 measures, then start a new VM.
#
# The class GnocchiInterface is a wrapper of the gnocchi library, which contains only the needed functionality.
#
# The class TACCInterface is a wrapper of the chi library, provided by ChameleonCloud. It adds some necessary
# busy waiting and exception handling in order to avoid runtime errors and crashes.
import json
from time import sleep

import chi
from chi import lease
from chi import server

# Seconds to sleep between two attempts while waiting for a lease/instance to become active.
_RETRY_INTERVAL_SECONDS = 30

# Node type reserved when the caller of TACCInterface does not specify one.
_DEFAULT_NODE_TYPE = 'compute_haswell'


def _wait_until_active(wait_for_active, resource_id):
    """Call 'wait_for_active(resource_id)' until it returns without raising.

    Each failed attempt is followed by a pause of _RETRY_INTERVAL_SECONDS.
    Only 'Exception' subclasses are retried, so KeyboardInterrupt and
    SystemExit still stop the loop. There is deliberately no upper bound on
    the number of attempts: the loop ends only on success.
    """
    while True:
        try:
            wait_for_active(resource_id)
        except Exception:
            sleep(_RETRY_INTERVAL_SECONDS)
        else:
            return


class GnocchiInterface():
    """Wrapper of the gnocchi client that exposes only the calls needed by this module."""

    def __init__(self):
        """Open a chi session and build the gnocchi client on top of it."""
        self.session = chi.session()
        self.gnocchi_client = chi.gnocchi(self.session)

    def get_metrics_for_instance(self, instance_id, resource_type='generic'):
        """Return the 'metrics' entry of the resource identified by 'instance_id'."""
        resource = self.gnocchi_client.resource.get(resource_type=resource_type, resource_id=instance_id)
        return resource['metrics']

    def get_metric_value(self, metric, instance_id):
        """Return the result of gnocchi's 'metric.get' for 'metric' on the instance 'instance_id'."""
        return self.gnocchi_client.metric.get(metric, resource_id=instance_id)

    def get_metric_measures(self, metric, instance_id, granularity=None):
        """Return the measurements of 'metric' for the instance 'instance_id' at the given granularity."""
        return self.gnocchi_client.metric.get_measures(metric, resource_id=instance_id, granularity=granularity)


class TACCInterface():
    """Wrapper of the chi library that drives the creation of an instance on CHI@TACC.

    In order to start an instance in the CHI environment it's necessary to:
      - Request a lease
      - Wait for the lease activation
      - Create an instance associated to the lease
      - Wait for the instance activation
      - Associate a floating IP in order to make the VM reachable from the outside.

    All this work is done by the methods of this class, one method per step.
    """

    def __init__(self, image_name, node_count, server_name, lease_name, key_name, node_type=_DEFAULT_NODE_TYPE):
        """Select the CHI@TACC site and store the parameters of the lease/instance to create.

        'node_type' is the type of node reserved by create_lease(); it is optional
        so that existing five-argument calls keep working.
        """
        chi.use_site('CHI@TACC')
        self.reservations = []
        self.image_name = image_name
        self.node_count = node_count
        self.server_name = server_name
        self.lease_name = lease_name
        self.key_name = key_name
        self.node_type = node_type
        # Set by create_instance(); None until an instance has been requested.
        self.server = None

    def create_lease(self):
        """Request a one-hour lease for 'node_count' nodes of type 'node_type' and return it."""
        # Build a fresh reservation list on every call: reusing the previous list
        # would make a second lease request the earlier reservations as well.
        self.reservations = []
        chi.lease.add_node_reservation(self.reservations, count=self.node_count, node_type=self.node_type)
        start_date, end_date = chi.lease.lease_duration(hours=1)
        return chi.lease.create_lease(self.lease_name, self.reservations, start_date=start_date, end_date=end_date)

    def get_lease_id(self, lease):
        """Return the lease id stored in the first reservation of 'lease'."""
        return lease['reservations'][0]['lease_id']

    def wait_lease_activation(self, lease_id):
        """Block until the lease 'lease_id' is reported as active, retrying after every failure."""
        print('[WAITING FOR LEASE ACTIVATION]')
        _wait_until_active(chi.lease.wait_for_active, lease_id)
        print('[LEASE ' + self.lease_name + ' IS NOW ACTIVE]')

    def create_instance(self, lease_id):
        """Create the instance on the node reservation of 'lease_id' and store it in 'self.server'."""
        reservation_id = chi.lease.get_node_reservation(lease_id)
        self.server = chi.server.create_server(
            self.server_name,
            reservation_id=reservation_id,
            image_name=self.image_name,
            count=self.node_count,
            key_name=self.key_name,
        )

    def wait_server_activation(self):
        """Block until the instance created by create_instance() is active, retrying after every failure."""
        print('[WAITING FOR INSTANCE ACTIVATION]')
        _wait_until_active(chi.server.wait_for_active, self.server.id)
        print('[INSTANCE ' + self.server_name + ' IS NOW ACTIVE]')

    def associate_fip(self, server_id):
        """Associate a floating IP to the instance 'server_id'."""
        chi.server.associate_floating_ip(server_id)
        print('[FLOATING IP ASSOCIATED]')


class ResourcesMonitor():
    """Checks a metric of an instance and starts a new VM when it is too often above a threshold."""

    def __init__(self, tacc, gnocchi_interface):
        """Store the TACCInterface used to spawn VMs and the GnocchiInterface used to read measures."""
        self.tacc = tacc
        self.gnocchi_interface = gnocchi_interface

    def monitor(self, metric, instance_id, threshold, measures_number, k, lease_name, node_type, image_name, server_name, node_count=1, granularity=60):
        """Monitor a specific resource over the last 'measures_number' measures.

        If at least k of those measures are above 'threshold', start a new VM.
        E.g. monitor('load@load', XXX, 0.5, 5, 3): if the load is bigger than 0.5
        at least 3 times in the last 5 measures, then start a new VM.

        Returns the result of start_new_VM() (True) when a VM has been started,
        False otherwise. Errors raised by start_new_VM() propagate to the caller.
        """
        measures = self.gnocchi_interface.get_metric_measures(metric, instance_id=instance_id, granularity=granularity)
        # 'measures[-0:]' is the whole list, so a non-positive window is handled
        # explicitly as "no measure to look at".
        recent_measures = measures[-measures_number:] if measures_number > 0 else []
        # The value of a measure is read at index 2 of each entry.
        over_threshold = sum(1 for measure in recent_measures if measure[2] > threshold)
        if over_threshold >= k:
            return self.start_new_VM(lease_name, node_type, image_name, server_name, node_count=node_count)
        return False

    def start_new_VM(self, lease_name, node_type, image_name, server_name, node_count=1):
        """Using the TACCInterface class, start a new VM in order to balance the resource.

        NOTE: the arguments are kept for interface compatibility but are not read:
        the lease and the instance are created with the parameters 'self.tacc'
        was constructed with.

        Returns True once the VM is active and has a floating IP.
        Raises RuntimeError (chained to the original error) if any step fails.
        """
        try:
            new_lease = self.tacc.create_lease()
            lease_id = self.tacc.get_lease_id(new_lease)
            self.tacc.wait_lease_activation(lease_id)
            self.tacc.create_instance(lease_id)
            self.tacc.wait_server_activation()
            self.tacc.associate_fip(self.tacc.server.id)
        except Exception as err:
            # Keep the original failure attached so the real cause stays visible.
            raise RuntimeError('Error during creation of VM') from err
        return True


# Here a brief example of the usage. It runs only when the file is executed as a
# script, so importing the classes does not request a lease.
if __name__ == '__main__':
    lease_name = "my-lease-2"
    node_type = "compute_haswell"
    node_count = 1
    image_name = "CC-CentOS7"
    server_name = 'instance-2'

    # NOTE: before using this script it's mandatory to execute the command "source CH-XXX" in order to authenticate yourself.
    # NOTE: it's mandatory to substitute YOUR_KEY_PAIR_NAME with the name of your own keypair and INSTANCE_ID with the id of the instance to monitor.
    # NOTE: you can use the metric you want from the metrics available for the instance identified by instance id.
    tacc = TACCInterface(image_name, node_count, server_name, lease_name, 'YOUR_KEY_PAIR_NAME', node_type=node_type)
    gnocchi_interface = GnocchiInterface()
    resource_monitor = ResourcesMonitor(tacc, gnocchi_interface)
    resource_monitor.monitor('load@load', 'INSTANCE_ID', 0.4, 5, 2, lease_name, node_type, image_name, server_name, node_count, 60)