#   File: monitorAndCloning-OS.py
#   Committed by: Massimo Canonico
#   Author: Andrea Santomauro
#   Email: 20013777@studenti.uniupo.it
#   Università degli Studi del Piemonte Orientale
#   The aim of this library is to provide a Resource Monitor for the VMs in the CHI environment.
#   The main function is 'monitor()' in the 'ResourcesMonitor' class, which works as follows:
#   - It takes as input a metric to monitor (e.g. load@load, the metric about CPU usage), the threshold, the number of measures, a value k and
#     other params about the instance to monitor (the id) and the new instance to spawn, if needed
#   - When it's called, it looks at the last 'measures_number' measures; if at least k of the measurements are higher than the specified threshold,
#     a new VM is spawned (with the params specified as input) and a floating IP is assigned to it (if a fip is available, otherwise a new fip is requested
#     and then associated)
#   - e.g. monitor('load@load', XXX, 0.5, 5, 3): if the load is bigger than 0.5 at least 3 times in the last 5 measures, then start a new VM.
#
#   The class GnocchiInterface is a wrapper of the gnocchi library, which contains only the needed functionality.
#
#   The class TACCInterface is a wrapper of the chi library, provided by ChameleonCloud. It adds some necessary busy waiting and exception handling in order
#   to avoid runtime errors and crashes.

import chi
from chi import lease
from chi import server
from time import sleep

import json

class GnocchiInterface:
    """Minimal wrapper around the gnocchi client obtained through chi.

    Only the calls needed by the resource monitor are exposed.
    """

    def __init__(self):
        """Open a chi session and build the gnocchi client on top of it."""
        session = chi.session()
        self.session = session
        self.gnocchi_client = chi.gnocchi(session)

    def get_metrics_for_instance(self, instance_id, resource_type='generic'):
        """Return the 'metrics' entry of the resource identified by 'instance_id'.

        'resource_type' is forwarded unchanged to the gnocchi resource lookup.
        """
        resource = self.gnocchi_client.resource.get(
            resource_type=resource_type,
            resource_id=instance_id,
        )
        return resource['metrics']

    def get_metric_value(self, metric, instance_id):
        """Return whatever the gnocchi client's metric.get yields for 'metric' on the given instance."""
        metric_api = self.gnocchi_client.metric
        return metric_api.get(metric, resource_id=instance_id)

    def get_metric_measures(self, metric, instance_id, granularity=None):
        """Return the measures of 'metric' for the instance 'instance_id'.

        'granularity' is forwarded to the gnocchi client as given (None by default).
        """
        metric_api = self.gnocchi_client.metric
        measures = metric_api.get_measures(
            metric,
            resource_id=instance_id,
            granularity=granularity,
        )
        return measures


class TACCInterface():
    """Wrapper around the chi library used to start an instance on CHI@TACC.

    In order to start an instance in the CHI environment it's necessary to:
    - Request a lease
    - Wait for the lease activation
    - Create an instance associated to the lease
    - Wait for the instance activation
    - Associate a floating IP in order to make the VM reachable from the outside.

    Each of these steps is implemented by one method of this class.
    """

    def __init__(self, image_name, node_count, server_name, lease_name, key_name, node_type=None):
        """Select the 'CHI@TACC' site and store the parameters of the VM to create.

        'node_type' is optional for backward compatibility: when it is omitted,
        create_lease() falls back to a module-level 'node_type' variable, which is
        what this class historically relied on.
        """
        chi.use_site('CHI@TACC')
        #   Reservations used for the most recent lease request (see create_lease).
        self.reservations = []
        self.image_name = image_name
        self.node_count = node_count
        self.server_name = server_name
        self.lease_name = lease_name
        self.key_name = key_name
        self.node_type = node_type

    def _resolve_node_type(self):
        """Return the node type to reserve, preferring the constructor argument."""
        if self.node_type is not None:
            return self.node_type
        #   Legacy behaviour: older callers define 'node_type' at module level
        #   instead of passing it to the constructor.
        legacy_node_type = globals().get('node_type')
        if legacy_node_type is None:
            raise ValueError("node_type was not provided: pass node_type to TACCInterface(...) "
                             "or define a module-level 'node_type'")
        return legacy_node_type

    def _wait_until_active(self, wait_for_active, resource_id, retry_interval, max_retries):
        """Call 'wait_for_active(resource_id)' until it succeeds, sleeping between attempts.

        With max_retries=None the call is retried forever; otherwise the last error
        is re-raised once 'max_retries' retries have been used up.
        """
        failures = 0
        while True:
            try:
                wait_for_active(resource_id)
                return
            except Exception:
                #   'Exception' (not a bare except) so that Ctrl-C / SystemExit
                #   can still interrupt the busy waiting.
                if max_retries is not None and failures >= max_retries:
                    raise
                failures += 1
                sleep(retry_interval)

    def create_lease(self):
        """Request a lease for 'node_count' nodes and return the object given back by chi.

        The lease is named 'lease_name' and uses the start/end dates returned by
        chi.lease.lease_duration(hours=1).

        Raises ValueError when no node type is available (see __init__).
        """
        #   Build a fresh list on every call: appending to a shared list would make a
        #   second lease request carry the reservations of the first one as well.
        reservations = []
        chi.lease.add_node_reservation(reservations, count=self.node_count,
                                       node_type=self._resolve_node_type())
        self.reservations = reservations
        start_date, end_date = chi.lease.lease_duration(hours=1)
        lease = chi.lease.create_lease(self.lease_name, reservations,
                                       start_date=start_date, end_date=end_date)
        return lease

    def get_lease_id(self, lease):
        """Return the lease id stored in the first reservation of 'lease'."""
        return lease['reservations'][0]['lease_id']

    def wait_lease_activation(self, lease_id, retry_interval=30, max_retries=None):
        """Block until chi reports the lease 'lease_id' as active.

        Failed waits are retried every 'retry_interval' seconds; by default they are
        retried forever (max_retries=None).
        """
        print('[WAITING FOR LEASE ACTIVATION]')
        self._wait_until_active(chi.lease.wait_for_active, lease_id, retry_interval, max_retries)
        print('[LEASE ' + self.lease_name + ' IS NOW ACTIVE]')

    def create_instance(self, lease_id):
        """Create the server on the node reservation of 'lease_id' and store it in self.server."""
        reservation_id = chi.lease.get_node_reservation(lease_id)
        #   NOTE(review): the result is later used as a single server (self.server.id);
        #   confirm what chi returns when node_count is greater than 1.
        server = chi.server.create_server(self.server_name, reservation_id=reservation_id,
                        image_name=self.image_name, count=self.node_count, key_name=self.key_name)
        self.server = server

    def wait_server_activation(self, retry_interval=30, max_retries=None):
        """Block until chi reports the server created by create_instance() as active.

        Failed waits are retried every 'retry_interval' seconds; by default they are
        retried forever (max_retries=None).
        """
        print('[WAITING FOR INSTANCE ACTIVATION]')
        self._wait_until_active(chi.server.wait_for_active, self.server.id, retry_interval, max_retries)
        print('[INSTANCE ' + self.server_name + ' IS NOW ACTIVE]')

    def associate_fip(self,server_id):
        """Associate a floating IP to the server 'server_id' through chi."""
        chi.server.associate_floating_ip(server_id)
        print('[FLOATING IP ASSOCIATED]')

class ResourcesMonitor():
    """Watches a gnocchi metric of an instance and starts a new VM when it is too high."""

    def __init__(self, tacc, gnocchi_interface):
        """Store the TACCInterface used to create VMs and the GnocchiInterface used to read measures."""
        self.tacc = tacc
        self.gnocchi_interface = gnocchi_interface

    def monitor(self, metric, instance_id, threshold, measures_number, k, lease_name, node_type, image_name, server_name, node_count=1, granularity=60):
        """Start a new VM if the metric exceeded 'threshold' often enough recently.

        Looks at the last 'measures_number' measures of 'metric' for 'instance_id'
        (read at the given 'granularity'); if at least 'k' of them are strictly
        greater than 'threshold', start_new_VM() is called.
        E.g. monitor('load@load', XXX, 0.5, 5, 3, ...): if the load is bigger than 0.5
        at least 3 times in the last 5 measures, then start a new VM.

        Returns the result of start_new_VM() when a VM is started, False otherwise.
        Raises whatever start_new_VM() raises.
        """
        measures = self.gnocchi_interface.get_metric_measures(metric, instance_id=instance_id, granularity=granularity)
        #   measures[-0:] would select the whole list, so a non-positive window is
        #   handled explicitly as "no measures".
        recent = measures[-measures_number:] if measures_number > 0 else []
        #   The value of a measure is read at index 2
        #   (presumably (timestamp, granularity, value) -- confirm against gnocchi).
        over_threshold = sum(1 for measure in recent if measure[2] > threshold)
        if over_threshold >= k:
            #   node_count is passed by keyword: it is the last parameter of
            #   start_new_VM, not the third one.
            return self.start_new_VM(lease_name, node_type, image_name, server_name, node_count=node_count)
        return False

    def start_new_VM(self, lease_name, node_type, image_name, server_name, node_count=1):
        """Start a new VM through the TACCInterface and give it a floating IP.

        The steps are: create the lease, wait for it, create the instance, wait for
        it, associate a floating IP.
        NOTE: the arguments are currently not used; the VM is created from the
        configuration already held by self.tacc.

        Returns True on success.
        Raises Exception('Error during creation of VM') if any step fails; the
        original error is attached as the cause.
        """
        try:
            lease = self.tacc.create_lease()
            lease_id = self.tacc.get_lease_id(lease)
            self.tacc.wait_lease_activation(lease_id)
            self.tacc.create_instance(lease_id)
            self.tacc.wait_server_activation()
            self.tacc.associate_fip(self.tacc.server.id)
        except Exception as err:
            #   'Exception' (not a bare except) so that Ctrl-C is not turned into a
            #   generic error; 'from err' keeps the root cause in the traceback.
            raise Exception('Error during creation of VM') from err
        return True

#   Brief usage example.
#   NOTE: these statements are at module level, so they also run whenever this file is imported.

#   Parameters of the lease and of the instance to create when the threshold is exceeded.
lease_name = "my-lease-2"
#   NOTE: node_type is not passed to the TACCInterface constructor below; TACCInterface.create_lease
#   picks it up from this module-level name, so it must stay defined here.
node_type = "compute_haswell"
node_count = 1
image_name = "CC-CentOS7"
server_name = 'instance-2'

#   NOTE: before using this script it's mandatory to execute the command "source CH-XXX" in order to authenticate yourself.
#   NOTE: it's mandatory to replace YOUR_KEY_PAIR_NAME with the name of your own keypair and INSTANCE_ID with the id of the instance to monitor.
#   NOTE: you can use any metric among those available for the instance identified by the instance id.
tacc = TACCInterface(image_name,node_count,server_name,lease_name, 'YOUR_KEY_PAIR_NAME')
gnocchi_interface = GnocchiInterface()
resource_monitor = ResourcesMonitor(tacc, gnocchi_interface)
#   Look at the last 5 'load@load' measures (granularity=60) of INSTANCE_ID: if at least 2 of them are above 0.4, a new VM is started.
resource_monitor.monitor('load@load','INSTANCE_ID', 0.4, 5, 2, lease_name, node_type, image_name, server_name, node_count, 60)