From 6d61d4930b23a35d497cf6a29e0d295a498400e0 Mon Sep 17 00:00:00 2001 From: suiy Date: Thu, 18 Apr 2013 04:18:07 -0600 Subject: [PATCH 1/3] running large datasets --- automaton.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automaton.py b/automaton.py index e5e84b5..01fa9d9 100755 --- a/automaton.py +++ b/automaton.py @@ -66,7 +66,7 @@ def run(self): def clean_exit(signum, frame): global SIGEXIT SIGEXIT = True - LOG.critical("Exit signal received. Exiting at the next sane time. " + LOG.critical("Exit signal received. Exiting at the next sane time." "Please stand by.") From 538c89198ce509879189bcde7981a7c36b0a9ec2 Mon Sep 17 00:00:00 2001 From: suiy Date: Thu, 18 Apr 2013 04:29:27 -0600 Subject: [PATCH 2/3] add functions on running large datasets --- automaton.py | 4 +- etc/benchmarking.conf | 7 +- etc/clouds.conf | 2 +- etc/global.conf | 2 +- graphing/graphing.py | 2 + lib/util.py | 50 +++++-------- resources/cluster/clusters.py | 133 +++++++++++++++++++++------------- self.db | Bin 8192 -> 8192 bytes 8 files changed, 106 insertions(+), 94 deletions(-) diff --git a/automaton.py b/automaton.py index 01fa9d9..07cc72c 100755 --- a/automaton.py +++ b/automaton.py @@ -48,7 +48,7 @@ def run(self): cluster.launch() if self.config.options.terminate_cluster: cluster.connect() - if self.config.options.terminate_cluster == "all": + if self.config.options.terminate_cluster=="all": cluster.terminate_all() else: cluster.terminate(self.config.options.terminate_cluster) @@ -60,7 +60,7 @@ def run(self): cluster.deploy_software() if self.config.options.excute_benchmarks: cluster.connect() - cluster.excute_benchmarks() + cluster.excute_benchmarks(self.config.options.excute_benchmarks) def clean_exit(signum, frame): diff --git a/etc/benchmarking.conf b/etc/benchmarking.conf index 9570634..eac3a86 100755 --- a/etc/benchmarking.conf +++ b/etc/benchmarking.conf @@ -1,15 +1,10 @@ [Benchmark-01] sierra = 0 -hotel = 2 +hotel = 1 log_files = ~/BioPerf/Outputs/log url = http://www.bioperf.org/BioPerf.zip remote_location = ~/BioPerf [Benchmark-02] -sierra = 0 -hotel = 1 -log_files = ~/BioPerf/Outputs/log -url = http://www.bioperf.org/BioPerf.zip -remote_location = ~/BioPerf [Benchmark-03] \ No newline at end of file diff --git a/etc/clouds.conf b/etc/clouds.conf index 1c199ea..6391b1e 100755 --- a/etc/clouds.conf +++ b/etc/clouds.conf @@ -1,7 +1,7 @@ [hotel] cloud_uri = svc.uc.futuregrid.org cloud_port = 8444 -image_id = automaton.gz +image_id = debian-lenny.gz cloud_type = nimbus availability_zone = us-east-1 instance_type = m1.large diff --git a/etc/global.conf b/etc/global.conf index 3578ec7..80a7b9d 100755 --- a/etc/global.conf +++ b/etc/global.conf @@ -1,6 +1,6 @@ [DEFAULT] key_name = automaton -key_path = /Users/voran/.ssh/id_rsa.pub +key_path = /Users/voran/.ssh/id_rsa1.pub ssh_priv_key = /Users/suiyuan0226/.ssh/automaton.pem git_repo_home = /home/staged-deployment-scripts git_repo_location = https://github.com/alal3177/staged-deployment-scripts.git diff --git a/graphing/graphing.py b/graphing/graphing.py index 259bbad..29a0740 100644 --- a/graphing/graphing.py +++ b/graphing/graphing.py @@ -8,6 +8,8 @@ def __init__(self, config): self.config = config self.parser = Parser(self.config) self.graph_path = self.config.globals.graph_path + if not os.path.exists(self.graph_path): + os.makedirs(self.graph_path) self.attributes = list() self.attributes = self.parser.instance_types diff --git a/lib/util.py b/lib/util.py index 224359e..2235e84 100755 --- a/lib/util.py +++ b/lib/util.py @@ -102,55 +102,41 @@ def parse_options(): parser = OptionParser() + parser = OptionParser() + parser.add_option("-d", "--debug", action="store_true", dest="debug", help="Enable debugging log level.") parser.set_defaults(debug=False) - parser.add_option("-g", "--global_file", action="store", - dest="global_file", - help="Location of the file with global parameters " - "(default: etc/global.conf).") + parser.add_option("-g", "--global_file", action="store", dest="global_file", + help="Location of the file with global parameters (default: etc/global.conf).") parser.set_defaults(global_file="etc/global.conf") - parser.add_option("-c", "--clouds_file", action="store", - dest="clouds_file", - help="Location of the file with cloud parameters " - "(default: etc/clouds.conf).") + parser.add_option("-c", "--clouds_file", action="store", dest="clouds_file", + help="Location of the file with cloud parameters (default: etc/clouds.conf).") parser.set_defaults(clouds_file="etc/clouds.conf") - parser.add_option("-b", "--benchmarking_file", action="store", - dest="benchmarking_file", - help="Location of the file with benchmarking " - "parameters (default: etc/benchmarking.conf).") + parser.add_option("-b", "--benchmarking_file", action="store", dest="benchmarking_file", + help="Location of the file with benchmarking parameters (default: etc/benchmarking.conf).") parser.set_defaults(benchmarking_file="etc/benchmarking.conf") - parser.add_option("-l", "--launch_cluster", action="store_true", - dest="launch_cluster", - help="Launch desired number of clusters") + parser.add_option("-l", "--launch_cluster", action="store_true",dest="launch_cluster",help="Launch clusters") - parser.add_option("-t", "--terminate_cluster", action="store", - dest="terminate_cluster", - help="Terminate specific instance, argument: " - "all/instance_id", - default=False) + parser.add_option("-t", "--terminate_cluster", action="store",dest="terminate_cluster",help="Terminate specific instance, arguement: all/instance_id. To check the instance id: automaton.py -i") + parser.set_defaults(terminate_cluster="all") - parser.add_option("-s", "--deploy_software", action="store_true", - dest="deploy_software", help="Deploy Software") + parser.add_option("-s", "--deploy_software", action="store_true",dest="deploy_software",help="Deploy Softwares on instances.") - parser.add_option("-e", "--excute_benchmarks", action="store_true", - dest="excute_benchmarks", help="excute benchmarks") + parser.add_option("-e", "--excute_benchmarks", action="store",dest="excute_benchmarks",help="Excute benchmarks with different size dataset, arguement: small/medium/large.") + parser.set_defaults(excute_benchmarks="small") - parser.add_option("-o", "--gather_logs", action="store_true", - dest="gather_logs", help="Gather logs") + parser.add_option("-o", "--gather_logs", action="store_true",dest="gather_logs",help="Gather logs from instances.") - parser.add_option("-p", "--generate_graphs", action="store_true", - dest="generate_graphs", - help="Generate graphs that based on the collected logs") + parser.add_option("-p", "--generate_graphs", action="store_true",dest="generate_graphs",help="Generate graphs that based on the collected logs") - parser.add_option("-i", "--show_id", action="store_true", dest="show_id", - help="show the id of all running instances") + parser.add_option("-i", "--show_id", action="store_true",dest="show_id",help="Show the instance id of all running/terminated instances") (options, args) = parser.parse_args() - + return (options, args) diff --git a/resources/cluster/clusters.py b/resources/cluster/clusters.py index 8d7c2d0..8b2cb12 100755 --- a/resources/cluster/clusters.py +++ b/resources/cluster/clusters.py @@ -171,12 +171,38 @@ def download_logs(self): LOG.error("Download logs: " + command.stdout) LOG.error("Download logs error: " + command.stderr) + def download_logs(self): + reservations = list() + ssh_username = self.config.globals.ssh_username + if self.reservations: + reservations = self.reservations + else: + for cloud in self.clouds: + reservations = cloud.conn.get_all_instances() + for reservation in reservations: + for instance in reservation.instances: + if self.database.check_benchmark(self.benchmark.name, instance.id): + local_path = os.path.join(self.config.globals.log_local_path, self.benchmark.name, instance.id) + if not os.path.exists(local_path): + os.makedirs(local_path) + for path in self.path: + file_name = os.path.basename(path) + local_path = os.path.join(local_path,file_name) + local_path = local_path+'_'+(datetime.datetime.now()).strftime("%H%M%S")+'_'+instance.instance_type + com = "scp -r "+ssh_username+"@"+instance.public_dns_name+":"+path+" "+local_path + LOG.debug("Download logs: [%s] download %s into %s" % (self.benchmark.name, os.path.basename(path), local_path)) + command = Command(com) + command_return = command.execute() + if command_return != 0: + LOG.error("Download logs: "+command.stdout) + LOG.error("Download logs error: "+command.stderr) + def deploy_software(self): ssh_priv_key = self.config.globals.ssh_priv_key ssh_username = self.config.globals.ssh_username ssh_timeout = int(self.config.globals.ssh_timeout) - reservations = list() - not_available = 0 + reservations = list() + not_available = 0; if self.reservations: reservations = self.reservations else: @@ -184,34 +210,46 @@ def deploy_software(self): reservations = cloud.conn.get_all_instances() for reservation in reservations: for instance in reservation.instances: - if self.database.check_benchmark(self.benchmark.name, - instance.id): - if not check_port_status(instance.ip_address, 22, - ssh_timeout): - LOG.error("Deploy_software: the port 22 is not " - "available right now. please try it later") - continue - cmds = list() + if self.database.check_benchmark(self.benchmark.name, instance.id): + if not check_port_status(instance.ip_address, 22, ssh_timeout): + LOG.error("Deploy_software: the port 22 is not available right now. please try it later") + continue + cmds = list() + cmds.append("rm -rf ~/*") cmds.append("wget %s" % (self.url)) - #cmds.append("apt-get update") - #cmds.append("apt-get install unzip") + cmds.append("apt-get update") + cmds.append("apt-get install unzip") cmds.append("unzip BioPerf.zip") - cmds.append("sed -i 's/read BIOPERF/#read " - "BIOPERF/g' install-BioPerf.sh") + cmds.append("sed -i 's/read BIOPERF/#read BIOPERF/g' install-BioPerf.sh") cmds.append("./install-BioPerf.sh") + cmds.append("wget ftp://ftp.cc.gatech.edu/pub/people/bader/BioPerf/swissprot.tar.gz") + cmds.append("tar -xvf swissprot.tar.gz") + cmds.append("mv Swissprot/* .") + cmds.append("wget ftp://ftp.cc.gatech.edu/pub/people/bader/BioPerf/Pfam") + cmds.append("wget ftp://ftp.cc.gatech.edu/pub/people/bader/BioPerf/nr") + cmds.append("sed -i '10 i\DATABASES=~/' ~/.profile") + cmds.append("sed -i '10 i\export DATABASES' ~/.profile") + cmds.append("sed -i '5c input='y'' ~/BioPerf/Scripts/Run-scripts/CleanOutputs.sh") + cmds.append("sed -i '21c #' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("sed -i '26c #' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("sed -i '10c arch='X'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("sed -i '71c input3='A'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append('''sed -i "659c ch='Y';" ./BioPerf/Source-codes/Phylip/src/promlk.c''') + cmds.append("sed -i '/scanf/c //' ./BioPerf/Source-codes/Phylip/src/promlk.c") + cmds.append("cd ./BioPerf/Source-codes/Phylip/src;make promlk;cd ~") + cmds.append("mv ./BioPerf/Source-codes/Phylip/src/promlk ./BioPerf/Binaries/x86-Binaries/Phylip") + for c in cmds: - command = RemoteCommand(instance.public_dns_name, - ssh_priv_key, c) + command = RemoteCommand(instance.public_dns_name, ssh_priv_key, c) command_return = command.execute() - if command_return != 0: - LOG.error("Deploy_software: " + command.stdout) - LOG.error("Deploy_software error: " + - command.stderr) + if command_return !=0: + LOG.error("Deploy_software: "+command.stdout) + LOG.error("Deploy_software error: "+command.stderr) - def excute_benchmarks(self): + def excute_benchmarks(self,dataset_size): ssh_priv_key = self.config.globals.ssh_priv_key ssh_username = self.config.globals.ssh_username - reservations = list() + reservations = list() if self.reservations: reservations = self.reservations else: @@ -219,39 +257,30 @@ def excute_benchmarks(self): reservations = cloud.conn.get_all_instances() for reservation in reservations: for instance in reservation.instances: - if self.database.check_benchmark(self.benchmark.name, - instance.id): + if self.database.check_benchmark(self.benchmark.name, instance.id): cmds = list() - cmds.append("sed -i '5c input='y'' ~/BioPerf/Scripts/" - "Run-scripts/CleanOutputs.sh") - cmds.append("sed -i '13c rm -f $BIOPERF/Outputs/log' " - "~/BioPerf/Scripts/Run-scripts/" - "CleanOutputs.sh") - cmds.append("sed -i '21c #' " - "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") - cmds.append("sed -i '26c #' " - "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") - cmds.append("sed -i '10c arch='X'' " - "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") - cmds.append("sed -i '71c input3='A'' " - "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") - cmds.append("sed -i '134c input='A'' " - "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") - cmds.append("sed -i '145c user1='y'' " - "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") - cmds.append("./BioPerf/Scripts/Run-scripts/" - "CleanOutputs.sh") - cmds.append("echo 'Y' 'Y'|" - "./BioPerf/Scripts/Run-scripts/run-bioperf.sh" - " > ~/BioPerf/Outputs/log") - + cmds.append("sed -i '13c rm -f $BIOPERF/Outputs/log' ~/BioPerf/Scripts/Run-scripts/CleanOutputs.sh") + cmds.append("sed -i '60c FASTA=0' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("sed -i '62c GRAPPA=0' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("./BioPerf/Scripts/Run-scripts/CleanOutputs.sh") + if dataset_size=="large": + #cmds.append("sed -i '134c input='A'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("echo C>c;echo H>>c;echo '1'>>c;echo Y>>c") + + elif dataset_size=="medium": + #cmds.append("sed -i '134c input='B'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("echo B>c;echo H>>c;echo '1'>>c;echo Y>>c") + else: + #cmds.append("sed -i '134c input='C'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh") + cmds.append("echo A>c;echo H>>c;echo '1'>>c;echo Y>>c") + cmds.append("cat c|./BioPerf/Scripts/Run-scripts/run-bioperf.sh > ~/BioPerf/Outputs/log") + for c in cmds: - command = RemoteCommand(instance.public_dns_name, - ssh_priv_key, c) + command = RemoteCommand(instance.public_dns_name, ssh_priv_key, c) command_return = command.execute() - if command_return != 0: - LOG.error("Excute_benchmarks: " + command.stdout) - LOG.error("Excute_benchmarks: " + command.stderr) + if command_return !=0: + LOG.error("Excute_benchmarks: "+command.stdout) + LOG.error("Excute_benchmarks: "+command.stderr) class Clusters(object): diff --git a/self.db b/self.db index 35c61d56329018761f4bbd89a70fc4189bcee8f1..b1fbefb076fe42d3ea085cdcfc51873e7e0d8f94 100644 GIT binary patch delta 97 zcmZp0XmFSyEvUr6z`z8>j6hm$qK+}Y5`*p|6<#2ZL4bjQzkg$)9RI`sHparqllgPm rK*9_Ro7n{J@yi%yx8Wa_3`nj4rTnx{CW<|Sw3CKhGu8W;iqMK~1X delta 97 zcmZp0XmFSyEhx{xz`z8=Fd#co$CzK9LHCgcFHne)zn_7>e`BE{KTAF%yZGeE{JCr( paR!0SYy$WAWsEZNOHy+(buG=zj13Z#4V_Z+k~4A>i?VeMi~v686p#P_ From 9050d42fe8a223220751daf8cddaddcfc4d33dbe Mon Sep 17 00:00:00 2001 From: suiy Date: Thu, 18 Apr 2013 04:34:38 -0600 Subject: [PATCH 3/3] fix bugs --- resources/cluster/clusters.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/resources/cluster/clusters.py b/resources/cluster/clusters.py index 8b2cb12..08d8f28 100755 --- a/resources/cluster/clusters.py +++ b/resources/cluster/clusters.py @@ -170,32 +170,6 @@ def download_logs(self): if command_return != 0: LOG.error("Download logs: " + command.stdout) LOG.error("Download logs error: " + command.stderr) - - def download_logs(self): - reservations = list() - ssh_username = self.config.globals.ssh_username - if self.reservations: - reservations = self.reservations - else: - for cloud in self.clouds: - reservations = cloud.conn.get_all_instances() - for reservation in reservations: - for instance in reservation.instances: - if self.database.check_benchmark(self.benchmark.name, instance.id): - local_path = os.path.join(self.config.globals.log_local_path, self.benchmark.name, instance.id) - if not os.path.exists(local_path): - os.makedirs(local_path) - for path in self.path: - file_name = os.path.basename(path) - local_path = os.path.join(local_path,file_name) - local_path = local_path+'_'+(datetime.datetime.now()).strftime("%H%M%S")+'_'+instance.instance_type - com = "scp -r "+ssh_username+"@"+instance.public_dns_name+":"+path+" "+local_path - LOG.debug("Download logs: [%s] download %s into %s" % (self.benchmark.name, os.path.basename(path), local_path)) - command = Command(com) - command_return = command.execute() - if command_return != 0: - LOG.error("Download logs: "+command.stdout) - LOG.error("Download logs error: "+command.stderr) def deploy_software(self): ssh_priv_key = self.config.globals.ssh_priv_key