From 6d61d4930b23a35d497cf6a29e0d295a498400e0 Mon Sep 17 00:00:00 2001
From: suiy <yuan.sui@colorado.edu>
Date: Thu, 18 Apr 2013 04:18:07 -0600
Subject: [PATCH 1/3] adjust exit-signal log message in automaton.py

---
 automaton.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/automaton.py b/automaton.py
index e5e84b5..01fa9d9 100755
--- a/automaton.py
+++ b/automaton.py
@@ -66,7 +66,7 @@ def run(self):
 def clean_exit(signum, frame):
     global SIGEXIT
     SIGEXIT = True
-    LOG.critical("Exit signal received. Exiting at the next sane time. "
+    LOG.critical("Exit signal received. Exiting at the next sane time."
                  "Please stand by.")
 
 
From 538c89198ce509879189bcde7981a7c36b0a9ec2 Mon Sep 17 00:00:00 2001
From: suiy <yuan.sui@colorado.edu>
Date: Thu, 18 Apr 2013 04:29:27 -0600
Subject: [PATCH 2/3] add support for running benchmarks on large datasets

---
 automaton.py                  |   4 +-
 etc/benchmarking.conf         |   7 +-
 etc/clouds.conf               |   2 +-
 etc/global.conf               |   2 +-
 graphing/graphing.py          |   2 +
 lib/util.py                   |  50 +++++--------
 resources/cluster/clusters.py | 133 +++++++++++++++++++++-------------
 self.db                       | Bin 8192 -> 8192 bytes
 8 files changed, 106 insertions(+), 94 deletions(-)

diff --git a/automaton.py b/automaton.py
index 01fa9d9..07cc72c 100755
--- a/automaton.py
+++ b/automaton.py
@@ -48,7 +48,7 @@ def run(self):
                 cluster.launch()
             if self.config.options.terminate_cluster:
                 cluster.connect()
-                if self.config.options.terminate_cluster == "all":
+                if self.config.options.terminate_cluster=="all":
                     cluster.terminate_all()
                 else:
                     cluster.terminate(self.config.options.terminate_cluster)
@@ -60,7 +60,7 @@ def run(self):
                 cluster.deploy_software()
             if self.config.options.excute_benchmarks:
                 cluster.connect()
-                cluster.excute_benchmarks()
+                cluster.excute_benchmarks(self.config.options.excute_benchmarks)
 
 
 def clean_exit(signum, frame):
diff --git a/etc/benchmarking.conf b/etc/benchmarking.conf
index 9570634..eac3a86 100755
--- a/etc/benchmarking.conf
+++ b/etc/benchmarking.conf
@@ -1,15 +1,10 @@
 [Benchmark-01]
 sierra = 0
-hotel = 2
+hotel = 1
 log_files = ~/BioPerf/Outputs/log
 url = http://www.bioperf.org/BioPerf.zip
 remote_location = ~/BioPerf
 
 [Benchmark-02]
-sierra = 0
-hotel = 1
-log_files = ~/BioPerf/Outputs/log
-url = http://www.bioperf.org/BioPerf.zip
-remote_location = ~/BioPerf
 
 [Benchmark-03]
\ No newline at end of file
diff --git a/etc/clouds.conf b/etc/clouds.conf
index 1c199ea..6391b1e 100755
--- a/etc/clouds.conf
+++ b/etc/clouds.conf
@@ -1,7 +1,7 @@
 [hotel]
 cloud_uri = svc.uc.futuregrid.org
 cloud_port = 8444
-image_id = automaton.gz
+image_id = debian-lenny.gz
 cloud_type = nimbus
 availability_zone = us-east-1
 instance_type = m1.large
diff --git a/etc/global.conf b/etc/global.conf
index 3578ec7..80a7b9d 100755
--- a/etc/global.conf
+++ b/etc/global.conf
@@ -1,6 +1,6 @@
 [DEFAULT]
 key_name = automaton
-key_path = /Users/voran/.ssh/id_rsa.pub
+key_path = /Users/voran/.ssh/id_rsa1.pub
 ssh_priv_key = /Users/suiyuan0226/.ssh/automaton.pem
 git_repo_home = /home/staged-deployment-scripts
 git_repo_location = https://github.com/alal3177/staged-deployment-scripts.git
diff --git a/graphing/graphing.py b/graphing/graphing.py
index 259bbad..29a0740 100644
--- a/graphing/graphing.py
+++ b/graphing/graphing.py
@@ -8,6 +8,8 @@ def __init__(self, config):
         self.config = config
         self.parser = Parser(self.config)
         self.graph_path = self.config.globals.graph_path
+        if not os.path.exists(self.graph_path):
+            os.makedirs(self.graph_path)
         self.attributes = list()
         self.attributes = self.parser.instance_types
 
diff --git a/lib/util.py b/lib/util.py
index 224359e..2235e84 100755
--- a/lib/util.py
+++ b/lib/util.py
@@ -102,55 +102,41 @@ def parse_options():
 
     parser = OptionParser()
 
+    parser = OptionParser()
+
     parser.add_option("-d", "--debug", action="store_true", dest="debug",
                       help="Enable debugging log level.")
     parser.set_defaults(debug=False)
 
-    parser.add_option("-g", "--global_file", action="store",
-                      dest="global_file",
-                      help="Location of the file with global parameters "
-                           "(default: etc/global.conf).")
+    parser.add_option("-g", "--global_file", action="store", dest="global_file",
+                      help="Location of the file with global parameters (default: etc/global.conf).")
     parser.set_defaults(global_file="etc/global.conf")
 
-    parser.add_option("-c", "--clouds_file", action="store",
-                      dest="clouds_file",
-                      help="Location of the file with cloud parameters "
-                           "(default: etc/clouds.conf).")
+    parser.add_option("-c", "--clouds_file", action="store", dest="clouds_file",
+                      help="Location of the file with cloud parameters (default: etc/clouds.conf).")
     parser.set_defaults(clouds_file="etc/clouds.conf")
 
-    parser.add_option("-b", "--benchmarking_file", action="store",
-                      dest="benchmarking_file",
-                      help="Location of the file with benchmarking "
-                           "parameters (default: etc/benchmarking.conf).")
+    parser.add_option("-b", "--benchmarking_file", action="store", dest="benchmarking_file",
+                      help="Location of the file with benchmarking parameters (default: etc/benchmarking.conf).")
     parser.set_defaults(benchmarking_file="etc/benchmarking.conf")
 
-    parser.add_option("-l", "--launch_cluster", action="store_true",
-                      dest="launch_cluster",
-                      help="Launch desired number of clusters")
+    parser.add_option("-l", "--launch_cluster", action="store_true",dest="launch_cluster",help="Launch clusters")
 
-    parser.add_option("-t", "--terminate_cluster", action="store",
-                      dest="terminate_cluster",
-                      help="Terminate specific instance, argument: "
-                           "all/instance_id",
-                      default=False)
+    parser.add_option("-t", "--terminate_cluster", action="store",dest="terminate_cluster",help="Terminate specific instance, arguement: all/instance_id. To check the instance id: automaton.py -i")
+    parser.set_defaults(terminate_cluster="all")
 
-    parser.add_option("-s", "--deploy_software", action="store_true",
-                      dest="deploy_software", help="Deploy Software")
+    parser.add_option("-s", "--deploy_software", action="store_true",dest="deploy_software",help="Deploy Softwares on instances.")
 
-    parser.add_option("-e", "--excute_benchmarks", action="store_true",
-                      dest="excute_benchmarks", help="excute benchmarks")
+    parser.add_option("-e", "--excute_benchmarks", action="store",dest="excute_benchmarks",help="Excute benchmarks with different size dataset, arguement: small/medium/large.")
+    parser.set_defaults(excute_benchmarks="small")
 
-    parser.add_option("-o", "--gather_logs", action="store_true",
-                      dest="gather_logs", help="Gather logs")
+    parser.add_option("-o", "--gather_logs", action="store_true",dest="gather_logs",help="Gather logs from instances.")
 
-    parser.add_option("-p", "--generate_graphs", action="store_true",
-                      dest="generate_graphs",
-                      help="Generate graphs that based on the collected logs")
+    parser.add_option("-p", "--generate_graphs", action="store_true",dest="generate_graphs",help="Generate graphs that based on the collected logs")
 
-    parser.add_option("-i", "--show_id", action="store_true", dest="show_id",
-                      help="show the id of all running instances")
+    parser.add_option("-i", "--show_id", action="store_true",dest="show_id",help="Show the instance id of all running/terminated instances")
     (options, args) = parser.parse_args()
-
+    
     return (options, args)
 
 
diff --git a/resources/cluster/clusters.py b/resources/cluster/clusters.py
index 8d7c2d0..8b2cb12 100755
--- a/resources/cluster/clusters.py
+++ b/resources/cluster/clusters.py
@@ -171,12 +171,38 @@ def download_logs(self):
                             LOG.error("Download logs: " + command.stdout)
                             LOG.error("Download logs error: " + command.stderr)
 
+    def download_logs(self):
+        reservations = list()
+        ssh_username = self.config.globals.ssh_username
+        if self.reservations:
+            reservations = self.reservations
+        else:
+            for cloud in self.clouds:
+                reservations = cloud.conn.get_all_instances()
+        for reservation in reservations:
+            for instance in reservation.instances:
+                if self.database.check_benchmark(self.benchmark.name, instance.id):
+                    local_path = os.path.join(self.config.globals.log_local_path, self.benchmark.name, instance.id)
+                    if not os.path.exists(local_path):
+                        os.makedirs(local_path)
+                    for path in self.path:
+                        file_name = os.path.basename(path)
+                        local_path = os.path.join(local_path,file_name)
+                        local_path = local_path+'_'+(datetime.datetime.now()).strftime("%H%M%S")+'_'+instance.instance_type
+                        com = "scp -r "+ssh_username+"@"+instance.public_dns_name+":"+path+" "+local_path
+                        LOG.debug("Download logs: [%s] download %s into %s" % (self.benchmark.name, os.path.basename(path), local_path))
+                        command = Command(com)
+                        command_return = command.execute()
+                        if command_return != 0:
+                            LOG.error("Download logs: "+command.stdout)
+                            LOG.error("Download logs error: "+command.stderr)
+    
     def deploy_software(self):
         ssh_priv_key = self.config.globals.ssh_priv_key
         ssh_username = self.config.globals.ssh_username
         ssh_timeout = int(self.config.globals.ssh_timeout)
-        reservations = list()
-        not_available = 0
+        reservations = list()   
+        not_available = 0;
         if self.reservations:
             reservations = self.reservations
         else:
@@ -184,34 +210,46 @@ def deploy_software(self):
                 reservations = cloud.conn.get_all_instances()
         for reservation in reservations:
             for instance in reservation.instances:
-                if self.database.check_benchmark(self.benchmark.name,
-                                                 instance.id):
-                    if not check_port_status(instance.ip_address, 22,
-                                             ssh_timeout):
-                        LOG.error("Deploy_software: the port 22 is not "
-                                  "available right now. please try it later")
-                        continue
-                    cmds = list()
+                if self.database.check_benchmark(self.benchmark.name, instance.id):
+                    if not check_port_status(instance.ip_address, 22, ssh_timeout):
+                        LOG.error("Deploy_software: the port 22 is not available right now. please try it later")
+                        continue  
+                    cmds = list() 
+                    cmds.append("rm -rf ~/*")
                     cmds.append("wget %s" % (self.url))
-                    #cmds.append("apt-get update")
-                    #cmds.append("apt-get install unzip")
+                    cmds.append("apt-get update")
+                    cmds.append("apt-get install unzip")
                     cmds.append("unzip BioPerf.zip")
-                    cmds.append("sed -i 's/read BIOPERF/#read "
-                                "BIOPERF/g' install-BioPerf.sh")
+                    cmds.append("sed -i 's/read BIOPERF/#read BIOPERF/g' install-BioPerf.sh")
                     cmds.append("./install-BioPerf.sh")
+                    cmds.append("wget ftp://ftp.cc.gatech.edu/pub/people/bader/BioPerf/swissprot.tar.gz")
+                    cmds.append("tar -xvf swissprot.tar.gz")
+                    cmds.append("mv Swissprot/* .")
+                    cmds.append("wget ftp://ftp.cc.gatech.edu/pub/people/bader/BioPerf/Pfam")
+                    cmds.append("wget ftp://ftp.cc.gatech.edu/pub/people/bader/BioPerf/nr")
+                    cmds.append("sed -i '10 i\DATABASES=~/' ~/.profile")
+                    cmds.append("sed -i '10 i\export DATABASES' ~/.profile")
+                    cmds.append("sed -i '5c input='y'' ~/BioPerf/Scripts/Run-scripts/CleanOutputs.sh")
+                    cmds.append("sed -i '21c #' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                    cmds.append("sed -i '26c #' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                    cmds.append("sed -i '10c arch='X'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                    cmds.append("sed -i '71c input3='A'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                    cmds.append('''sed -i "659c ch='Y';" ./BioPerf/Source-codes/Phylip/src/promlk.c''')
+                    cmds.append("sed -i '/scanf/c //' ./BioPerf/Source-codes/Phylip/src/promlk.c")
+                    cmds.append("cd ./BioPerf/Source-codes/Phylip/src;make promlk;cd ~")
+                    cmds.append("mv ./BioPerf/Source-codes/Phylip/src/promlk ./BioPerf/Binaries/x86-Binaries/Phylip")
+
                     for c in cmds:
-                        command = RemoteCommand(instance.public_dns_name,
-                                                ssh_priv_key, c)
+                        command = RemoteCommand(instance.public_dns_name, ssh_priv_key, c)
                         command_return = command.execute()
-                        if command_return != 0:
-                            LOG.error("Deploy_software: " + command.stdout)
-                            LOG.error("Deploy_software error: " +
-                                      command.stderr)
+                        if command_return !=0:
+                            LOG.error("Deploy_software: "+command.stdout)
+                            LOG.error("Deploy_software error: "+command.stderr)
 
-    def excute_benchmarks(self):
+    def excute_benchmarks(self,dataset_size):
         ssh_priv_key = self.config.globals.ssh_priv_key
         ssh_username = self.config.globals.ssh_username
-        reservations = list()
+        reservations = list()   
         if self.reservations:
             reservations = self.reservations
         else:
@@ -219,39 +257,30 @@ def excute_benchmarks(self):
                 reservations = cloud.conn.get_all_instances()
         for reservation in reservations:
             for instance in reservation.instances:
-                if self.database.check_benchmark(self.benchmark.name,
-                                                 instance.id):
+                if self.database.check_benchmark(self.benchmark.name, instance.id):
                     cmds = list()
-                    cmds.append("sed -i '5c input='y'' ~/BioPerf/Scripts/"
-                                "Run-scripts/CleanOutputs.sh")
-                    cmds.append("sed -i '13c rm -f $BIOPERF/Outputs/log' "
-                                "~/BioPerf/Scripts/Run-scripts/"
-                                "CleanOutputs.sh")
-                    cmds.append("sed -i '21c #' "
-                                "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
-                    cmds.append("sed -i '26c #' "
-                                "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
-                    cmds.append("sed -i '10c arch='X'' "
-                                "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
-                    cmds.append("sed -i '71c input3='A'' "
-                                "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
-                    cmds.append("sed -i '134c input='A'' "
-                                "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
-                    cmds.append("sed -i '145c user1='y'' "
-                                "~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
-                    cmds.append("./BioPerf/Scripts/Run-scripts/"
-                                "CleanOutputs.sh")
-                    cmds.append("echo 'Y' 'Y'|"
-                                "./BioPerf/Scripts/Run-scripts/run-bioperf.sh"
-                                " > ~/BioPerf/Outputs/log")
-
+                    cmds.append("sed -i '13c rm -f $BIOPERF/Outputs/log' ~/BioPerf/Scripts/Run-scripts/CleanOutputs.sh")
+                    cmds.append("sed -i '60c FASTA=0' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                    cmds.append("sed -i '62c GRAPPA=0' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                    cmds.append("./BioPerf/Scripts/Run-scripts/CleanOutputs.sh")
+                    if dataset_size=="large":
+                        #cmds.append("sed -i '134c input='A'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                        cmds.append("echo C>c;echo H>>c;echo '1'>>c;echo Y>>c")
+                        
+                    elif dataset_size=="medium":
+                        #cmds.append("sed -i '134c input='B'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                        cmds.append("echo B>c;echo H>>c;echo '1'>>c;echo Y>>c")
+                    else:
+                        #cmds.append("sed -i '134c input='C'' ~/BioPerf/Scripts/Run-scripts/run-bioperf.sh")
+                        cmds.append("echo A>c;echo H>>c;echo '1'>>c;echo Y>>c")
+                    cmds.append("cat c|./BioPerf/Scripts/Run-scripts/run-bioperf.sh > ~/BioPerf/Outputs/log")
+                    
                     for c in cmds:
-                        command = RemoteCommand(instance.public_dns_name,
-                                                ssh_priv_key, c)
+                        command = RemoteCommand(instance.public_dns_name, ssh_priv_key, c)
                         command_return = command.execute()
-                        if command_return != 0:
-                            LOG.error("Excute_benchmarks: " + command.stdout)
-                            LOG.error("Excute_benchmarks: " + command.stderr)
+                        if command_return !=0:
+                            LOG.error("Excute_benchmarks: "+command.stdout)
+                            LOG.error("Excute_benchmarks: "+command.stderr)
 
 
 class Clusters(object):
diff --git a/self.db b/self.db
index 35c61d56329018761f4bbd89a70fc4189bcee8f1..b1fbefb076fe42d3ea085cdcfc51873e7e0d8f94 100644
GIT binary patch
delta 97
zcmZp0XmFSyEvUr6z`z8>j6hm$qK+}Y5`*p|6<#2ZL4bjQzkg$)9RI`sHparqllgPm
rK*9_Ro7n{J@yi%y<d>x8Wa_3`nj4rTnx{CW<|Sw3CKhGu8W;iqMK~1X

delta 97
zcmZp0XmFSyEhx{xz`z8=Fd#co$CzK9LHCgcFHne)zn_7>e`BE{KTAF%yZGeE{JCr(
paR!0SYy$WAWsEZNOHy+(buG=zj13Z#4V_Z+k~4A>i?VeMi~v686p#P_


From 9050d42fe8a223220751daf8cddaddcfc4d33dbe Mon Sep 17 00:00:00 2001
From: suiy <yuan.sui@colorado.edu>
Date: Thu, 18 Apr 2013 04:34:38 -0600
Subject: [PATCH 3/3] remove duplicate download_logs method from clusters.py

---
 resources/cluster/clusters.py | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/resources/cluster/clusters.py b/resources/cluster/clusters.py
index 8b2cb12..08d8f28 100755
--- a/resources/cluster/clusters.py
+++ b/resources/cluster/clusters.py
@@ -170,32 +170,6 @@ def download_logs(self):
                         if command_return != 0:
                             LOG.error("Download logs: " + command.stdout)
                             LOG.error("Download logs error: " + command.stderr)
-
-    def download_logs(self):
-        reservations = list()
-        ssh_username = self.config.globals.ssh_username
-        if self.reservations:
-            reservations = self.reservations
-        else:
-            for cloud in self.clouds:
-                reservations = cloud.conn.get_all_instances()
-        for reservation in reservations:
-            for instance in reservation.instances:
-                if self.database.check_benchmark(self.benchmark.name, instance.id):
-                    local_path = os.path.join(self.config.globals.log_local_path, self.benchmark.name, instance.id)
-                    if not os.path.exists(local_path):
-                        os.makedirs(local_path)
-                    for path in self.path:
-                        file_name = os.path.basename(path)
-                        local_path = os.path.join(local_path,file_name)
-                        local_path = local_path+'_'+(datetime.datetime.now()).strftime("%H%M%S")+'_'+instance.instance_type
-                        com = "scp -r "+ssh_username+"@"+instance.public_dns_name+":"+path+" "+local_path
-                        LOG.debug("Download logs: [%s] download %s into %s" % (self.benchmark.name, os.path.basename(path), local_path))
-                        command = Command(com)
-                        command_return = command.execute()
-                        if command_return != 0:
-                            LOG.error("Download logs: "+command.stdout)
-                            LOG.error("Download logs error: "+command.stderr)
     
     def deploy_software(self):
         ssh_priv_key = self.config.globals.ssh_priv_key